From 06aff4e7b0be6e358c53fe80249a518d60c57f91 Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Wed, 13 Mar 2013 09:02:15 -0700 Subject: [PATCH 01/67] Initial Windows RT support commit. GPU module turned off; TIFF support turned off; Windows Managment and Video IO (VFW) turned off. --- CMakeLists.txt | 4 +++- modules/highgui/CMakeLists.txt | 8 ++++---- modules/highgui/src/cap.cpp | 7 ++++--- modules/highgui/src/cap_ffmpeg.cpp | 16 ++++++++-------- modules/highgui/src/precomp.hpp | 6 +++--- modules/highgui/src/window.cpp | 16 ++++++++-------- platforms/winrt/scripts/cmake_winrt.cmd | 8 ++++++++ 7 files changed, 38 insertions(+), 27 deletions(-) create mode 100644 platforms/winrt/scripts/cmake_winrt.cmd diff --git a/CMakeLists.txt b/CMakeLists.txt index 579312d40..e5868cf43 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -60,6 +60,8 @@ endif() project(OpenCV CXX C) +add_definitions(-D_ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE) + include(cmake/OpenCVUtils.cmake) # ---------------------------------------------------------------------------- @@ -608,7 +610,7 @@ else() status(" QT 4.x:" NO) endif() if(WIN32) - status(" Win32 UI:" YES) + status(" Win32 UI:" NO) else() if(APPLE) if(WITH_CARBON) diff --git a/modules/highgui/CMakeLists.txt b/modules/highgui/CMakeLists.txt index 7e5fae39b..12cf2cbcd 100644 --- a/modules/highgui/CMakeLists.txt +++ b/modules/highgui/CMakeLists.txt @@ -90,7 +90,7 @@ if(HAVE_QT) set_source_files_properties(${_RCC_OUTFILES} PROPERTIES COMPILE_FLAGS -Wno-missing-declarations) endif() elseif(WIN32) - list(APPEND highgui_srcs src/window_w32.cpp) + #list(APPEND highgui_srcs src/window_w32.cpp) elseif(HAVE_GTK) list(APPEND highgui_srcs src/window_gtk.cpp) elseif(APPLE) @@ -105,9 +105,9 @@ elseif(APPLE) endif() endif() -if(WIN32) - list(APPEND highgui_srcs src/cap_vfw.cpp src/cap_cmu.cpp src/cap_dshow.cpp) -endif(WIN32) +#if(WIN32) +# list(APPEND highgui_srcs src/cap_vfw.cpp src/cap_cmu.cpp src/cap_dshow.cpp) +#endif(WIN32) if(HAVE_XINE) list(APPEND highgui_srcs src/cap_xine.cpp) diff --git a/modules/highgui/src/cap.cpp b/modules/highgui/src/cap.cpp index 13475f263..237e3156e 100644 --- a/modules/highgui/src/cap.cpp +++ b/modules/highgui/src/cap.cpp @@ -197,9 +197,10 @@ CV_IMPL CvCapture * cvCreateCameraCapture (int index) { #ifdef HAVE_VIDEOINPUT case CV_CAP_DSHOW: - capture = cvCreateCameraCapture_DShow (index); - if (capture) - return capture; + // capture = cvCreateCameraCapture_DShow (index); + // if (capture) + // return capture; + return NULL; break; #endif diff --git a/modules/highgui/src/cap_ffmpeg.cpp b/modules/highgui/src/cap_ffmpeg.cpp index 657502acf..640fc2da6 100644 --- a/modules/highgui/src/cap_ffmpeg.cpp +++ b/modules/highgui/src/cap_ffmpeg.cpp @@ -209,11 +209,11 @@ CvCapture* cvCreateFileCapture_FFMPEG_proxy(const char * filename) if( result->open( filename )) return result; delete result; -#if defined WIN32 || defined _WIN32 - return cvCreateFileCapture_VFW(filename); -#else +//#if defined WIN32 || defined _WIN32 +// return cvCreateFileCapture_VFW(filename); +//#else return 0; -#endif +//#endif } class CvVideoWriter_FFMPEG_proxy : @@ -263,9 +263,9 @@ CvVideoWriter* cvCreateVideoWriter_FFMPEG_proxy( const char* filename, int fourc if( result->open( filename, fourcc, fps, frameSize, isColor != 0 )) return result; delete result; -#if defined WIN32 || defined _WIN32 - return cvCreateVideoWriter_VFW(filename, fourcc, fps, frameSize, isColor); -#else +// #if defined WIN32 || defined _WIN32 +// return cvCreateVideoWriter_VFW(filename, fourcc, fps, frameSize, isColor); +// #else return 0; -#endif +//#endif } diff --git a/modules/highgui/src/precomp.hpp b/modules/highgui/src/precomp.hpp index 9572e3010..5790bf67a 100644 --- a/modules/highgui/src/precomp.hpp +++ b/modules/highgui/src/precomp.hpp @@ -103,12 +103,12 @@ struct CvVideoWriter virtual bool writeFrame(const IplImage*) { return false; } }; -#if defined WIN32 || defined _WIN32 -#define HAVE_VFW 1 +//#if defined WIN32 || defined _WIN32 +//#define HAVE_VFW 1 /* uncomment to enable CMUCamera1394 fireware camera module */ //#define HAVE_CMU1394 1 -#endif +//#endif CvCapture * cvCreateCameraCapture_V4L( int index ); diff --git a/modules/highgui/src/window.cpp b/modules/highgui/src/window.cpp index b6086076c..a26bc447a 100644 --- a/modules/highgui/src/window.cpp +++ b/modules/highgui/src/window.cpp @@ -57,7 +57,7 @@ CV_IMPL void cvSetWindowProperty(const char* name, int prop_id, double prop_valu #if defined (HAVE_QT) cvSetModeWindow_QT(name,prop_value); - #elif defined WIN32 || defined _WIN32 + #elif defined (HAVE_WIN32_UI) cvSetModeWindow_W32(name,prop_value); #elif defined (HAVE_GTK) cvSetModeWindow_GTK(name,prop_value); @@ -96,7 +96,7 @@ CV_IMPL double cvGetWindowProperty(const char* name, int prop_id) #if defined (HAVE_QT) return cvGetModeWindow_QT(name); - #elif defined WIN32 || defined _WIN32 + #elif defined (HAVE_WIN32_UI) return cvGetModeWindow_W32(name); #elif defined (HAVE_GTK) return cvGetModeWindow_GTK(name); @@ -113,7 +113,7 @@ CV_IMPL double cvGetWindowProperty(const char* name, int prop_id) #if defined (HAVE_QT) return cvGetPropWindow_QT(name); - #elif defined WIN32 || defined _WIN32 + #elif defined (HAVE_WIN32_UI) return cvGetPropWindowAutoSize_W32(name); #elif defined (HAVE_GTK) return cvGetPropWindowAutoSize_GTK(name); @@ -126,7 +126,7 @@ CV_IMPL double cvGetWindowProperty(const char* name, int prop_id) #if defined (HAVE_QT) return cvGetRatioWindow_QT(name); - #elif defined WIN32 || defined _WIN32 + #elif defined (HAVE_WIN32_UI) return cvGetRatioWindow_W32(name); #elif defined (HAVE_GTK) return cvGetRatioWindow_GTK(name); @@ -139,7 +139,7 @@ CV_IMPL double cvGetWindowProperty(const char* name, int prop_id) #if defined (HAVE_QT) return cvGetOpenGlProp_QT(name); - #elif defined WIN32 || defined _WIN32 + #elif defined (HAVE_WIN32_UI) return cvGetOpenGlProp_W32(name); #elif defined (HAVE_GTK) return cvGetOpenGlProp_GTK(name); @@ -450,11 +450,11 @@ int cv::createButton(const string&, ButtonCallback, void*, int , bool ) #endif -#if defined WIN32 || defined _WIN32 // see window_w32.cpp +#if defined (HAVE_WIN32_UI) // see window_w32.cpp #elif defined (HAVE_GTK) // see window_gtk.cpp -#elif defined (HAVE_COCOA) // see window_carbon.cpp +#elif defined (HAVE_COCOA) // see window_carbon.cpp #elif defined (HAVE_CARBON) -#elif defined (HAVE_QT) //YV see window_QT.cpp +#elif defined (HAVE_QT) //YV see window_QT.cpp #else diff --git a/platforms/winrt/scripts/cmake_winrt.cmd b/platforms/winrt/scripts/cmake_winrt.cmd new file mode 100644 index 000000000..18aafef02 --- /dev/null +++ b/platforms/winrt/scripts/cmake_winrt.cmd @@ -0,0 +1,8 @@ +mkdir build +cd build + +rem call "C:\Program Files\Microsoft Visual Studio 11.0\VC\bin\x86_arm\vcvarsx86_arm.bat" + +SET PATH=C:\Program Files\Ninja;%PATH% + +"C:\Program Files\CMake 2.8\bin\cmake.exe" -GNinja -DCMAKE_BUILD_TYPE=Release -DWITH_TIFF=OFF -DWITH_FFMPEG=OFF -DBUILD_opencv_gpu=OFF -DENABLE_SSE=OFF -DENABLE_SSE2=OFF ..\..\.. \ No newline at end of file From 3d594c70ffd29e9bd4d3c93f64fae888514678bc Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Tue, 19 Mar 2013 17:46:01 -0700 Subject: [PATCH 02/67] Multiple Windows RT fixes TIFF build was fixed; Direct Show back-end was enabled; More accurate guards implemented. --- 3rdparty/tbb/CMakeLists.txt | 37 ++++++++++++++++++++++-------- CMakeLists.txt | 7 ++++-- modules/highgui/CMakeLists.txt | 3 ++- modules/highgui/src/cap.cpp | 6 ++--- modules/highgui/src/cap_ffmpeg.cpp | 16 ++++++------- modules/highgui/src/precomp.hpp | 6 ++--- modules/highgui/src/window.cpp | 12 +++++----- 7 files changed, 54 insertions(+), 33 deletions(-) diff --git a/3rdparty/tbb/CMakeLists.txt b/3rdparty/tbb/CMakeLists.txt index 3c6ae98f4..6a5d2a856 100644 --- a/3rdparty/tbb/CMakeLists.txt +++ b/3rdparty/tbb/CMakeLists.txt @@ -122,15 +122,32 @@ file(GLOB lib_srcs "${tbb_src_dir}/src/tbb/*.cpp") file(GLOB lib_hdrs "${tbb_src_dir}/src/tbb/*.h") list(APPEND lib_srcs "${tbb_src_dir}/src/rml/client/rml_tbb.cpp") -add_definitions(-D__TBB_DYNAMIC_LOAD_ENABLED=0 #required - -D__TBB_BUILD=1 #required - -D__TBB_SURVIVE_THREAD_SWITCH=0 #no cilk support - -DUSE_PTHREAD #required for Unix - -DTBB_USE_GCC_BUILTINS=1 #required for ARM GCC - -DTBB_USE_DEBUG=0 #just to be sure - -DTBB_NO_LEGACY=1 #don't need backward compatibility - -DDO_ITT_NOTIFY=0 #it seems that we don't need these notifications - ) +if (WIN32) +add_definitions(-D__TBB_DYNAMIC_LOAD_ENABLED=0 + -D__TBB_BUILD=1 + -D_UNICODE + -DUNICODE + -DWINAPI_FAMILY=WINAPI_FAMILY_APP + -DDO_ITT_NOTIFY=0 + ) # defines were copied from windows.cl.inc +set(CMAKE_LINKER_FLAGS "${CMAKE_LINKER_FLAGS} /APPCONTAINER") +else() + add_definitions(-D__TBB_DYNAMIC_LOAD_ENABLED=0 #required + -D__TBB_BUILD=1 #required + -D__TBB_SURVIVE_THREAD_SWITCH=0 #no cilk support + -DTBB_USE_DEBUG=0 #just to be sure + -DTBB_NO_LEGACY=1 #don't need backward compatibility + -DDO_ITT_NOTIFY=0 #it seems that we don't need these notifications + ) +endif() + +if (HAVE_LIBPTHREAD) + add_definitions(-DUSE_PTHREAD) #required for Unix +endif() + +if (CMAKE_COMPILER_IS_GNUCXX) + add_definitions(-DTBB_USE_GCC_BUILTINS=1) #required for ARM GCC +endif() if(ANDROID_COMPILER_IS_CLANG) add_definitions(-D__TBB_GCC_BUILTIN_ATOMICS_PRESENT=1) @@ -145,7 +162,7 @@ endif() set(TBB_SOURCE_FILES ${lib_srcs} ${lib_hdrs}) -if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") +if ((${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") AND NOT WIN32) if (NOT ANDROID) set(TBB_SOURCE_FILES ${TBB_SOURCE_FILES} "${CMAKE_CURRENT_SOURCE_DIR}/arm_linux_stub.cpp") endif() diff --git a/CMakeLists.txt b/CMakeLists.txt index e5868cf43..1f997ba17 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -60,7 +60,10 @@ endif() project(OpenCV CXX C) -add_definitions(-D_ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE) +if ((${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") AND WIN32) + set(CMAKE_REQUIRED_DEFINITIONS -D_ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE) + add_definitions(-D_ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE) +endif() include(cmake/OpenCVUtils.cmake) @@ -171,7 +174,7 @@ OCV_OPTION(BUILD_JASPER "Build libjasper from source" WIN32 O OCV_OPTION(BUILD_JPEG "Build libjpeg from source" WIN32 OR ANDROID OR APPLE ) OCV_OPTION(BUILD_PNG "Build libpng from source" WIN32 OR ANDROID OR APPLE ) OCV_OPTION(BUILD_OPENEXR "Build openexr from source" WIN32 OR ANDROID OR APPLE ) -OCV_OPTION(BUILD_TBB "Download and build TBB from source" ANDROID IF CMAKE_COMPILER_IS_GNUCXX ) +OCV_OPTION(BUILD_TBB "Download and build TBB from source" ANDROID ) # OpenCV installation options # =================================================== diff --git a/modules/highgui/CMakeLists.txt b/modules/highgui/CMakeLists.txt index 12cf2cbcd..b8600e9b0 100644 --- a/modules/highgui/CMakeLists.txt +++ b/modules/highgui/CMakeLists.txt @@ -106,7 +106,8 @@ elseif(APPLE) endif() #if(WIN32) -# list(APPEND highgui_srcs src/cap_vfw.cpp src/cap_cmu.cpp src/cap_dshow.cpp) + list(APPEND highgui_srcs src/cap_dshow.cpp) + #list(APPEND highgui_srcs src/cap_vfw.cpp src/cap_cmu.cpp) #endif(WIN32) if(HAVE_XINE) diff --git a/modules/highgui/src/cap.cpp b/modules/highgui/src/cap.cpp index 237e3156e..2c754cadc 100644 --- a/modules/highgui/src/cap.cpp +++ b/modules/highgui/src/cap.cpp @@ -197,9 +197,9 @@ CV_IMPL CvCapture * cvCreateCameraCapture (int index) { #ifdef HAVE_VIDEOINPUT case CV_CAP_DSHOW: - // capture = cvCreateCameraCapture_DShow (index); - // if (capture) - // return capture; + capture = cvCreateCameraCapture_DShow (index); + if (capture) + return capture; return NULL; break; #endif diff --git a/modules/highgui/src/cap_ffmpeg.cpp b/modules/highgui/src/cap_ffmpeg.cpp index 640fc2da6..22caab854 100644 --- a/modules/highgui/src/cap_ffmpeg.cpp +++ b/modules/highgui/src/cap_ffmpeg.cpp @@ -209,11 +209,11 @@ CvCapture* cvCreateFileCapture_FFMPEG_proxy(const char * filename) if( result->open( filename )) return result; delete result; -//#if defined WIN32 || defined _WIN32 -// return cvCreateFileCapture_VFW(filename); -//#else +#if (defined WIN32 || defined _WIN32) && !defined _M_ARM + return cvCreateFileCapture_VFW(filename); +#else return 0; -//#endif +#endif } class CvVideoWriter_FFMPEG_proxy : @@ -263,9 +263,9 @@ CvVideoWriter* cvCreateVideoWriter_FFMPEG_proxy( const char* filename, int fourc if( result->open( filename, fourcc, fps, frameSize, isColor != 0 )) return result; delete result; -// #if defined WIN32 || defined _WIN32 -// return cvCreateVideoWriter_VFW(filename, fourcc, fps, frameSize, isColor); -// #else +#if (defined WIN32 || defined _WIN32) && !defined _M_ARM + return cvCreateVideoWriter_VFW(filename, fourcc, fps, frameSize, isColor); + #else return 0; -//#endif +#endif } diff --git a/modules/highgui/src/precomp.hpp b/modules/highgui/src/precomp.hpp index 5790bf67a..64efbb90d 100644 --- a/modules/highgui/src/precomp.hpp +++ b/modules/highgui/src/precomp.hpp @@ -103,12 +103,12 @@ struct CvVideoWriter virtual bool writeFrame(const IplImage*) { return false; } }; -//#if defined WIN32 || defined _WIN32 -//#define HAVE_VFW 1 +#if (defined WIN32 || defined _WIN32) && !defined _M_ARM +#define HAVE_VFW 1 /* uncomment to enable CMUCamera1394 fireware camera module */ //#define HAVE_CMU1394 1 -//#endif +#endif CvCapture * cvCreateCameraCapture_V4L( int index ); diff --git a/modules/highgui/src/window.cpp b/modules/highgui/src/window.cpp index a26bc447a..12816f3cb 100644 --- a/modules/highgui/src/window.cpp +++ b/modules/highgui/src/window.cpp @@ -57,7 +57,7 @@ CV_IMPL void cvSetWindowProperty(const char* name, int prop_id, double prop_valu #if defined (HAVE_QT) cvSetModeWindow_QT(name,prop_value); - #elif defined (HAVE_WIN32_UI) + #elif (defined WIN32 || defined _WIN32) && !defined _M_ARM cvSetModeWindow_W32(name,prop_value); #elif defined (HAVE_GTK) cvSetModeWindow_GTK(name,prop_value); @@ -96,7 +96,7 @@ CV_IMPL double cvGetWindowProperty(const char* name, int prop_id) #if defined (HAVE_QT) return cvGetModeWindow_QT(name); - #elif defined (HAVE_WIN32_UI) + #elif (defined WIN32 || defined _WIN32) && !defined _M_ARM return cvGetModeWindow_W32(name); #elif defined (HAVE_GTK) return cvGetModeWindow_GTK(name); @@ -113,7 +113,7 @@ CV_IMPL double cvGetWindowProperty(const char* name, int prop_id) #if defined (HAVE_QT) return cvGetPropWindow_QT(name); - #elif defined (HAVE_WIN32_UI) + #elif (defined WIN32 || defined _WIN32) && !defined _M_ARM return cvGetPropWindowAutoSize_W32(name); #elif defined (HAVE_GTK) return cvGetPropWindowAutoSize_GTK(name); @@ -126,7 +126,7 @@ CV_IMPL double cvGetWindowProperty(const char* name, int prop_id) #if defined (HAVE_QT) return cvGetRatioWindow_QT(name); - #elif defined (HAVE_WIN32_UI) + #elif (defined WIN32 || defined _WIN32) && !defined _M_ARM return cvGetRatioWindow_W32(name); #elif defined (HAVE_GTK) return cvGetRatioWindow_GTK(name); @@ -139,7 +139,7 @@ CV_IMPL double cvGetWindowProperty(const char* name, int prop_id) #if defined (HAVE_QT) return cvGetOpenGlProp_QT(name); - #elif defined (HAVE_WIN32_UI) + #elif (defined WIN32 || defined _WIN32) && !defined _M_ARM return cvGetOpenGlProp_W32(name); #elif defined (HAVE_GTK) return cvGetOpenGlProp_GTK(name); @@ -450,7 +450,7 @@ int cv::createButton(const string&, ButtonCallback, void*, int , bool ) #endif -#if defined (HAVE_WIN32_UI) // see window_w32.cpp +#if (defined WIN32 || defined _WIN32) && !defined _M_ARM // see window_w32.cpp #elif defined (HAVE_GTK) // see window_gtk.cpp #elif defined (HAVE_COCOA) // see window_carbon.cpp #elif defined (HAVE_CARBON) From 429cd85b46f85418a3fea5a6e2d70e9ccfba9639 Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Wed, 20 Mar 2013 12:12:58 -0700 Subject: [PATCH 03/67] More accurate guards for WinRT features in CMake. --- 3rdparty/tbb/CMakeLists.txt | 4 ++-- CMakeLists.txt | 7 +------ cmake/OpenCVDetectCXXCompiler.cmake | 2 ++ modules/highgui/CMakeLists.txt | 12 ++++++++---- platforms/winrt/arm.winrt.toolchain.cmake | 5 +++++ platforms/winrt/scripts/cmake_winrt.cmd | 4 +--- 6 files changed, 19 insertions(+), 15 deletions(-) create mode 100644 platforms/winrt/arm.winrt.toolchain.cmake diff --git a/3rdparty/tbb/CMakeLists.txt b/3rdparty/tbb/CMakeLists.txt index 6a5d2a856..af1581349 100644 --- a/3rdparty/tbb/CMakeLists.txt +++ b/3rdparty/tbb/CMakeLists.txt @@ -123,7 +123,7 @@ file(GLOB lib_hdrs "${tbb_src_dir}/src/tbb/*.h") list(APPEND lib_srcs "${tbb_src_dir}/src/rml/client/rml_tbb.cpp") if (WIN32) -add_definitions(-D__TBB_DYNAMIC_LOAD_ENABLED=0 + add_definitions(-D__TBB_DYNAMIC_LOAD_ENABLED=0 -D__TBB_BUILD=1 -D_UNICODE -DUNICODE @@ -162,7 +162,7 @@ endif() set(TBB_SOURCE_FILES ${lib_srcs} ${lib_hdrs}) -if ((${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") AND NOT WIN32) +if (ARM AND NOT WIN32) if (NOT ANDROID) set(TBB_SOURCE_FILES ${TBB_SOURCE_FILES} "${CMAKE_CURRENT_SOURCE_DIR}/arm_linux_stub.cpp") endif() diff --git a/CMakeLists.txt b/CMakeLists.txt index 1f997ba17..22ee7fe7b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -60,11 +60,6 @@ endif() project(OpenCV CXX C) -if ((${CMAKE_SYSTEM_PROCESSOR} MATCHES "arm") AND WIN32) - set(CMAKE_REQUIRED_DEFINITIONS -D_ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE) - add_definitions(-D_ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE) -endif() - include(cmake/OpenCVUtils.cmake) # ---------------------------------------------------------------------------- @@ -613,7 +608,7 @@ else() status(" QT 4.x:" NO) endif() if(WIN32) - status(" Win32 UI:" NO) + status(" Win32 UI:" ARM THEN NO ELSE YES) else() if(APPLE) if(WITH_CARBON) diff --git a/cmake/OpenCVDetectCXXCompiler.cmake b/cmake/OpenCVDetectCXXCompiler.cmake index 6e0278000..504004bce 100644 --- a/cmake/OpenCVDetectCXXCompiler.cmake +++ b/cmake/OpenCVDetectCXXCompiler.cmake @@ -103,4 +103,6 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*" OR CMAKE_GENERATOR set(X86_64 1) elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "i686.*|i386.*|x86.*|amd64.*|AMD64.*") set(X86 1) +elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "arm.*|ARM.*") + set(ARM 1) endif() diff --git a/modules/highgui/CMakeLists.txt b/modules/highgui/CMakeLists.txt index b8600e9b0..6d92455fa 100644 --- a/modules/highgui/CMakeLists.txt +++ b/modules/highgui/CMakeLists.txt @@ -90,7 +90,9 @@ if(HAVE_QT) set_source_files_properties(${_RCC_OUTFILES} PROPERTIES COMPILE_FLAGS -Wno-missing-declarations) endif() elseif(WIN32) - #list(APPEND highgui_srcs src/window_w32.cpp) + if (NOT ARM) + list(APPEND highgui_srcs src/window_w32.cpp) + endif() elseif(HAVE_GTK) list(APPEND highgui_srcs src/window_gtk.cpp) elseif(APPLE) @@ -105,10 +107,12 @@ elseif(APPLE) endif() endif() -#if(WIN32) +if(WIN32) list(APPEND highgui_srcs src/cap_dshow.cpp) - #list(APPEND highgui_srcs src/cap_vfw.cpp src/cap_cmu.cpp) -#endif(WIN32) + if (NOT ARM) + list(APPEND highgui_srcs src/cap_vfw.cpp src/cap_cmu.cpp) + endif() +endif(WIN32) if(HAVE_XINE) list(APPEND highgui_srcs src/cap_xine.cpp) diff --git a/platforms/winrt/arm.winrt.toolchain.cmake b/platforms/winrt/arm.winrt.toolchain.cmake new file mode 100644 index 000000000..01854b598 --- /dev/null +++ b/platforms/winrt/arm.winrt.toolchain.cmake @@ -0,0 +1,5 @@ +set(CMAKE_SYSTEM_NAME Windows) # WindowsRT breaks cmake 2.8.10.2 and earler +set(CMAKE_SYSTEM_PROCESSOR "arm") + +set(CMAKE_REQUIRED_DEFINITIONS -D_ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE) +add_definitions(-D_ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE) \ No newline at end of file diff --git a/platforms/winrt/scripts/cmake_winrt.cmd b/platforms/winrt/scripts/cmake_winrt.cmd index 18aafef02..3dd20e4d3 100644 --- a/platforms/winrt/scripts/cmake_winrt.cmd +++ b/platforms/winrt/scripts/cmake_winrt.cmd @@ -3,6 +3,4 @@ cd build rem call "C:\Program Files\Microsoft Visual Studio 11.0\VC\bin\x86_arm\vcvarsx86_arm.bat" -SET PATH=C:\Program Files\Ninja;%PATH% - -"C:\Program Files\CMake 2.8\bin\cmake.exe" -GNinja -DCMAKE_BUILD_TYPE=Release -DWITH_TIFF=OFF -DWITH_FFMPEG=OFF -DBUILD_opencv_gpu=OFF -DENABLE_SSE=OFF -DENABLE_SSE2=OFF ..\..\.. \ No newline at end of file +cmake.exe -GNinja -DWITH_FFMPEG=OFF -DBUILD_opencv_gpu=OFF -DBUILD_opencv_python=OFF -DCMAKE_TOOLCHAIN_FILE=..\..\winrt\arm.winrt.toolchain.cmake ..\..\.. From 7ec2b6bad01898282371beb49a5fae243f66ce8f Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Thu, 21 Mar 2013 16:18:19 +0400 Subject: [PATCH 04/67] Highgui tests for GUI disabled for WinRT; Warnings fix; --- modules/highgui/test/test_gui.cpp | 2 +- .../stitching/include/opencv2/stitching/detail/warpers.hpp | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/highgui/test/test_gui.cpp b/modules/highgui/test/test_gui.cpp index 285203cb0..e802f296f 100644 --- a/modules/highgui/test/test_gui.cpp +++ b/modules/highgui/test/test_gui.cpp @@ -43,7 +43,7 @@ #include "test_precomp.hpp" #include "opencv2/highgui/highgui.hpp" -#if defined HAVE_GTK || defined HAVE_QT || defined WIN32 || defined _WIN32 || defined HAVE_CARBON || defined HAVE_COCOA +#if defined HAVE_GTK || defined HAVE_QT || ((defined WIN32 || defined _WIN32) && !_M_ARM) || defined HAVE_CARBON || defined HAVE_COCOA using namespace cv; using namespace std; diff --git a/modules/stitching/include/opencv2/stitching/detail/warpers.hpp b/modules/stitching/include/opencv2/stitching/detail/warpers.hpp index 4a7cc4e8f..2bd46f75a 100644 --- a/modules/stitching/include/opencv2/stitching/detail/warpers.hpp +++ b/modules/stitching/include/opencv2/stitching/detail/warpers.hpp @@ -460,7 +460,7 @@ struct SphericalPortraitProjector : ProjectorBase // Projects image onto unit sphere with origin at (0, 0, 0). // Poles are located NOT at (0, -1, 0) and (0, 1, 0) points, BUT at (1, 0, 0) and (-1, 0, 0) points. -class SphericalPortraitWarper : public RotationWarperBase +class CV_EXPORTS SphericalPortraitWarper : public RotationWarperBase { public: SphericalPortraitWarper(float scale) { projector_.scale = scale; } @@ -476,7 +476,7 @@ struct CylindricalPortraitProjector : ProjectorBase }; -class CylindricalPortraitWarper : public RotationWarperBase +class CV_EXPORTS CylindricalPortraitWarper : public RotationWarperBase { public: CylindricalPortraitWarper(float scale) { projector_.scale = scale; } @@ -495,7 +495,7 @@ struct PlanePortraitProjector : ProjectorBase }; -class PlanePortraitWarper : public RotationWarperBase +class CV_EXPORTS PlanePortraitWarper : public RotationWarperBase { public: PlanePortraitWarper(float scale) { projector_.scale = scale; } From 6f68640d4d0fb524645c25b87a064fd2fbae81fb Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Tue, 26 Mar 2013 17:19:52 -0700 Subject: [PATCH 05/67] Multiple fixes for WinRT Fixed flann build with NEON; Fixed Haming distance with NEON; Honest cvRound for WinRT added; cvRound test added; Video IO with direct show disabled; --- CMakeLists.txt | 2 +- cmake/OpenCVFindLibsVideo.cmake | 2 +- .../core/include/opencv2/core/internal.hpp | 1 - modules/core/include/opencv2/core/types_c.h | 7 +- modules/core/src/stat.cpp | 64 +++++++++---------- modules/core/test/test_arithm.cpp | 13 ++++ modules/flann/include/opencv2/flann/dist.h | 7 +- modules/highgui/CMakeLists.txt | 15 ++--- platforms/winrt/scripts/cmake_winrt.cmd | 2 +- 9 files changed, 59 insertions(+), 54 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 22ee7fe7b..9b7f8c2d7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -138,7 +138,7 @@ OCV_OPTION(WITH_CSTRIPES "Include C= support" OFF OCV_OPTION(WITH_TIFF "Include TIFF support" ON IF (NOT IOS) ) OCV_OPTION(WITH_UNICAP "Include Unicap support (GPL)" OFF IF (UNIX AND NOT APPLE AND NOT ANDROID) ) OCV_OPTION(WITH_V4L "Include Video 4 Linux support" ON IF (UNIX AND NOT ANDROID) ) -OCV_OPTION(WITH_VIDEOINPUT "Build HighGUI with DirectShow support" ON IF WIN32 ) +OCV_OPTION(WITH_VIDEOINPUT "Build HighGUI with DirectShow support" ON IF WIN32 AND NOT ARM ) OCV_OPTION(WITH_XIMEA "Include XIMEA cameras support" OFF IF (NOT ANDROID AND NOT APPLE) ) OCV_OPTION(WITH_XINE "Include Xine support (GPL)" OFF IF (UNIX AND NOT APPLE AND NOT ANDROID) ) OCV_OPTION(WITH_OPENCL "Include OpenCL Runtime support" OFF IF (NOT ANDROID AND NOT IOS) ) diff --git a/cmake/OpenCVFindLibsVideo.cmake b/cmake/OpenCVFindLibsVideo.cmake index 414918527..3556ba562 100644 --- a/cmake/OpenCVFindLibsVideo.cmake +++ b/cmake/OpenCVFindLibsVideo.cmake @@ -111,7 +111,7 @@ endif(WITH_XIMEA) # --- FFMPEG --- ocv_clear_vars(HAVE_FFMPEG HAVE_FFMPEG_CODEC HAVE_FFMPEG_FORMAT HAVE_FFMPEG_UTIL HAVE_FFMPEG_SWSCALE HAVE_GENTOO_FFMPEG HAVE_FFMPEG_FFMPEG) if(WITH_FFMPEG) - if(WIN32) + if(WIN32 AND NOT ARM) include("${OpenCV_SOURCE_DIR}/3rdparty/ffmpeg/ffmpeg_version.cmake") elseif(UNIX) CHECK_MODULE(libavcodec HAVE_FFMPEG_CODEC) diff --git a/modules/core/include/opencv2/core/internal.hpp b/modules/core/include/opencv2/core/internal.hpp index 5335fa01f..8902e69de 100644 --- a/modules/core/include/opencv2/core/internal.hpp +++ b/modules/core/include/opencv2/core/internal.hpp @@ -136,7 +136,6 @@ CV_INLINE IppiSize ippiSize(int width, int height) #ifdef __ARM_NEON__ # include # define CV_NEON 1 -# define CPU_HAS_NEON_FEATURE (true) #endif #ifndef CV_SSE diff --git a/modules/core/include/opencv2/core/types_c.h b/modules/core/include/opencv2/core/types_c.h index 33e7fe993..be959a51c 100644 --- a/modules/core/include/opencv2/core/types_c.h +++ b/modules/core/include/opencv2/core/types_c.h @@ -323,7 +323,12 @@ CV_INLINE int cvRound( double value ) # endif #else // while this is not IEEE754-compliant rounding, it's usually a good enough approximation - return (int)(value + (value >= 0 ? 0.5 : -0.5)); + double intpart, fractpart; + fractpart = modf(value, &intpart); + if ((abs(fractpart) != 0.5) || ((((int)intpart) % 2) != 0)) + return (int)(value + (value >= 0 ? 0.5 : -0.5)); + else + return (int)intpart; #endif } diff --git a/modules/core/src/stat.cpp b/modules/core/src/stat.cpp index b62f10a2a..e069e5298 100644 --- a/modules/core/src/stat.cpp +++ b/modules/core/src/stat.cpp @@ -999,25 +999,22 @@ static int normHamming(const uchar* a, int n) { int i = 0, result = 0; #if CV_NEON - if (CPU_HAS_NEON_FEATURE) - { - uint32x4_t bits = vmovq_n_u32(0); - for (; i <= n - 16; i += 16) { - uint8x16_t A_vec = vld1q_u8 (a + i); - uint8x16_t bitsSet = vcntq_u8 (A_vec); - uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet); - uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8); - bits = vaddq_u32(bits, bitSet4); - } - uint64x2_t bitSet2 = vpaddlq_u32 (bits); - result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0); - result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2); + uint32x4_t bits = vmovq_n_u32(0); + for (; i <= n - 16; i += 16) { + uint8x16_t A_vec = vld1q_u8 (a + i); + uint8x16_t bitsSet = vcntq_u8 (A_vec); + uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet); + uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8); + bits = vaddq_u32(bits, bitSet4); } - else -#endif - for( ; i <= n - 4; i += 4 ) + uint64x2_t bitSet2 = vpaddlq_u32 (bits); + result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0); + result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2); +#else + for( ; i <= n - 4; i += 4 ) result += popCountTable[a[i]] + popCountTable[a[i+1]] + popCountTable[a[i+2]] + popCountTable[a[i+3]]; +#endif for( ; i < n; i++ ) result += popCountTable[a[i]]; return result; @@ -1027,27 +1024,24 @@ int normHamming(const uchar* a, const uchar* b, int n) { int i = 0, result = 0; #if CV_NEON - if (CPU_HAS_NEON_FEATURE) - { - uint32x4_t bits = vmovq_n_u32(0); - for (; i <= n - 16; i += 16) { - uint8x16_t A_vec = vld1q_u8 (a + i); - uint8x16_t B_vec = vld1q_u8 (b + i); - uint8x16_t AxorB = veorq_u8 (A_vec, B_vec); - uint8x16_t bitsSet = vcntq_u8 (AxorB); - uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet); - uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8); - bits = vaddq_u32(bits, bitSet4); - } - uint64x2_t bitSet2 = vpaddlq_u32 (bits); - result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0); - result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2); + uint32x4_t bits = vmovq_n_u32(0); + for (; i <= n - 16; i += 16) { + uint8x16_t A_vec = vld1q_u8 (a + i); + uint8x16_t B_vec = vld1q_u8 (b + i); + uint8x16_t AxorB = veorq_u8 (A_vec, B_vec); + uint8x16_t bitsSet = vcntq_u8 (AxorB); + uint16x8_t bitSet8 = vpaddlq_u8 (bitsSet); + uint32x4_t bitSet4 = vpaddlq_u16 (bitSet8); + bits = vaddq_u32(bits, bitSet4); } - else + uint64x2_t bitSet2 = vpaddlq_u32 (bits); + result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0); + result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2); +#else + for( ; i <= n - 4; i += 4 ) + result += popCountTable[a[i] ^ b[i]] + popCountTable[a[i+1] ^ b[i+1]] + + popCountTable[a[i+2] ^ b[i+2]] + popCountTable[a[i+3] ^ b[i+3]]; #endif - for( ; i <= n - 4; i += 4 ) - result += popCountTable[a[i] ^ b[i]] + popCountTable[a[i+1] ^ b[i+1]] + - popCountTable[a[i+2] ^ b[i+2]] + popCountTable[a[i+3] ^ b[i+3]]; for( ; i < n; i++ ) result += popCountTable[a[i] ^ b[i]]; return result; diff --git a/modules/core/test/test_arithm.cpp b/modules/core/test/test_arithm.cpp index ebc9eae64..a3e61f22a 100644 --- a/modules/core/test/test_arithm.cpp +++ b/modules/core/test/test_arithm.cpp @@ -1551,3 +1551,16 @@ TEST(Core_Add, AddToColumnWhen4Rows) ASSERT_EQ(0, countNonZero(m1 - m2)); } + +TEST(Core_round, CvRound) +{ + ASSERT_EQ(2, cvRound(2.0)); + ASSERT_EQ(2, cvRound(2.1)); + ASSERT_EQ(-2, cvRound(-2.1)); + ASSERT_EQ(3, cvRound(2.8)); + ASSERT_EQ(-3, cvRound(-2.8)); + ASSERT_EQ(2, cvRound(2.5)); + ASSERT_EQ(4, cvRound(3.5)); + ASSERT_EQ(-2, cvRound(-2.5)); + ASSERT_EQ(-4, cvRound(-3.5)); +} \ No newline at end of file diff --git a/modules/flann/include/opencv2/flann/dist.h b/modules/flann/include/opencv2/flann/dist.h index d2674305c..7380d0c5d 100644 --- a/modules/flann/include/opencv2/flann/dist.h +++ b/modules/flann/include/opencv2/flann/dist.h @@ -456,7 +456,6 @@ struct Hamming ResultType operator()(Iterator1 a, Iterator2 b, size_t size, ResultType /*worst_dist*/ = -1) const { ResultType result = 0; -#ifdef __GNUC__ #ifdef __ARM_NEON__ { uint32x4_t bits = vmovq_n_u32(0); @@ -473,7 +472,7 @@ struct Hamming result = vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),0); result += vgetq_lane_s32 (vreinterpretq_s32_u64(bitSet2),2); } -#else +#elif __GNUC__ { //for portability just use unsigned long -- and use the __builtin_popcountll (see docs for __builtin_popcountll) typedef unsigned long long pop_t; @@ -493,8 +492,8 @@ struct Hamming result += __builtin_popcountll(a_final ^ b_final); } } -#endif //NEON -#else +#else // NO NEON and NOT GNUC + typedef unsigned long long pop_t; HammingLUT lut; result = lut(reinterpret_cast (a), reinterpret_cast (b), size * sizeof(pop_t)); diff --git a/modules/highgui/CMakeLists.txt b/modules/highgui/CMakeLists.txt index 6d92455fa..59ec616d3 100644 --- a/modules/highgui/CMakeLists.txt +++ b/modules/highgui/CMakeLists.txt @@ -89,10 +89,8 @@ if(HAVE_QT) if(${_have_flag}) set_source_files_properties(${_RCC_OUTFILES} PROPERTIES COMPILE_FLAGS -Wno-missing-declarations) endif() -elseif(WIN32) - if (NOT ARM) - list(APPEND highgui_srcs src/window_w32.cpp) - endif() +elseif(WIN32 AND NOT ARM) + list(APPEND highgui_srcs src/window_w32.cpp) elseif(HAVE_GTK) list(APPEND highgui_srcs src/window_gtk.cpp) elseif(APPLE) @@ -107,12 +105,9 @@ elseif(APPLE) endif() endif() -if(WIN32) - list(APPEND highgui_srcs src/cap_dshow.cpp) - if (NOT ARM) - list(APPEND highgui_srcs src/cap_vfw.cpp src/cap_cmu.cpp) - endif() -endif(WIN32) +if(WIN32 AND NOT ARM) + list(APPEND highgui_srcs src/cap_dshow.cpp src/cap_vfw.cpp src/cap_cmu.cpp) +endif() if(HAVE_XINE) list(APPEND highgui_srcs src/cap_xine.cpp) diff --git a/platforms/winrt/scripts/cmake_winrt.cmd b/platforms/winrt/scripts/cmake_winrt.cmd index 3dd20e4d3..aafed7d09 100644 --- a/platforms/winrt/scripts/cmake_winrt.cmd +++ b/platforms/winrt/scripts/cmake_winrt.cmd @@ -3,4 +3,4 @@ cd build rem call "C:\Program Files\Microsoft Visual Studio 11.0\VC\bin\x86_arm\vcvarsx86_arm.bat" -cmake.exe -GNinja -DWITH_FFMPEG=OFF -DBUILD_opencv_gpu=OFF -DBUILD_opencv_python=OFF -DCMAKE_TOOLCHAIN_FILE=..\..\winrt\arm.winrt.toolchain.cmake ..\..\.. +cmake.exe -GNinja -DCMAKE_BUILD_TYPE=Release -DWITH_FFMPEG=OFF -DBUILD_opencv_gpu=OFF -DBUILD_opencv_python=OFF -DCMAKE_TOOLCHAIN_FILE=..\..\winrt\arm.winrt.toolchain.cmake ..\..\.. From 5539e85a1179c51da0b709fa48a516ca67370847 Mon Sep 17 00:00:00 2001 From: yao Date: Wed, 27 Mar 2013 12:04:48 +0800 Subject: [PATCH 06/67] use perf test replace performance sample --- modules/ocl/perf/interpolation.hpp | 120 - modules/ocl/perf/main.cpp | 200 +- modules/ocl/perf/perf_arithm.cpp | 4977 +++-------------- modules/ocl/perf/perf_blend.cpp | 134 +- modules/ocl/perf/perf_brute_force_matcher.cpp | 150 + modules/ocl/perf/perf_canny.cpp | 122 +- modules/ocl/perf/perf_color.cpp | 91 + modules/ocl/perf/perf_columnsum.cpp | 112 +- modules/ocl/perf/perf_fft.cpp | 105 +- modules/ocl/perf/perf_filters.cpp | 1349 +---- modules/ocl/perf/perf_gemm.cpp | 105 +- modules/ocl/perf/perf_haar.cpp | 198 +- modules/ocl/perf/perf_hog.cpp | 150 +- modules/ocl/perf/perf_imgproc.cpp | 2683 +++------ modules/ocl/perf/perf_match_template.cpp | 278 +- modules/ocl/perf/perf_matrix_operation.cpp | 781 +-- modules/ocl/perf/perf_norm.cpp | 84 + modules/ocl/perf/perf_pyrdown.cpp | 126 +- modules/ocl/perf/perf_pyrlk.cpp | 143 + modules/ocl/perf/perf_pyrup.cpp | 109 +- modules/ocl/perf/perf_split_merge.cpp | 519 +- modules/ocl/perf/precomp.cpp | 330 +- modules/ocl/perf/precomp.hpp | 386 +- modules/ocl/perf/utility.cpp | 265 - modules/ocl/perf/utility.hpp | 182 - samples/ocl/performance.cpp | 4397 --------------- 26 files changed, 3791 insertions(+), 14305 deletions(-) delete mode 100644 modules/ocl/perf/interpolation.hpp create mode 100644 modules/ocl/perf/perf_brute_force_matcher.cpp create mode 100644 modules/ocl/perf/perf_color.cpp create mode 100644 modules/ocl/perf/perf_norm.cpp create mode 100644 modules/ocl/perf/perf_pyrlk.cpp delete mode 100644 modules/ocl/perf/utility.cpp delete mode 100644 modules/ocl/perf/utility.hpp delete mode 100644 samples/ocl/performance.cpp diff --git a/modules/ocl/perf/interpolation.hpp b/modules/ocl/perf/interpolation.hpp deleted file mode 100644 index fb89e701d..000000000 --- a/modules/ocl/perf/interpolation.hpp +++ /dev/null @@ -1,120 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// Intel License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000, Intel Corporation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of Intel Corporation may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef __OPENCV_TEST_INTERPOLATION_HPP__ -#define __OPENCV_TEST_INTERPOLATION_HPP__ - -template T readVal(const cv::Mat &src, int y, int x, int c, int border_type, cv::Scalar borderVal = cv::Scalar()) -{ - if (border_type == cv::BORDER_CONSTANT) - return (y >= 0 && y < src.rows && x >= 0 && x < src.cols) ? src.at(y, x * src.channels() + c) : cv::saturate_cast(borderVal.val[c]); - - return src.at(cv::borderInterpolate(y, src.rows, border_type), cv::borderInterpolate(x, src.cols, border_type) * src.channels() + c); -} - -template struct NearestInterpolator -{ - static T getValue(const cv::Mat &src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar()) - { - return readVal(src, cvFloor(y), cvFloor(x), c, border_type, borderVal); - } -}; - -template struct LinearInterpolator -{ - static T getValue(const cv::Mat &src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar()) - { - x -= 0.5f; - y -= 0.5f; - - int x1 = cvFloor(x); - int y1 = cvFloor(y); - int x2 = x1 + 1; - int y2 = y1 + 1; - - float res = 0; - - res += readVal(src, y1, x1, c, border_type, borderVal) * ((x2 - x) * (y2 - y)); - res += readVal(src, y1, x2, c, border_type, borderVal) * ((x - x1) * (y2 - y)); - res += readVal(src, y2, x1, c, border_type, borderVal) * ((x2 - x) * (y - y1)); - res += readVal(src, y2, x2, c, border_type, borderVal) * ((x - x1) * (y - y1)); - - return cv::saturate_cast(res); - } -}; - -template struct CubicInterpolator -{ - static float getValue(float p[4], float x) - { - return p[1] + 0.5 * x * (p[2] - p[0] + x * (2.0 * p[0] - 5.0 * p[1] + 4.0 * p[2] - p[3] + x * (3.0 * (p[1] - p[2]) + p[3] - p[0]))); - } - - static float getValue(float p[4][4], float x, float y) - { - float arr[4]; - - arr[0] = getValue(p[0], x); - arr[1] = getValue(p[1], x); - arr[2] = getValue(p[2], x); - arr[3] = getValue(p[3], x); - - return getValue(arr, y); - } - - static T getValue(const cv::Mat &src, float y, float x, int c, int border_type, cv::Scalar borderVal = cv::Scalar()) - { - int ix = cvRound(x); - int iy = cvRound(y); - - float vals[4][4] = - { - {readVal(src, iy - 2, ix - 2, c, border_type, borderVal), readVal(src, iy - 2, ix - 1, c, border_type, borderVal), readVal(src, iy - 2, ix, c, border_type, borderVal), readVal(src, iy - 2, ix + 1, c, border_type, borderVal)}, - {readVal(src, iy - 1, ix - 2, c, border_type, borderVal), readVal(src, iy - 1, ix - 1, c, border_type, borderVal), readVal(src, iy - 1, ix, c, border_type, borderVal), readVal(src, iy - 1, ix + 1, c, border_type, borderVal)}, - {readVal(src, iy , ix - 2, c, border_type, borderVal), readVal(src, iy , ix - 1, c, border_type, borderVal), readVal(src, iy , ix, c, border_type, borderVal), readVal(src, iy , ix + 1, c, border_type, borderVal)}, - {readVal(src, iy + 1, ix - 2, c, border_type, borderVal), readVal(src, iy + 1, ix - 1, c, border_type, borderVal), readVal(src, iy + 1, ix, c, border_type, borderVal), readVal(src, iy + 1, ix + 1, c, border_type, borderVal)}, - }; - - return cv::saturate_cast(getValue(vals, (x - ix + 2.0) / 4.0, (y - iy + 2.0) / 4.0)); - } -}; - -#endif // __OPENCV_TEST_INTERPOLATION_HPP__ diff --git a/modules/ocl/perf/main.cpp b/modules/ocl/perf/main.cpp index e517a371d..2da17755e 100644 --- a/modules/ocl/perf/main.cpp +++ b/modules/ocl/perf/main.cpp @@ -7,12 +7,13 @@ // copy or use the software. // // -// Intel License Agreement +// License Agreement // For Open Source Computer Vision Library // -// Copyright (C) 2000, Intel Corporation, all rights reserved. +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. // Third party copyrights are property of their respective owners. -// + // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // @@ -21,12 +22,12 @@ // // * Redistribution's in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. +// and/or other oclMaterials provided with the distribution. // -// * The name of Intel Corporation may not be used to endorse or promote products +// * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // -// This software is provided by the copyright holders and contributors "as is" and +// This software is provided by the copyright holders and contributors as is and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, @@ -41,129 +42,118 @@ #include "precomp.hpp" -#ifdef HAVE_OPENCL - -using namespace std; -using namespace cv; -using namespace cv::ocl; -using namespace cvtest; -using namespace testing; - -void print_info() +int main(int argc, const char *argv[]) { - printf("\n"); -#if defined _WIN32 -# if defined _WIN64 - puts("OS: Windows 64"); -# else - puts("OS: Windows 32"); -# endif -#elif defined linux -# if defined _LP64 - puts("OS: Linux 64"); -# else - puts("OS: Linux 32"); -# endif -#elif defined __APPLE__ -# if defined _LP64 - puts("OS: Apple 64"); -# else - puts("OS: Apple 32"); -# endif -#endif + vector oclinfo; + int num_devices = getDevice(oclinfo); + + if (num_devices < 1) + { + cerr << "no device found\n"; + return -1; + } + + int devidx = 0; + + for (size_t i = 0; i < oclinfo.size(); i++) + { + for (size_t j = 0; j < oclinfo[i].DeviceName.size(); j++) + { + printf("device %d: %s\n", devidx++, oclinfo[i].DeviceName[j].c_str()); + } + } + + redirectError(cvErrorCallback); -} -std::string workdir; -int main(int argc, char **argv) -{ - TS::ptr()->init("ocl"); - InitGoogleTest(&argc, argv); const char *keys = - - "{ h | help | false | print help message }" - - "{ w | workdir | ../../../samples/c/| set working directory }" - - "{ t | type | gpu | set device type:cpu or gpu}" - - "{ p | platform | 0 | set platform id }" - - "{ d | device | 0 | set device id }"; - - + "{ h | help | false | print help message }" + "{ f | filter | | filter for test }" + "{ w | workdir | | set working directory }" + "{ l | list | false | show all tests }" + "{ d | device | 0 | device id }" + "{ i | iters | 10 | iteration count }" + "{ m | warmup | 1 | gpu warm up iteration count}" + "{ t | xtop | 1.1 | xfactor top boundary}" + "{ b | xbottom | 0.9 | xfactor bottom boundary}" + "{ v | verify | false | only run gpu once to verify if problems occur}"; CommandLineParser cmd(argc, argv, keys); if (cmd.get("help")) - { - - cout << "Avaible options besides goole test option:" << endl; - + cout << "Avaible options:" << endl; cmd.printParams(); + return 0; } - workdir = cmd.get("workdir"); - - string type = cmd.get("type"); - - unsigned int pid = cmd.get("platform"); - int device = cmd.get("device"); - - print_info(); - // int flag = CVCL_DEVICE_TYPE_GPU; - - // if(type == "cpu") - - // { - - // flag = CVCL_DEVICE_TYPE_CPU; - - // } - std::vector oclinfo; - int devnums = getDevice(oclinfo); - if(devnums <= device || device < 0) - + if (device < 0 || device >= num_devices) { - - std::cout << "device invalid\n"; - + cerr << "Invalid device ID" << endl; return -1; - } - if(pid >= oclinfo.size()) - + if (cmd.get("verify")) { - - std::cout << "platform invalid\n"; - - return -1; - + TestSystem::instance().setNumIters(1); + TestSystem::instance().setGPUWarmupIters(0); + TestSystem::instance().setCPUIters(0); } - if(pid != 0 || device != 0) + devidx = 0; + for (size_t i = 0; i < oclinfo.size(); i++) { - - setDevice(oclinfo[pid], device); - + for (size_t j = 0; j < oclinfo[i].DeviceName.size(); j++, devidx++) + { + if (device == devidx) + { + ocl::setDevice(oclinfo[i], (int)j); + TestSystem::instance().setRecordName(oclinfo[i].DeviceName[j]); + printf("\nuse %d: %s\n", devidx, oclinfo[i].DeviceName[j].c_str()); + goto END_DEV; + } + } } - cout << "Device type:" << type << endl << "Device name:" << oclinfo[pid].DeviceName[device] << endl; - setBinpath(CLBINPATH); - return RUN_ALL_TESTS(); -} +END_DEV: -#else // DON'T HAVE_OPENCL + string filter = cmd.get("filter"); + string workdir = cmd.get("workdir"); + bool list = cmd.get("list"); + int iters = cmd.get("iters"); + int wu_iters = cmd.get("warmup"); + double x_top = cmd.get("xtop"); + double x_bottom = cmd.get("xbottom"); + + TestSystem::instance().setTopThreshold(x_top); + TestSystem::instance().setBottomThreshold(x_bottom); + + if (!filter.empty()) + { + TestSystem::instance().setTestFilter(filter); + } + + if (!workdir.empty()) + { + if (workdir[workdir.size() - 1] != '/' && workdir[workdir.size() - 1] != '\\') + { + workdir += '/'; + } + + TestSystem::instance().setWorkingDir(workdir); + } + + if (list) + { + TestSystem::instance().setListMode(true); + } + + TestSystem::instance().setNumIters(iters); + TestSystem::instance().setGPUWarmupIters(wu_iters); + + TestSystem::instance().run(); -int main() -{ - printf("OpenCV was built without OpenCL support\n"); return 0; -} - - -#endif // HAVE_OPENCL +} \ No newline at end of file diff --git a/modules/ocl/perf/perf_arithm.cpp b/modules/ocl/perf/perf_arithm.cpp index b7f82b685..e6e957641 100644 --- a/modules/ocl/perf/perf_arithm.cpp +++ b/modules/ocl/perf/perf_arithm.cpp @@ -1,4 +1,4 @@ -/////////////////////////////////////////////////////////////////////////////////////// +/*M/////////////////////////////////////////////////////////////////////////////////////// // // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. // @@ -10,17 +10,12 @@ // License Agreement // For Open Source Computer Vision Library // -// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. // Third party copyrights are property of their respective owners. // // @Authors -// Niko Li, newlife20080214@gmail.com -// Jia Haipeng, jiahaipeng95@gmail.com -// Shengen Yan, yanshengen@gmail.com -// Jiang Liyuan,jlyuan001.good@163.com -// Rock Li, Rock.Li@amd.com -// Zailong Wu, bullet@yeah.net +// Fangfang Bai, fangfang@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -35,7 +30,7 @@ // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // -// This software is provided by the copyright holders and contributors "as is" and +// This software is provided by the copyright holders and contributors as is and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, @@ -48,4371 +43,1165 @@ // //M*/ - #include "precomp.hpp" -#include - -#ifdef HAVE_OPENCL -using namespace cv; -using namespace cv::ocl; -using namespace cvtest; -using namespace testing; -using namespace std; -PARAM_TEST_CASE(ArithmTestBase, MatType, bool) +///////////// Lut //////////////////////// +TEST(lut) { - int type; - cv::Scalar val; + Mat src, lut, dst; + ocl::oclMat d_src, d_lut, d_dst; - //src mat - cv::Mat mat1; - cv::Mat mat2; - cv::Mat mask; - cv::Mat dst; - cv::Mat dst1; //bak, for two outputs + int all_type[] = {CV_8UC1, CV_8UC3}; + std::string type_name[] = {"CV_8UC1", "CV_8UC3"}; - // set up roi - int roicols; - int roirows; - int src1x; - int src1y; - int src2x; - int src2y; - int dstx; - int dsty; - int maskx; - int masky; - - - //src mat with roi - cv::Mat mat1_roi; - cv::Mat mat2_roi; - cv::Mat mask_roi; - cv::Mat dst_roi; - cv::Mat dst1_roi; //bak - //std::vector oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - cv::ocl::oclMat gdst1_whole; //bak - - //ocl mat with roi - cv::ocl::oclMat gmat1; - cv::ocl::oclMat gmat2; - cv::ocl::oclMat gdst; - cv::ocl::oclMat gdst1; //bak - cv::ocl::oclMat gmask; - - virtual void SetUp() + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - type = GET_PARAM(0); + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) + { + SUBTEST << size << 'x' << size << "; " << type_name[j]; - cv::RNG &rng = TS::ptr()->get_rng(); + gen(src, size, size, all_type[j], 0, 256); + gen(lut, 1, 256, CV_8UC1, 0, 1); + gen(dst, size, size, all_type[j], 0, 256); - cv::Size size(MWIDTH, MHEIGHT); + LUT(src, lut, dst); - mat1 = randomMat(rng, size, type, 5, 16, false); - //mat2 = randomMat(rng, cv::Size(512,3), type, 5, 16, false); - mat2 = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); - dst1 = randomMat(rng, size, type, 5, 16, false); - mask = randomMat(rng, size, CV_8UC1, 0, 2, false); + CPU_ON; + LUT(src, lut, dst); + CPU_OFF; - cv::threshold(mask, mask, 0.5, 255., CV_8UC1); + d_src.upload(src); + d_lut.upload(lut); + + WARMUP_ON; + ocl::LUT(d_src, d_lut, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::LUT(d_src, d_lut, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + d_lut.upload(lut); + ocl::LUT(d_src, d_lut, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; + + } - val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0)); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums>0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //setBinpath(CLBINPATH); } - - void Has_roi(int b) - { - //cv::RNG& rng = TS::ptr()->get_rng(); - if(b) - { - //randomize ROI - roicols = mat1.cols - 1; - roirows = mat1.rows - 1; - src1x = 1; - src2x = 1; - src1y = 1; - src2y = 1; - dstx = 1; - dsty = 1; - maskx = 1; - masky = 1; - } - else - { - roicols = mat1.cols; - roirows = mat1.rows; - src1x = 0; - src2x = 0; - src1y = 0; - src2y = 0; - dstx = 0; - dsty = 0; - maskx = 0; - masky = 0; - }; - - mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows)); - //mat2_roi = mat2(Rect(src2x,src2y,256,1)); - mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows)); - mask_roi = mask(Rect(maskx, masky, roicols, roirows)); - dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); - dst1_roi = dst1(Rect(dstx, dsty, roicols, roirows)); - - //gdst_whole = dst; - //gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); - - //gdst1_whole = dst1; - //gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows)); - - //gmat1 = mat1_roi; - //gmat2 = mat2_roi; - //gmask = mask_roi; - } - -}; -////////////////////////////////lut///////////////////////////////////////////////// - -struct Lut : ArithmTestBase {}; - -TEST_P(Lut, Mat) -{ - - cv::Mat mat2(3, 512, CV_8UC1); - cv::RNG &rng = TS::ptr()->get_rng(); - rng.fill(mat2, cv::RNG::UNIFORM, cv::Scalar::all(0), cv::Scalar::all(256)); - -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - mat2 = randomMat(rng, cv::Size(512, 3), type, 5, 16, false); - mat2_roi = mat2(Rect(src2x, src2y, 256, 1)); - - - t0 = (double)cvGetTickCount();//cpu start - cv::LUT(mat1_roi, mat2_roi, dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat1 = mat1_roi; - gmat2 = mat2_roi; - - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::LUT(gmat1, gmat2, gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - // s=GetParam(); - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - // src2x = rng.uniform( 0,mat2.cols - 256); - // src2y = rng.uniform (0,mat2.rows - 1); - - // cv::Mat mat2_roi = mat2(Rect(src2x,src2y,256,1)); - mat2 = randomMat(rng, cv::Size(512, 3), type, 5, 16, false); - mat2_roi = mat2(Rect(src2x, src2y, 256, 1)); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - // gdst1_whole = dst1; - // gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - // gmask = mask_roi; - - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::LUT(gmat1, gmat2, gdst); - }; -#endif - } - - -////////////////////////////////exp///////////////////////////////////////////////// - -struct Exp : ArithmTestBase {}; - -TEST_P(Exp, Mat) +///////////// Exp //////////////////////// +TEST(Exp) { + Mat src, dst; + ocl::oclMat d_src, d_dst; -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); + SUBTEST << size << 'x' << size << "; CV_32FC1"; - t0 = (double)cvGetTickCount();//cpu start - cv::exp(mat1_roi, dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src, size, size, CV_32FC1, 0, 256); + gen(dst, size, size, CV_32FC1, 0, 256); - t1 = (double)cvGetTickCount();//gpu start1 + exp(src, dst); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; + CPU_ON; + exp(src, dst); + CPU_OFF; + d_src.upload(src); - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::exp(gmat1, gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download(cpu_dst); - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - //EXPECT_MAT_NEAR(dst, cpu_dst, 0,""); - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; + WARMUP_ON; + ocl::exp(d_src, d_dst); + WARMUP_OFF; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + GPU_ON; + ocl::exp(d_src, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::exp(d_src, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::exp(gmat1, gdst); - }; -#endif - } - -////////////////////////////////log///////////////////////////////////////////////// - -struct Log : ArithmTestBase {}; - -TEST_P(Log, Mat) +///////////// LOG //////////////////////// +TEST(Log) { + Mat src, dst; + ocl::oclMat d_src, d_dst; -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); + SUBTEST << size << 'x' << size << "; 32F"; - t0 = (double)cvGetTickCount();//cpu start - cv::log(mat1_roi, dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src, size, size, CV_32F, 1, 10); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); + log(src, dst); - gmat1 = mat1_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::log(gmat1, gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + CPU_ON; + log(src, dst); + CPU_OFF; + d_src.upload(src); - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + WARMUP_ON; + ocl::log(d_src, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::log(d_src, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::log(d_src, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::log(gmat1, gdst); - }; -#endif - } - - - -////////////////////////////////add///////////////////////////////////////////////// - -struct Add : ArithmTestBase {}; - -TEST_P(Add, Mat) +///////////// Add //////////////////////// +TEST(Add) { + Mat src1, src2, dst; + ocl::oclMat d_src1, d_src2, d_dst; -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + int all_type[] = {CV_8UC1, CV_32FC1}; + std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[j]; - t0 = (double)cvGetTickCount();//cpu start - cv::add(mat1_roi, mat2_roi, dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src1, size, size, all_type[j], 0, 1); + gen(src2, size, size, all_type[j], 0, 1); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); + add(src1, src2, dst); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::add(gmat1, gmat2, gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; + CPU_ON; + add(src1, src2, dst); + CPU_OFF; + d_src1.upload(src1); + d_src2.upload(src2); - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + WARMUP_ON; + ocl::add(d_src1, d_src2, d_dst); + WARMUP_OFF; + GPU_ON; + ocl::add(d_src1, d_src2, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src1.upload(src1); + d_src2.upload(src2); + ocl::add(d_src1, d_src2, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::add(gmat1, gmat2, gdst); - }; -#endif } -TEST_P(Add, Mat_Mask) +///////////// Mul //////////////////////// +TEST(Mul) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src1, src2, dst; + ocl::oclMat d_src1, d_src2, d_dst; + + int all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[j] ; - t0 = (double)cvGetTickCount();//cpu start - cv::add(mat1_roi, mat2_roi, dst_roi, mask_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src1, size, size, all_type[j], 0, 256); + gen(src2, size, size, all_type[j], 0, 256); + gen(dst, size, size, all_type[j], 0, 256); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - gmask = mask_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::add(gmat1, gmat2, gdst, gmask); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + multiply(src1, src2, dst); + CPU_ON; + multiply(src1, src2, dst); + CPU_OFF; + d_src1.upload(src1); + d_src2.upload(src2); + + WARMUP_ON; + ocl::multiply(d_src1, d_src2, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::multiply(d_src1, d_src2, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src1.upload(src1); + d_src2.upload(src2); + ocl::multiply(d_src1, d_src2, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - gmask = mask_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::add(gmat1, gmat2, gdst, gmask); - }; -#endif -} -TEST_P(Add, Scalar) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - cv::add(mat1_roi, val, dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::add(gmat1, val, gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::add(gmat1, val, gdst); - }; -#endif } -TEST_P(Add, Scalar_Mask) +///////////// Div //////////////////////// +TEST(Div) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src1, src2, dst; + ocl::oclMat d_src1, d_src2, d_dst; + int all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[j]; - t0 = (double)cvGetTickCount();//cpu start - cv::add(mat1_roi, val, dst_roi, mask_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src1, size, size, all_type[j], 0, 256); + gen(src2, size, size, all_type[j], 0, 256); + gen(dst, size, size, all_type[j], 0, 256); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmask = mask_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::add(gmat1, val, gdst, gmask); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + divide(src1, src2, dst); + + CPU_ON; + divide(src1, src2, dst); + CPU_OFF; + d_src1.upload(src1); + d_src2.upload(src2); + + WARMUP_ON; + ocl::divide(d_src1, d_src2, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::divide(d_src1, d_src2, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src1.upload(src1); + d_src2.upload(src2); + ocl::divide(d_src1, d_src2, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmask = mask_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::add(gmat1, val, gdst, gmask); - }; -#endif } - -////////////////////////////////sub///////////////////////////////////////////////// -struct Sub : ArithmTestBase {}; - -TEST_P(Sub, Mat) +///////////// Absdiff //////////////////////// +TEST(Absdiff) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src1, src2, dst; + ocl::oclMat d_src1, d_src2, d_dst; + + int all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[j] ; - t0 = (double)cvGetTickCount();//cpu start - cv::subtract(mat1_roi, mat2_roi, dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src1, size, size, all_type[j], 0, 256); + gen(src2, size, size, all_type[j], 0, 256); + gen(dst, size, size, all_type[j], 0, 256); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::subtract(gmat1, gmat2, gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + absdiff(src1, src2, dst); + CPU_ON; + absdiff(src1, src2, dst); + CPU_OFF; + d_src1.upload(src1); + d_src2.upload(src2); + + WARMUP_ON; + ocl::absdiff(d_src1, d_src2, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::absdiff(d_src1, d_src2, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src1.upload(src1); + d_src2.upload(src2); + ocl::absdiff(d_src1, d_src2, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::subtract(gmat1, gmat2, gdst); - }; -#endif } -TEST_P(Sub, Mat_Mask) +///////////// CartToPolar //////////////////////// +TEST(CartToPolar) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src1, src2, dst, dst1; + ocl::oclMat d_src1, d_src2, d_dst, d_dst1; + + int all_type[] = {CV_32FC1}; + std::string type_name[] = {"CV_32FC1"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[j]; - t0 = (double)cvGetTickCount();//cpu start - cv::subtract(mat1_roi, mat2_roi, dst_roi, mask_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src1, size, size, all_type[j], 0, 256); + gen(src2, size, size, all_type[j], 0, 256); + gen(dst, size, size, all_type[j], 0, 256); + gen(dst1, size, size, all_type[j], 0, 256); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - gmask = mask_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::subtract(gmat1, gmat2, gdst, gmask); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + cartToPolar(src1, src2, dst, dst1, 1); + CPU_ON; + cartToPolar(src1, src2, dst, dst1, 1); + CPU_OFF; + d_src1.upload(src1); + d_src2.upload(src2); + + WARMUP_ON; + ocl::cartToPolar(d_src1, d_src2, d_dst, d_dst1, 1); + WARMUP_OFF; + + GPU_ON; + ocl::cartToPolar(d_src1, d_src2, d_dst, d_dst1, 1); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src1.upload(src1); + d_src2.upload(src2); + ocl::cartToPolar(d_src1, d_src2, d_dst, d_dst1, 1); + d_dst.download(dst); + d_dst1.download(dst1); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - gmask = mask_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::subtract(gmat1, gmat2, gdst, gmask); - }; -#endif -} -TEST_P(Sub, Scalar) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - cv::subtract(mat1_roi, val, dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat1 = mat1_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::subtract(gmat1, val, gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::subtract(gmat1, val, gdst); - }; -#endif } -TEST_P(Sub, Scalar_Mask) +///////////// PolarToCart //////////////////////// +TEST(PolarToCart) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src1, src2, dst, dst1; + ocl::oclMat d_src1, d_src2, d_dst, d_dst1; + + int all_type[] = {CV_32FC1}; + std::string type_name[] = {"CV_32FC1"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[j] ; - t0 = (double)cvGetTickCount();//cpu start - cv::subtract(mat1_roi, val, dst_roi, mask_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src1, size, size, all_type[j], 0, 256); + gen(src2, size, size, all_type[j], 0, 256); + gen(dst, size, size, all_type[j], 0, 256); + gen(dst1, size, size, all_type[j], 0, 256); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmask = mask_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::subtract(gmat1, val, gdst, gmask); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + polarToCart(src1, src2, dst, dst1, 1); + CPU_ON; + polarToCart(src1, src2, dst, dst1, 1); + CPU_OFF; + d_src1.upload(src1); + d_src2.upload(src2); + + WARMUP_ON; + ocl::polarToCart(d_src1, d_src2, d_dst, d_dst1, 1); + WARMUP_OFF; + + GPU_ON; + ocl::polarToCart(d_src1, d_src2, d_dst, d_dst1, 1); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src1.upload(src1); + d_src2.upload(src2); + ocl::polarToCart(d_src1, d_src2, d_dst, d_dst1, 1); + d_dst.download(dst); + d_dst1.download(dst1); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmask = mask_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::subtract(gmat1, val, gdst, gmask); - }; -#endif } - -////////////////////////////////Mul///////////////////////////////////////////////// -struct Mul : ArithmTestBase {}; - -TEST_P(Mul, Mat) +///////////// Magnitude //////////////////////// +TEST(magnitude) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat x, y, mag; + ocl::oclMat d_x, d_y, d_mag; + + int all_type[] = {CV_32FC1}; + std::string type_name[] = {"CV_32FC1"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[j]; - t0 = (double)cvGetTickCount();//cpu start - cv::multiply(mat1_roi, mat2_roi, dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(x, size, size, all_type[j], 0, 1); + gen(y, size, size, all_type[j], 0, 1); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); + magnitude(x, y, mag); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::multiply(gmat1, gmat2, gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + CPU_ON; + magnitude(x, y, mag); + CPU_OFF; + d_x.upload(x); + d_y.upload(y); + WARMUP_ON; + ocl::magnitude(d_x, d_y, d_mag); + WARMUP_OFF; + + GPU_ON; + ocl::magnitude(d_x, d_y, d_mag); + ; + GPU_OFF; + + GPU_FULL_ON; + d_x.upload(x); + d_y.upload(y); + ocl::magnitude(d_x, d_y, d_mag); + d_mag.download(mag); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::multiply(gmat1, gmat2, gdst); - }; -#endif } -TEST_P(Mul, Mat_Scalar) +///////////// Transpose //////////////////////// +TEST(Transpose) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src, dst; + ocl::oclMat d_src, d_dst; + + int all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); - cv::RNG &rng = TS::ptr()->get_rng(); - double s = rng.uniform(-10.0, 10.0); - t0 = (double)cvGetTickCount();//cpu start - cv::multiply(mat1_roi, mat2_roi, dst_roi, s); - t0 = (double)cvGetTickCount() - t0;//cpu end + SUBTEST << size << 'x' << size << "; " << type_name[j]; - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); + gen(src, size, size, all_type[j], 0, 256); + gen(dst, size, size, all_type[j], 0, 256); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::multiply(gmat1, gmat2, gdst, s); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + transpose(src, dst); + CPU_ON; + transpose(src, dst); + CPU_OFF; + d_src.upload(src); + + WARMUP_ON; + ocl::transpose(d_src, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::transpose(d_src, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::transpose(d_src, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - cv::RNG &rng = TS::ptr()->get_rng(); - double s = rng.uniform(-10.0, 10.0); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::multiply(gmat1, gmat2, gdst, s); - }; -#endif } - -struct Div : ArithmTestBase {}; - -TEST_P(Div, Mat) +///////////// Flip //////////////////////// +TEST(Flip) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src, dst; + ocl::oclMat d_src, d_dst; + + int all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[j] << " ; FLIP_BOTH"; - t0 = (double)cvGetTickCount();//cpu start - cv::divide(mat1_roi, mat2_roi, dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src, size, size, all_type[j], 0, 256); + gen(dst, size, size, all_type[j], 0, 256); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); + flip(src, dst, 0); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::divide(gmat1, gmat2, gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + CPU_ON; + flip(src, dst, 0); + CPU_OFF; + d_src.upload(src); + WARMUP_ON; + ocl::flip(d_src, d_dst, 0); + WARMUP_OFF; + + GPU_ON; + ocl::flip(d_src, d_dst, 0); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::flip(d_src, d_dst, 0); + d_dst.download(dst); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::divide(gmat1, gmat2, gdst); - }; -#endif } -TEST_P(Div, Mat_Scalar) +///////////// minMax //////////////////////// +TEST(minMax) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src; + ocl::oclMat d_src; + + double min_val, max_val; + Point min_loc, max_loc; + int all_type[] = {CV_8UC1, CV_32FC1}; + std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); - cv::RNG &rng = TS::ptr()->get_rng(); - double s = rng.uniform(-10.0, 10.0); - t0 = (double)cvGetTickCount();//cpu start - cv::divide(mat1_roi, mat2_roi, dst_roi, s); - t0 = (double)cvGetTickCount() - t0;//cpu end + SUBTEST << size << 'x' << size << "; " << type_name[j]; - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); + gen(src, size, size, all_type[j], 0, 256); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::divide(gmat1, gmat2, gdst, s); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + CPU_ON; + minMaxLoc(src, &min_val, &max_val, &min_loc, &max_loc); + CPU_OFF; + d_src.upload(src); + + WARMUP_ON; + ocl::minMax(d_src, &min_val, &max_val); + WARMUP_OFF; + + GPU_ON; + ocl::minMax(d_src, &min_val, &max_val); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::minMax(d_src, &min_val, &max_val); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - cv::RNG &rng = TS::ptr()->get_rng(); - double s = rng.uniform(-10.0, 10.0); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::divide(gmat1, gmat2, gdst, s); - }; -#endif } - -struct Absdiff : ArithmTestBase {}; - -TEST_P(Absdiff, Mat) +///////////// minMaxLoc //////////////////////// +TEST(minMaxLoc) { + Mat src; + ocl::oclMat d_src; -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + double min_val, max_val; + Point min_loc, max_loc; + int all_type[] = {CV_8UC1, CV_32FC1}; + std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[j] ; - t0 = (double)cvGetTickCount();//cpu start - cv::absdiff(mat1_roi, mat2_roi, dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src, size, size, all_type[j], 0, 1); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); + CPU_ON; + minMaxLoc(src, &min_val, &max_val, &min_loc, &max_loc); + CPU_OFF; + d_src.upload(src); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::absdiff(gmat1, gmat2, gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + WARMUP_ON; + ocl::minMaxLoc(d_src, &min_val, &max_val, &min_loc, &max_loc); + WARMUP_OFF; + GPU_ON; + ocl::minMaxLoc(d_src, &min_val, &max_val, &min_loc, &max_loc); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::minMaxLoc(d_src, &min_val, &max_val, &min_loc, &max_loc); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::absdiff(gmat1, gmat2, gdst); - }; -#endif } -TEST_P(Absdiff, Mat_Scalar) +///////////// Sum //////////////////////// +TEST(Sum) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src; + Scalar cpures, gpures; + ocl::oclMat d_src; + + int all_type[] = {CV_8UC1, CV_32SC1}; + std::string type_name[] = {"CV_8UC1", "CV_32SC1"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[j] ; - t0 = (double)cvGetTickCount();//cpu start - cv::absdiff(mat1_roi, val, dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src, size, size, all_type[j], 0, 256); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); + cpures = sum(src); - gmat1 = mat1_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::absdiff(gmat1, val, gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + CPU_ON; + cpures = sum(src); + CPU_OFF; + d_src.upload(src); + WARMUP_ON; + gpures = ocl::sum(d_src); + WARMUP_OFF; + + GPU_ON; + gpures = ocl::sum(d_src); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + gpures = ocl::sum(d_src); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::absdiff(gmat1, val, gdst); - }; -#endif } - - -struct CartToPolar : ArithmTestBase {}; - -TEST_P(CartToPolar, angleInDegree) +///////////// countNonZero //////////////////////// +TEST(countNonZero) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src; + ocl::oclMat d_src; + + int all_type[] = {CV_8UC1, CV_32FC1}; + std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[j] ; - t0 = (double)cvGetTickCount();//cpu start - cv::cartToPolar(mat1_roi, mat2_roi, dst_roi, dst1_roi, 1); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src, size, size, all_type[j], 0, 256); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); + countNonZero(src); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - gdst1_whole = dst1; - gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows)); - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::cartToPolar(gmat1, gmat2, gdst, gdst1, 1); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - cv::Mat cpu_dst1; - gdst1_whole.download(cpu_dst1); - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + CPU_ON; + countNonZero(src); + CPU_OFF; + d_src.upload(src); + WARMUP_ON; + ocl::countNonZero(d_src); + WARMUP_OFF; + + GPU_ON; + ocl::countNonZero(d_src); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::countNonZero(d_src); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gdst1_whole = dst1; - gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::cartToPolar(gmat1, gmat2, gdst, gdst1, 1); - }; -#endif } -TEST_P(CartToPolar, angleInRadians) +///////////// Phase //////////////////////// +TEST(Phase) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src1, src2, dst; + ocl::oclMat d_src1, d_src2, d_dst; + + int all_type[] = {CV_32FC1}; + std::string type_name[] = {"CV_32FC1"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[j] ; - t0 = (double)cvGetTickCount();//cpu start - cv::cartToPolar(mat1_roi, mat2_roi, dst_roi, dst1_roi, 0); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src1, size, size, all_type[j], 0, 256); + gen(src2, size, size, all_type[j], 0, 256); + gen(dst, size, size, all_type[j], 0, 256); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gdst1_whole = dst1; - gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::cartToPolar(gmat1, gmat2, gdst, gdst1, 0); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - cv::Mat cpu_dst1; - gdst1_whole.download(cpu_dst1); - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + phase(src1, src2, dst, 1); + + CPU_ON; + phase(src1, src2, dst, 1); + CPU_OFF; + d_src1.upload(src1); + d_src2.upload(src2); + + WARMUP_ON; + ocl::phase(d_src1, d_src2, d_dst, 1); + WARMUP_OFF; + + GPU_ON; + ocl::phase(d_src1, d_src2, d_dst, 1); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src1.upload(src1); + d_src2.upload(src2); + ocl::phase(d_src1, d_src2, d_dst, 1); + d_dst.download(dst); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gdst1_whole = dst1; - gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::cartToPolar(gmat1, gmat2, gdst, gdst1, 0); - }; -#endif } - -struct PolarToCart : ArithmTestBase {}; - -TEST_P(PolarToCart, angleInDegree) +///////////// bitwise_and//////////////////////// +TEST(bitwise_and) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src1, src2, dst; + ocl::oclMat d_src1, d_src2, d_dst; + + int all_type[] = {CV_8UC1, CV_32SC1}; + std::string type_name[] = {"CV_8UC1", "CV_32SC1"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[j] ; - t0 = (double)cvGetTickCount();//cpu start - cv::polarToCart(mat1_roi, mat2_roi, dst_roi, dst1_roi, 1); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src1, size, size, all_type[j], 0, 256); + gen(src2, size, size, all_type[j], 0, 256); + gen(dst, size, size, all_type[j], 0, 256); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - gdst1_whole = dst1; - gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows)); - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::polarToCart(gmat1, gmat2, gdst, gdst1, 1); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - cv::Mat cpu_dst1; - gdst1_whole.download(cpu_dst1); - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + bitwise_and(src1, src2, dst); + CPU_ON; + bitwise_and(src1, src2, dst); + CPU_OFF; + d_src1.upload(src1); + d_src2.upload(src2); + + WARMUP_ON; + ocl::bitwise_and(d_src1, d_src2, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::bitwise_and(d_src1, d_src2, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src1.upload(src1); + d_src2.upload(src2); + ocl::bitwise_and(d_src1, d_src2, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gdst1_whole = dst1; - gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::polarToCart(gmat1, gmat2, gdst, gdst1, 1); - }; -#endif } -TEST_P(PolarToCart, angleInRadians) +///////////// bitwise_or//////////////////////// +TEST(bitwise_or) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src1, src2, dst; + ocl::oclMat d_src1, d_src2, d_dst; + + int all_type[] = {CV_8UC1, CV_32SC1}; + std::string type_name[] = {"CV_8UC1", "CV_32SC1"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[j]; - t0 = (double)cvGetTickCount();//cpu start - cv::polarToCart(mat1_roi, mat2_roi, dst_roi, dst1_roi, 0); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src1, size, size, all_type[j], 0, 256); + gen(src2, size, size, all_type[j], 0, 256); + gen(dst, size, size, all_type[j], 0, 256); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - gdst1_whole = dst1; - gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows)); - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::polarToCart(gmat1, gmat2, gdst, gdst1, 0); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - cv::Mat cpu_dst1; - gdst1_whole.download(cpu_dst1); - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + bitwise_or(src1, src2, dst); + CPU_ON; + bitwise_or(src1, src2, dst); + CPU_OFF; + d_src1.upload(src1); + d_src2.upload(src2); + + WARMUP_ON; + ocl::bitwise_or(d_src1, d_src2, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::bitwise_or(d_src1, d_src2, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src1.upload(src1); + d_src2.upload(src2); + ocl::bitwise_or(d_src1, d_src2, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - gdst1_whole = dst1; - gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows)); - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::polarToCart(gmat1, gmat2, gdst, gdst1, 0); - }; -#endif } - - -struct Magnitude : ArithmTestBase {}; - -TEST_P(Magnitude, Mat) +///////////// bitwise_xor//////////////////////// +TEST(bitwise_xor) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src1, src2, dst; + ocl::oclMat d_src1, d_src2, d_dst; + + int all_type[] = {CV_8UC1, CV_32SC1}; + std::string type_name[] = {"CV_8UC1", "CV_32SC1"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[j]; - t0 = (double)cvGetTickCount();//cpu start - cv::magnitude(mat1_roi, mat2_roi, dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src1, size, size, all_type[j], 0, 256); + gen(src2, size, size, all_type[j], 0, 256); + gen(dst, size, size, all_type[j], 0, 256); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::magnitude(gmat1, gmat2, gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + bitwise_xor(src1, src2, dst); + CPU_ON; + bitwise_xor(src1, src2, dst); + CPU_OFF; + d_src1.upload(src1); + d_src2.upload(src2); + + WARMUP_ON; + ocl::bitwise_xor(d_src1, d_src2, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::bitwise_xor(d_src1, d_src2, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src1.upload(src1); + d_src2.upload(src2); + ocl::bitwise_xor(d_src1, d_src2, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::magnitude(gmat1, gmat2, gdst); - }; -#endif } -struct Transpose : ArithmTestBase {}; - -TEST_P(Transpose, Mat) +///////////// bitwise_not//////////////////////// +TEST(bitwise_not) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src1, dst; + ocl::oclMat d_src1, d_dst; + + int all_type[] = {CV_8UC1, CV_32SC1}; + std::string type_name[] = {"CV_8UC1", "CV_32SC1"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[j] ; - t0 = (double)cvGetTickCount();//cpu start - cv::transpose(mat1_roi, dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src1, size, size, all_type[j], 0, 256); + gen(dst, size, size, all_type[j], 0, 256); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::transpose(gmat1, gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + bitwise_not(src1, dst); + CPU_ON; + bitwise_not(src1, dst); + CPU_OFF; + d_src1.upload(src1); + + WARMUP_ON; + ocl::bitwise_not(d_src1, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::bitwise_not(d_src1, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src1.upload(src1); + ocl::bitwise_not(d_src1, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::transpose(gmat1, gdst); - }; -#endif } - -struct Flip : ArithmTestBase {}; - -TEST_P(Flip, X) +///////////// compare//////////////////////// +TEST(compare) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src1, src2, dst; + ocl::oclMat d_src1, d_src2, d_dst; + + int CMP_EQ = 0; + int all_type[] = {CV_8UC1, CV_32FC1}; + std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[j] ; - t0 = (double)cvGetTickCount();//cpu start - cv::flip(mat1_roi, dst_roi, 0); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src1, size, size, all_type[j], 0, 256); + gen(src2, size, size, all_type[j], 0, 256); + gen(dst, size, size, all_type[j], 0, 256); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::flip(gmat1, gdst, 0); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + compare(src1, src2, dst, CMP_EQ); + CPU_ON; + compare(src1, src2, dst, CMP_EQ); + CPU_OFF; + d_src1.upload(src1); + d_src2.upload(src2); + + WARMUP_ON; + ocl::compare(d_src1, d_src2, d_dst, CMP_EQ); + WARMUP_OFF; + + GPU_ON; + ocl::compare(d_src1, d_src2, d_dst, CMP_EQ); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src1.upload(src1); + d_src2.upload(src2); + ocl::compare(d_src1, d_src2, d_dst, CMP_EQ); + d_dst.download(dst); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::flip(gmat1, gdst, 0); - }; -#endif } -TEST_P(Flip, Y) +///////////// pow //////////////////////// +TEST(pow) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src, dst; + ocl::oclMat d_src, d_dst; + + int all_type[] = {CV_32FC1}; + std::string type_name[] = {"CV_32FC1"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[j] ; - t0 = (double)cvGetTickCount();//cpu start - cv::flip(mat1_roi, dst_roi, 1); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src, size, size, all_type[j], 0, 100); + gen(dst, size, size, all_type[j], 0, 100); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); + pow(src, -2.0, dst); - gmat1 = mat1_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::flip(gmat1, gdst, 1); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + CPU_ON; + pow(src, -2.0, dst); + CPU_OFF; + d_src.upload(src); + d_dst.upload(dst); + WARMUP_ON; + ocl::pow(d_src, -2.0, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::pow(d_src, -2.0, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::pow(d_src, -2.0, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::flip(gmat1, gdst, 1); - }; -#endif } -TEST_P(Flip, BOTH) +///////////// MagnitudeSqr//////////////////////// +TEST(MagnitudeSqr) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src1, src2, dst; + ocl::oclMat d_src1, d_src2, d_dst; + + int all_type[] = {CV_32FC1}; + std::string type_name[] = {"CV_32FC1"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t t = 0; t < sizeof(all_type) / sizeof(int); t++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[t]; - t0 = (double)cvGetTickCount();//cpu start - cv::flip(mat1_roi, dst_roi, -1); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src1, size, size, all_type[t], 0, 256); + gen(src2, size, size, all_type[t], 0, 256); + gen(dst, size, size, all_type[t], 0, 256); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::flip(gmat1, gdst, -1); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + for (int i = 0; i < src1.rows; ++i) - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::flip(gmat1, gdst, -1); - }; -#endif -} - - - -struct MinMax : ArithmTestBase {}; - -TEST_P(MinMax, MAT) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - double minVal, maxVal; - cv::Point minLoc, maxLoc; - t0 = (double)cvGetTickCount();//cpu start - if (mat1.depth() != CV_8S) - { - cv::minMaxLoc(mat1_roi, &minVal, &maxVal, &minLoc, &maxLoc); - } - else - { - minVal = std::numeric_limits::max(); - maxVal = -std::numeric_limits::max(); - for (int i = 0; i < mat1_roi.rows; ++i) - for (int j = 0; j < mat1_roi.cols; ++j) - { - signed char val = mat1_roi.at(i, j); - if (val < minVal) minVal = val; - if (val > maxVal) maxVal = val; - } - } - - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gmat1 = mat1_roi; - double minVal_, maxVal_; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::minMax(gmat1, &minVal_, &maxVal_); - t2 = (double)cvGetTickCount() - t2;//kernel - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gmat1 = mat1_roi; - double minVal_, maxVal_; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::minMax(gmat1, &minVal_, &maxVal_); - }; -#endif -} - -TEST_P(MinMax, MASK) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - double minVal, maxVal; - cv::Point minLoc, maxLoc; - t0 = (double)cvGetTickCount();//cpu start - if (mat1.depth() != CV_8S) - { - cv::minMaxLoc(mat1_roi, &minVal, &maxVal, &minLoc, &maxLoc, mask_roi); - } - else - { - minVal = std::numeric_limits::max(); - maxVal = -std::numeric_limits::max(); - for (int i = 0; i < mat1_roi.rows; ++i) - for (int j = 0; j < mat1_roi.cols; ++j) - { - signed char val = mat1_roi.at(i, j); - unsigned char m = mask_roi.at(i, j); - if (val < minVal && m) minVal = val; - if (val > maxVal && m) maxVal = val; - } - } - - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gmat1 = mat1_roi; - gmask = mask_roi; - double minVal_, maxVal_; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::minMax(gmat1, &minVal_, &maxVal_, gmask); - t2 = (double)cvGetTickCount() - t2;//kernel - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gmat1 = mat1_roi; - gmask = mask_roi; - double minVal_, maxVal_; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::minMax(gmat1, &minVal_, &maxVal_, gmask); - }; -#endif -} - - -struct MinMaxLoc : ArithmTestBase {}; - -TEST_P(MinMaxLoc, MAT) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - double minVal, maxVal; - cv::Point minLoc, maxLoc; - int depth = mat1.depth(); - t0 = (double)cvGetTickCount();//cpu start - if (depth != CV_8S) - { - cv::minMaxLoc(mat1_roi, &minVal, &maxVal, &minLoc, &maxLoc); - } - else - { - minVal = std::numeric_limits::max(); - maxVal = -std::numeric_limits::max(); - for (int i = 0; i < mat1_roi.rows; ++i) - for (int j = 0; j < mat1_roi.cols; ++j) - { - signed char val = mat1_roi.at(i, j); - if (val < minVal) - { - minVal = val; - minLoc.x = j; - minLoc.y = i; - } - if (val > maxVal) - { - maxVal = val; - maxLoc.x = j; - maxLoc.y = i; - } - } - } - - - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gmat1 = mat1_roi; - double minVal_, maxVal_; - cv::Point minLoc_, maxLoc_; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_, &minLoc_, &maxLoc_, cv::ocl::oclMat()); - t2 = (double)cvGetTickCount() - t2;//kernel - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gmat1 = mat1_roi; - double minVal_, maxVal_; - cv::Point minLoc_, maxLoc_; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_, &minLoc_, &maxLoc_, cv::ocl::oclMat()); - }; -#endif - -} - - -TEST_P(MinMaxLoc, MASK) -{ - -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - double minVal, maxVal; - cv::Point minLoc, maxLoc; - int depth = mat1.depth(); - t0 = (double)cvGetTickCount();//cpu start - if (depth != CV_8S) - { - cv::minMaxLoc(mat1_roi, &minVal, &maxVal, &minLoc, &maxLoc, mask_roi); - } - else - { - minVal = std::numeric_limits::max(); - maxVal = -std::numeric_limits::max(); - for (int i = 0; i < mat1_roi.rows; ++i) - for (int j = 0; j < mat1_roi.cols; ++j) - { - signed char val = mat1_roi.at(i, j); - unsigned char m = mask_roi.at(i , j); - if (val < minVal && m) - { - minVal = val; - minLoc.x = j; - minLoc.y = i; - } - if (val > maxVal && m) - { - maxVal = val; - maxLoc.x = j; - maxLoc.y = i; - } - } - } - - - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gmat1 = mat1_roi; - gmask = mask_roi; - double minVal_, maxVal_; - cv::Point minLoc_, maxLoc_; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_, &minLoc_, &maxLoc_, gmask); - t2 = (double)cvGetTickCount() - t2;//kernel - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gmat1 = mat1_roi; - gmask = mask_roi; - double minVal_, maxVal_; - cv::Point minLoc_, maxLoc_; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::minMaxLoc(gmat1, &minVal_, &maxVal_, &minLoc_, &maxLoc_, gmask); - }; -#endif -} - - -struct Sum : ArithmTestBase {}; - -TEST_P(Sum, MAT) -{ - -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - cv::sum(mat1_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gmat1 = mat1_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::sum(gmat1); - t2 = (double)cvGetTickCount() - t2;//kernel - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gmat1 = mat1_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - Scalar gpures = cv::ocl::sum(gmat1); - }; -#endif -} - -//TEST_P(Sum, MASK) -//{ -// for(int j=0; j oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - cv::ocl::oclMat gdst1_whole; //bak - - //ocl mat with roi - cv::ocl::oclMat gmat1; - cv::ocl::oclMat gmat2; - cv::ocl::oclMat gdst; - cv::ocl::oclMat gdst1; //bak - cv::ocl::oclMat gmask; - - virtual void SetUp() - { - //type = GET_PARAM(0); - type = CV_8UC1; - - cv::RNG &rng = TS::ptr()->get_rng(); - - cv::Size size(MWIDTH, MHEIGHT); - - mat1 = randomMat(rng, size, type, 5, 16, false); - //mat2 = randomMat(rng, cv::Size(512,3), type, 5, 16, false); - mat2 = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); - dst1 = randomMat(rng, size, type, 5, 16, false); - mask = randomMat(rng, size, CV_8UC1, 0, 2, false); - - cv::threshold(mask, mask, 0.5, 255., CV_8UC1); - - val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0)); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums>0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //setBinpath(CLBINPATH); - } - - void Has_roi(int b) - { - //cv::RNG& rng = TS::ptr()->get_rng(); - if(b) - { - //randomize ROI - roicols = mat1.cols - 1; - roirows = mat1.rows - 1; - src1x = 1; - src2x = 1; - src1y = 1; - src2y = 1; - dstx = 1; - dsty = 1; - maskx = 1; - masky = 1; - } - else - { - roicols = mat1.cols; - roirows = mat1.rows; - src1x = 0; - src2x = 0; - src1y = 0; - src2y = 0; - dstx = 0; - dsty = 0; - maskx = 0; - masky = 0; - }; - - mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows)); - //mat2_roi = mat2(Rect(src2x,src2y,256,1)); - mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows)); - mask_roi = mask(Rect(maskx, masky, roicols, roirows)); - dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); - dst1_roi = dst1(Rect(dstx, dsty, roicols, roirows)); - - //gdst_whole = dst; - //gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); - - //gdst1_whole = dst1; - //gdst1 = gdst1_whole(Rect(dstx,dsty,roicols,roirows)); - - //gmat1 = mat1_roi; - //gmat2 = mat2_roi; - //gmask = mask_roi; - } - -}; -struct Compare : CompareTestBase {}; - -TEST_P(Compare, Mat) -{ - if(mat1.type() == CV_8SC1) - { - cout << "\tUnsupported type\t\n"; - } - - int cmp_codes[] = {CMP_EQ, CMP_GT, CMP_GE, CMP_LT, CMP_LE, CMP_NE}; - const char *cmp_str[] = {"CMP_EQ", "CMP_GT", "CMP_GE", "CMP_LT", "CMP_LE", "CMP_NE"}; - int cmp_num = sizeof(cmp_codes) / sizeof(int); - for (int i = 0; i < cmp_num; ++i) - { - -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - cv::compare(mat1_roi, mat2_roi, dst_roi, cmp_codes[i]); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat1 = mat1_roi; - gmat2 = mat2_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::compare(gmat1, gmat2, gdst, cmp_codes[i]); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - cout << cmp_str[i] << endl; - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::compare(gmat1, gmat2, gdst, cmp_codes[i]); - }; -#endif - } - -} - -struct Pow : ArithmTestBase {}; - -TEST_P(Pow, Mat) -{ - if(mat1.depth() != CV_32F && mat1.depth() != CV_64F) - { - cout << "\tUnsupported type\t\n"; - } - -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - double p = 4.5; - t0 = (double)cvGetTickCount();//cpu start - cv::pow(mat1_roi, p, dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat1 = mat1_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::pow(gmat1, p, gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - double p = 4.5; - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::pow(gmat1, p, gdst); - }; -#endif -} - - -struct MagnitudeSqr : ArithmTestBase {}; - -TEST_P(MagnitudeSqr, Mat) -{ - -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - for(int i = 0; i < mat1.rows; ++i) - for(int j = 0; j < mat1.cols; ++j) + for (int j = 0; j < src1.cols; ++j) { - float val1 = mat1.at(i, j); - float val2 = mat2.at(i, j); + float val1 = src1.at(i, j); + float val2 = src2.at(i, j); ((float *)(dst.data))[i * dst.step / 4 + j] = val1 * val1 + val2 * val2; } - t0 = (double)cvGetTickCount() - t0;//cpu end - t1 = (double)cvGetTickCount();//gpu start1 - cv::ocl::oclMat clmat1(mat1), clmat2(mat2), cldst; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::magnitudeSqr(clmat1, clmat2, cldst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - cldst.download(cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + CPU_ON; + for (int i = 0; i < src1.rows; ++i) + for (int j = 0; j < src1.cols; ++j) + { + float val1 = src1.at(i, j); + float val2 = src2.at(i, j); + + ((float *)(dst.data))[i * dst.step / 4 + j] = val1 * val1 + val2 * val2; + + } + + CPU_OFF; + d_src1.upload(src1); + d_src2.upload(src2); + + WARMUP_ON; + ocl::magnitudeSqr(d_src1, d_src2, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::magnitudeSqr(d_src1, d_src2, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src1.upload(src1); + d_src2.upload(src2); + ocl::magnitudeSqr(d_src1, d_src2, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - cv::ocl::oclMat clmat1(mat1), clmat2(mat2), cldst; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::magnitudeSqr(clmat1, clmat2, cldst); - }; -#endif - } - -struct AddWeighted : ArithmTestBase {}; - -TEST_P(AddWeighted, Mat) +///////////// AddWeighted//////////////////////// +TEST(AddWeighted) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src1, src2, dst; + ocl::oclMat d_src1, d_src2, d_dst; + + double alpha = 2.0, beta = 1.0, gama = 3.0; + int all_type[] = {CV_8UC1, CV_32FC1}; + std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); - double alpha = 2.0, beta = 1.0, gama = 3.0; + SUBTEST << size << 'x' << size << "; " << type_name[j] ; - t0 = (double)cvGetTickCount();//cpu start - cv::addWeighted(mat1_roi, alpha, mat2_roi, beta, gama, dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src1, size, size, all_type[j], 0, 256); + gen(src2, size, size, all_type[j], 0, 256); + gen(dst, size, size, all_type[j], 0, 256); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); + addWeighted(src1, alpha, src2, beta, gama, dst); - gmat1 = mat1_roi; - gmat2 = mat2_roi; + CPU_ON; + addWeighted(src1, alpha, src2, beta, gama, dst); + CPU_OFF; + d_src1.upload(src1); + d_src2.upload(src2); - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::addWeighted(gmat1, alpha, gmat2, beta, gama, gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download(cpu_dst); - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + WARMUP_ON; + ocl::addWeighted(d_src1, alpha, d_src2, beta, gama, d_dst); + WARMUP_OFF; + GPU_ON; + ocl::addWeighted(d_src1, alpha, d_src2, beta, gama, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src1.upload(src1); + d_src2.upload(src2); + ocl::addWeighted(d_src1, alpha, d_src2, beta, gama, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - double alpha = 2.0, beta = 1.0, gama = 3.0; - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::addWeighted(gmat1, alpha, gmat2, beta, gama, gdst); - // double alpha=2.0,beta=1.0,gama=3.0; - // cv::ocl::oclMat clmat1(mat1),clmat2(mat2),cldst; - // if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; - // cv::ocl::addWeighted(clmat1,alpha,clmat2,beta,gama, cldst); - }; -#endif - -} -/* -struct AddWeighted : ArithmTestBase {}; - -TEST_P(AddWeighted, Mat) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick=0; - double totalgputick=0; - double totalgputick_kernel=0; - double t0=0; - double t1=0; - double t2=0; - for(int j = 0; j < LOOP_TIMES+1; j ++) - { - double alpha=2.0,beta=1.0,gama=3.0; - - t0 = (double)cvGetTickCount();//cpu start - cv::addWeighted(mat1,alpha,mat2,beta,gama,dst); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - cv::ocl::oclMat clmat1(mat1),clmat2(mat2),cldst; - - t2=(double)cvGetTickCount();//kernel - cv::ocl::addWeighted(clmat1,alpha,clmat2,beta,gama, cldst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - cldst.download(cpu_dst); - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick=t1+totalgputick; - totalcputick=t0+totalcputick; - totalgputick_kernel=t2+totalgputick_kernel; - } - cout << "average cpu runtime is " << totalcputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel/((double)cvGetTickFrequency()* LOOP_TIMES *1000.) << "ms" << endl; - -#else - //for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - // { - double alpha=2.0,beta=1.0,gama=3.0; - cv::ocl::oclMat clmat1(mat1),clmat2(mat2),cldst; - //if(j==0){cout<<"no roi:";}else{cout<<"\nwith roi:";}; - cv::ocl::addWeighted(clmat1,alpha,clmat2,beta,gama, cldst); - // }; -#endif - -} - -*/ -//********test**************** - -INSTANTIATE_TEST_CASE_P(Arithm, Lut, Combine( - Values(CV_8UC1, CV_8UC4), - Values(false))); // Values(false) is the reserved parameter - -INSTANTIATE_TEST_CASE_P(Arithm, Exp, Combine( - Values(CV_32FC1, CV_32FC1), - Values(false))); // Values(false) is the reserved parameter - -INSTANTIATE_TEST_CASE_P(Arithm, Log, Combine( - Values(CV_32FC1, CV_32FC1), - Values(false))); // Values(false) is the reserved parameter - -INSTANTIATE_TEST_CASE_P(Arithm, Add, Combine( - Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), - Values(false))); - -INSTANTIATE_TEST_CASE_P(Arithm, Mul, Combine( - Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), - Values(false))); // Values(false) is the reserved parameter - -INSTANTIATE_TEST_CASE_P(Arithm, Div, Combine( - Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), - Values(false))); // Values(false) is the reserved parameter - - -INSTANTIATE_TEST_CASE_P(Arithm, Absdiff, Combine( - Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), - Values(false))); // Values(false) is the reserved parameter - -INSTANTIATE_TEST_CASE_P(Arithm, CartToPolar, Combine( - Values(CV_32FC1, CV_32FC4), - Values(false))); // Values(false) is the reserved parameter - -INSTANTIATE_TEST_CASE_P(Arithm, PolarToCart, Combine( - Values(CV_32FC1, CV_32FC4), - Values(false))); // Values(false) is the reserved parameter - -INSTANTIATE_TEST_CASE_P(Arithm, Magnitude, Combine( - Values(CV_32FC1, CV_32FC4), - Values(false))); // Values(false) is the reserved parameter - -INSTANTIATE_TEST_CASE_P(Arithm, Transpose, Combine( - Values(CV_8UC1, CV_8UC4, CV_32FC1), - Values(false))); // Values(false) is the reserved parameter - -INSTANTIATE_TEST_CASE_P(Arithm, Flip, Combine( - Values(CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1, CV_32FC4), - Values(false))); // Values(false) is the reserved parameter - -INSTANTIATE_TEST_CASE_P(Arithm, MinMax, Combine( - Values(CV_8UC1, CV_32FC1), - Values(false))); - -INSTANTIATE_TEST_CASE_P(Arithm, MinMaxLoc, Combine( - Values(CV_8UC1, CV_32FC1), - Values(false))); - -INSTANTIATE_TEST_CASE_P(Arithm, Sum, Combine( - Values(CV_8U, CV_32S, CV_32F), - Values(false))); - -INSTANTIATE_TEST_CASE_P(Arithm, CountNonZero, Combine( - Values(CV_8U, CV_32S, CV_32F), - Values(false))); - - -INSTANTIATE_TEST_CASE_P(Arithm, Phase, Combine(Values(CV_32FC1, CV_32FC4), Values(false))); -//Values(false) is the reserved parameter - - -INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_and, Combine( - Values(CV_8UC1, CV_32SC1, CV_32SC4, CV_32FC1, CV_32FC4), Values(false))); -//Values(false) is the reserved parameter - -INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_or, Combine( - Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(false))); -//Values(false) is the reserved parameter - -INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_xor, Combine( - Values(CV_8UC1, CV_32SC1, CV_32FC1, CV_32FC4), Values(false))); -//Values(false) is the reserved parameter - -INSTANTIATE_TEST_CASE_P(Arithm, Bitwise_not, Combine( - Values(CV_8UC1, CV_32SC1, CV_32FC1, CV_32FC4), Values(false))); -//Values(false) is the reserved parameter - -INSTANTIATE_TEST_CASE_P(Arithm, Compare, Combine(Values(CV_8UC1, CV_16UC1, CV_16SC1, CV_32SC1, CV_32FC1, CV_64FC1), Values(false))); -//Values(false) is the reserved parameter - -INSTANTIATE_TEST_CASE_P(Arithm, Pow, Combine(Values(CV_32FC1, CV_32FC4), Values(false))); -//Values(false) is the reserved parameter - -INSTANTIATE_TEST_CASE_P(Arithm, MagnitudeSqr, Combine( - Values(CV_32FC1, CV_32FC1), - Values(false))); // Values(false) is the reserved parameter - -INSTANTIATE_TEST_CASE_P(Arithm, AddWeighted, Combine( - Values(CV_8UC1, CV_32SC1, CV_32FC1), - Values(false))); // Values(false) is the reserved parameter - - - - -#endif // HAVE_OPENCL +} \ No newline at end of file diff --git a/modules/ocl/perf/perf_blend.cpp b/modules/ocl/perf/perf_blend.cpp index f78f7d6b2..00034700b 100644 --- a/modules/ocl/perf/perf_blend.cpp +++ b/modules/ocl/perf/perf_blend.cpp @@ -44,79 +44,77 @@ //M*/ #include "precomp.hpp" -#include - -#ifdef HAVE_OPENCL -using namespace cv; -using namespace cv::ocl; -using namespace cvtest; -using namespace testing; -using namespace std; - -PARAM_TEST_CASE(Blend, MatType, int) +///////////// blend //////////////////////// +template +void blendLinearGold(const cv::Mat &img1, const cv::Mat &img2, const cv::Mat &weights1, const cv::Mat &weights2, cv::Mat &result_gold) { - int type; - int channels; - std::vector oclinfo; + result_gold.create(img1.size(), img1.type()); - virtual void SetUp() + int cn = img1.channels(); + + for (int y = 0; y < img1.rows; ++y) { + const float *weights1_row = weights1.ptr(y); + const float *weights2_row = weights2.ptr(y); + const T *img1_row = img1.ptr(y); + const T *img2_row = img2.ptr(y); + T *result_gold_row = result_gold.ptr(y); - type = GET_PARAM(0); - channels = GET_PARAM(1); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - //cv::ocl::setBinpath(CLBINPATH); - } -}; - -TEST_P(Blend, Performance) -{ - cv::Size size(MWIDTH, MHEIGHT); - cv::Mat img1_host = randomMat(size, CV_MAKETYPE(type, channels), 0, type == CV_8U ? 255.0 : 1.0); - cv::Mat img2_host = randomMat(size, CV_MAKETYPE(type, channels), 0, type == CV_8U ? 255.0 : 1.0); - cv::Mat weights1 = randomMat(size, CV_32F, 0, 1); - cv::Mat weights2 = randomMat(size, CV_32F, 0, 1); - cv::ocl::oclMat gimg1(size, CV_MAKETYPE(type, channels)), gimg2(size, CV_MAKETYPE(type, channels)), gweights1(size, CV_32F), gweights2(size, CV_32F); - cv::ocl::oclMat gdst(size, CV_MAKETYPE(type, channels)); - - - double totalgputick_all = 0; - double totalgputick_kernel = 0; - double t1 = 0; - double t2 = 0; - - for (int j = 0; j < LOOP_TIMES + 1; j ++) //LOOP_TIMES=100 - { - t1 = (double)cvGetTickCount(); - cv::ocl::oclMat gimg1 = cv::ocl::oclMat(img1_host); - cv::ocl::oclMat gimg2 = cv::ocl::oclMat(img2_host); - cv::ocl::oclMat gweights1 = cv::ocl::oclMat(weights1); - cv::ocl::oclMat gweights2 = cv::ocl::oclMat(weights1); - - t2 = (double)cvGetTickCount(); - cv::ocl::blendLinear(gimg1, gimg2, gweights1, gweights2, gdst); - t2 = (double)cvGetTickCount() - t2; - - cv::Mat m; - gdst.download(m); - t1 = (double)cvGetTickCount() - t1; - - if (j == 0) + for (int x = 0; x < img1.cols * cn; ++x) { - continue; + float w1 = weights1_row[x / cn]; + float w2 = weights2_row[x / cn]; + result_gold_row[x] = static_cast((img1_row[x] * w1 + img2_row[x] * w2) / (w1 + w2 + 1e-5f)); } - - totalgputick_all = t1 + totalgputick_all; - totalgputick_kernel = t2 + totalgputick_kernel; - }; - - cout << "average gpu total runtime is " << totalgputick_all / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - - cout << "average gpu runtime without data transfering is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - + } } +TEST(blend) +{ + Mat src1, src2, weights1, weights2, dst; + ocl::oclMat d_src1, d_src2, d_weights1, d_weights2, d_dst; -INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Blend, Combine( - Values(CV_8U, CV_32F), Values(1, 4))); -#endif \ No newline at end of file + int all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) + { + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) + { + SUBTEST << size << 'x' << size << "; " << type_name[j] << " and CV_32FC1"; + + gen(src1, size, size, all_type[j], 0, 256); + gen(src2, size, size, all_type[j], 0, 256); + gen(weights1, size, size, CV_32FC1, 0, 1); + gen(weights2, size, size, CV_32FC1, 0, 1); + + blendLinearGold(src1, src2, weights1, weights2, dst); + + CPU_ON; + blendLinearGold(src1, src2, weights1, weights2, dst); + CPU_OFF; + + d_src1.upload(src1); + d_src2.upload(src2); + d_weights1.upload(weights1); + d_weights2.upload(weights2); + + WARMUP_ON; + ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src1.upload(src1); + d_src2.upload(src2); + d_weights1.upload(weights1); + d_weights2.upload(weights2); + ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; + } + } +} \ No newline at end of file diff --git a/modules/ocl/perf/perf_brute_force_matcher.cpp b/modules/ocl/perf/perf_brute_force_matcher.cpp new file mode 100644 index 000000000..6562f91e4 --- /dev/null +++ b/modules/ocl/perf/perf_brute_force_matcher.cpp @@ -0,0 +1,150 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Fangfang Bai, fangfang@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ +#include "precomp.hpp" + +//////////////////// BruteForceMatch ///////////////// +TEST(BruteForceMatcher) +{ + Mat trainIdx_cpu; + Mat distance_cpu; + Mat allDist_cpu; + Mat nMatches_cpu; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) + { + // Init CPU matcher + int desc_len = 64; + + BFMatcher matcher(NORM_L2); + + Mat query; + gen(query, size, desc_len, CV_32F, 0, 1); + + Mat train; + gen(train, size, desc_len, CV_32F, 0, 1); + // Output + vector< vector > matches(2); + // Init GPU matcher + ocl::BruteForceMatcher_OCL_base d_matcher(ocl::BruteForceMatcher_OCL_base::L2Dist); + + ocl::oclMat d_query(query); + ocl::oclMat d_train(train); + + ocl::oclMat d_trainIdx, d_distance, d_allDist, d_nMatches; + + SUBTEST << size << "; match"; + + matcher.match(query, train, matches[0]); + + CPU_ON; + matcher.match(query, train, matches[0]); + CPU_OFF; + + WARMUP_ON; + d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance); + WARMUP_OFF; + + GPU_ON; + d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance); + ; + GPU_OFF; + + GPU_FULL_ON; + d_query.upload(query); + d_train.upload(train); + d_matcher.match(d_query, d_train, matches[0]); + GPU_FULL_OFF; + + SUBTEST << size << "; knnMatch"; + + matcher.knnMatch(query, train, matches, 2); + + CPU_ON; + matcher.knnMatch(query, train, matches, 2); + CPU_OFF; + + WARMUP_ON; + d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, 2); + WARMUP_OFF; + + GPU_ON; + d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, 2); + ; + GPU_OFF; + + GPU_FULL_ON; + d_query.upload(query); + d_train.upload(train); + d_matcher.knnMatch(d_query, d_train, matches, 2); + GPU_FULL_OFF; + + SUBTEST << size << "; radiusMatch"; + + float max_distance = 2.0f; + + matcher.radiusMatch(query, train, matches, max_distance); + + CPU_ON; + matcher.radiusMatch(query, train, matches, max_distance); + CPU_OFF; + + d_trainIdx.release(); + + WARMUP_ON; + d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, max_distance); + WARMUP_OFF; + + GPU_ON; + d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, max_distance); + ; + GPU_OFF; + + GPU_FULL_ON; + d_query.upload(query); + d_train.upload(train); + d_matcher.radiusMatch(d_query, d_train, matches, max_distance); + GPU_FULL_OFF; + } +} \ No newline at end of file diff --git a/modules/ocl/perf/perf_canny.cpp b/modules/ocl/perf/perf_canny.cpp index eb895df5e..428e036d0 100644 --- a/modules/ocl/perf/perf_canny.cpp +++ b/modules/ocl/perf/perf_canny.cpp @@ -42,112 +42,42 @@ // the use of this software, even if advised of the possibility of such damage. // //M*/ - #include "precomp.hpp" -#include -#ifdef HAVE_OPENCL -using namespace cv; -using namespace cv::ocl; -using namespace cvtest; -using namespace testing; -using namespace std; -#ifndef MWC_TEST_UTILITY -#define MWC_TEST_UTILITY - -// Param class -#ifndef IMPLEMENT_PARAM_CLASS -#define IMPLEMENT_PARAM_CLASS(name, type) \ -class name \ - { \ - public: \ - name ( type arg = type ()) : val_(arg) {} \ - operator type () const {return val_;} \ - private: \ - type val_; \ - }; \ - inline void PrintTo( name param, std::ostream* os) \ - { \ - *os << #name << "(" << testing::PrintToString(static_cast< type >(param)) << ")"; \ - } - -IMPLEMENT_PARAM_CLASS(Channels, int) -#endif // IMPLEMENT_PARAM_CLASS -#endif // MWC_TEST_UTILITY - -//////////////////////////////////////////////////////// -// Canny1 -extern std::string workdir; -IMPLEMENT_PARAM_CLASS(AppertureSize, int); -IMPLEMENT_PARAM_CLASS(L2gradient, bool); - -PARAM_TEST_CASE(Canny1, AppertureSize, L2gradient) +///////////// Canny //////////////////////// +TEST(Canny) { - int apperture_size; - bool useL2gradient; - //std::vector oclinfo; + Mat img = imread(abspath("aloeL.jpg"), CV_LOAD_IMAGE_GRAYSCALE); - virtual void SetUp() + if (img.empty()) { - apperture_size = GET_PARAM(0); - useL2gradient = GET_PARAM(1); - - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - } -}; - -TEST_P(Canny1, Performance) -{ - cv::Mat img = readImage(workdir + "fruits.jpg", cv::IMREAD_GRAYSCALE); - ASSERT_FALSE(img.empty()); - - double low_thresh = 100.0; - double high_thresh = 150.0; - - cv::Mat edges_gold; - cv::ocl::oclMat edges; - - double totalgputick = 0; - double totalgputick_kernel = 0; - - double t1 = 0; - double t2 = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - - t1 = (double)cvGetTickCount();//gpu start1 - - cv::ocl::oclMat ocl_img = cv::ocl::oclMat(img);//upload - - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::Canny(ocl_img, edges, low_thresh, high_thresh, apperture_size, useL2gradient); - t2 = (double)cvGetTickCount() - t2;//kernel - - cv::Mat cpu_dst; - edges.download (cpu_dst);//download - - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if(j == 0) - continue; - - totalgputick = t1 + totalgputick; - - totalgputick_kernel = t2 + totalgputick_kernel; - + throw runtime_error("can't open aloeL.jpg"); } - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + SUBTEST << img.cols << 'x' << img.rows << "; aloeL.jpg" << "; edges" << "; CV_8UC1"; + Mat edges(img.size(), CV_8UC1); -} + CPU_ON; + Canny(img, edges, 50.0, 100.0); + CPU_OFF; -INSTANTIATE_TEST_CASE_P(GPU_ImgProc, Canny1, testing::Combine( - testing::Values(AppertureSize(3), AppertureSize(5)), - testing::Values(L2gradient(false), L2gradient(true)))); + ocl::oclMat d_img(img); + ocl::oclMat d_edges; + ocl::CannyBuf d_buf; + WARMUP_ON; + ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0); + WARMUP_OFF; + GPU_ON; + ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0); + ; + GPU_OFF; -#endif //Have opencl \ No newline at end of file + GPU_FULL_ON; + d_img.upload(img); + ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0); + d_edges.download(edges); + GPU_FULL_OFF; +} \ No newline at end of file diff --git a/modules/ocl/perf/perf_color.cpp b/modules/ocl/perf/perf_color.cpp new file mode 100644 index 000000000..e32a1839d --- /dev/null +++ b/modules/ocl/perf/perf_color.cpp @@ -0,0 +1,91 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Fangfang Bai, fangfang@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ +#include "precomp.hpp" + +///////////// cvtColor//////////////////////// +TEST(cvtColor) +{ + Mat src, dst; + ocl::oclMat d_src, d_dst; + + int all_type[] = {CV_8UC4}; + std::string type_name[] = {"CV_8UC4"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) + { + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) + { + gen(src, size, size, all_type[j], 0, 256); + SUBTEST << size << "x" << size << "; " << type_name[j] << " ; CV_RGBA2GRAY"; + + cvtColor(src, dst, CV_RGBA2GRAY, 4); + + CPU_ON; + cvtColor(src, dst, CV_RGBA2GRAY, 4); + CPU_OFF; + + d_src.upload(src); + + WARMUP_ON; + ocl::cvtColor(d_src, d_dst, CV_RGBA2GRAY, 4); + WARMUP_OFF; + + GPU_ON; + ocl::cvtColor(d_src, d_dst, CV_RGBA2GRAY, 4); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::cvtColor(d_src, d_dst, CV_RGBA2GRAY, 4); + d_dst.download(dst); + GPU_FULL_OFF; + } + + + } + + +} \ No newline at end of file diff --git a/modules/ocl/perf/perf_columnsum.cpp b/modules/ocl/perf/perf_columnsum.cpp index 96ea26a50..d2e3b45e5 100644 --- a/modules/ocl/perf/perf_columnsum.cpp +++ b/modules/ocl/perf/perf_columnsum.cpp @@ -15,8 +15,7 @@ // Third party copyrights are property of their respective owners. // // @Authors -// Fangfang Bai fangfang@multicorewareinc.com -// +// Fangfang Bai, fangfang@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -31,7 +30,7 @@ // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // -// This software is provided by the copyright holders and contributors "as is" and +// This software is provided by the copyright holders and contributors as is and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, @@ -43,78 +42,47 @@ // the use of this software, even if advised of the possibility of such damage. // //M*/ - #include "precomp.hpp" -#include -using namespace cv; -using namespace cv::ocl; -using namespace cvtest; -using namespace testing; -using namespace std; - -/////////////////////////////////////////////////////////////////////////////// -/// ColumnSum - -#ifdef HAVE_OPENCL - -//////////////////////////////////////////////////////////////////////// -// ColumnSum - -PARAM_TEST_CASE(ColumnSum) +///////////// columnSum//////////////////////// +TEST(columnSum) { - cv::Mat src; - //std::vector oclinfo; + Mat src, dst; + ocl::oclMat d_src, d_dst; - virtual void SetUp() + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); + SUBTEST << size << 'x' << size << "; CV_32FC1"; + + gen(src, size, size, CV_32FC1, 0, 256); + + CPU_ON; + dst.create(src.size(), src.type()); + + for (int i = 1; i < src.rows; ++i) + { + for (int j = 0; j < src.cols; ++j) + { + dst.at(i, j) = src.at(i, j) += src.at(i - 1, j); + } + } + + CPU_OFF; + + d_src.upload(src); + WARMUP_ON; + ocl::columnSum(d_src, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::columnSum(d_src, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::columnSum(d_src, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; } -}; - -TEST_F(ColumnSum, Performance) -{ - cv::Size size(MWIDTH, MHEIGHT); - cv::Mat src = randomMat(size, CV_32FC1); - cv::ocl::oclMat d_dst; - - double totalgputick = 0; - double totalgputick_kernel = 0; - double t1 = 0; - double t2 = 0; - - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - - t1 = (double)cvGetTickCount();//gpu start1 - - cv::ocl::oclMat d_src(src); - - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::columnSum(d_src, d_dst); - t2 = (double)cvGetTickCount() - t2;//kernel - - cv::Mat cpu_dst; - d_dst.download (cpu_dst);//download - - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if(j == 0) - continue; - - totalgputick = t1 + totalgputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - - - -} - - - -#endif \ No newline at end of file +} \ No newline at end of file diff --git a/modules/ocl/perf/perf_fft.cpp b/modules/ocl/perf/perf_fft.cpp index c9c19d0d4..50be2546e 100644 --- a/modules/ocl/perf/perf_fft.cpp +++ b/modules/ocl/perf/perf_fft.cpp @@ -15,7 +15,7 @@ // Third party copyrights are property of their respective owners. // // @Authors -// Fangfangbai, fangfang@multicorewareinc.com +// Fangfang Bai, fangfang@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -42,85 +42,48 @@ // the use of this software, even if advised of the possibility of such damage. // //M*/ - #include "precomp.hpp" -using namespace std; -#ifdef HAVE_CLAMDFFT -//////////////////////////////////////////////////////////////////////////// -// Dft -PARAM_TEST_CASE(Dft, cv::Size, bool) + +///////////// dft //////////////////////// +TEST(dft) { - cv::Size dft_size; - bool dft_rows; - vector info; - virtual void SetUp() + Mat src, dst; + ocl::oclMat d_src, d_dst; + + int all_type[] = {CV_32FC1, CV_32FC2}; + std::string type_name[] = {"CV_32FC1", "CV_32FC2"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - dft_size = GET_PARAM(0); - dft_rows = GET_PARAM(1); - cv::ocl::getDevice(info); - } -}; + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) + { + SUBTEST << size << 'x' << size << "; " << type_name[j] << " ; complex-to-complex"; -TEST_P(Dft, C2C) -{ - cv::Mat a = randomMat(dft_size, CV_32FC2, 0.0, 10.0); - int flags = 0; - flags |= dft_rows ? cv::DFT_ROWS : 0; + gen(src, size, size, all_type[j], Scalar::all(0), Scalar::all(1)); - cv::ocl::oclMat d_b; + dft(src, dst); - double totalgputick = 0; - double totalgputick_kernel = 0; - double t1 = 0; - double t2 = 0; + CPU_ON; + dft(src, dst); + CPU_OFF; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { + d_src.upload(src); - t1 = (double)cvGetTickCount();//gpu start1 + WARMUP_ON; + ocl::dft(d_src, d_dst, Size(size, size)); + WARMUP_OFF; - cv::ocl::oclMat ga = cv::ocl::oclMat(a); //upload + GPU_ON; + ocl::dft(d_src, d_dst, Size(size, size)); + ; + GPU_OFF; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::dft(ga, d_b, a.size(), flags); - t2 = (double)cvGetTickCount() - t2;//kernel - - cv::Mat cpu_dst; - d_b.download (cpu_dst);//download - - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if(j == 0) - continue; - - totalgputick = t1 + totalgputick; - totalgputick_kernel = t2 + totalgputick_kernel; + GPU_FULL_ON; + d_src.upload(src); + ocl::dft(d_src, d_dst, Size(size, size)); + d_dst.download(dst); + GPU_FULL_OFF; + } } - - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; -} - - - -TEST_P(Dft, R2CthenC2R) -{ - cv::Mat a = randomMat(dft_size, CV_32FC1, 0.0, 10.0); - - int flags = 0; - //flags |= dft_rows ? cv::DFT_ROWS : 0; // not supported yet - - cv::ocl::oclMat d_b, d_c; - - cv::ocl::dft(cv::ocl::oclMat(a), d_b, a.size(), flags); - cv::ocl::dft(d_b, d_c, a.size(), flags + cv::DFT_INVERSE + cv::DFT_REAL_OUTPUT); - - EXPECT_MAT_NEAR(a, d_c, a.size().area() * 1e-4, ""); -} - -//INSTANTIATE_TEST_CASE_P(ocl_DFT, Dft, testing::Combine( -// testing::Values(cv::Size(1280, 1024), cv::Size(1920, 1080),cv::Size(1800, 1500)), -// testing::Values(false, true))); - -#endif // HAVE_CLAMDFFT +} \ No newline at end of file diff --git a/modules/ocl/perf/perf_filters.cpp b/modules/ocl/perf/perf_filters.cpp index 100a1c59d..e9646c77e 100644 --- a/modules/ocl/perf/perf_filters.cpp +++ b/modules/ocl/perf/perf_filters.cpp @@ -10,15 +10,12 @@ // License Agreement // For Open Source Computer Vision Library // -// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. // Third party copyrights are property of their respective owners. // // @Authors -// Niko Li, newlife20080214@gmail.com -// Jia Haipeng, jiahaipeng95@gmail.com -// Zero Lin, Zero.Lin@amd.com -// Zhang Ying, zhangying913@gmail.com +// Fangfang Bai, fangfang@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -33,7 +30,7 @@ // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // -// This software is provided by the copyright holders and contributors "as is" and +// This software is provided by the copyright holders and contributors as is and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, @@ -45,1165 +42,331 @@ // the use of this software, even if advised of the possibility of such damage. // //M*/ - #include "precomp.hpp" -#ifdef HAVE_OPENCL - -using namespace cvtest; -using namespace testing; -using namespace std; -//using namespace cv::ocl; - -PARAM_TEST_CASE(FilterTestBase, MatType, bool) +///////////// Blur//////////////////////// +TEST(Blur) { - int type; - cv::Scalar val; + Mat src1, dst; + ocl::oclMat d_src1, d_dst; - //src mat - cv::Mat mat1; - cv::Mat mat2; - cv::Mat mask; - cv::Mat dst; - cv::Mat dst1; //bak, for two outputs + Size ksize = Size(3, 3); + int bordertype = BORDER_CONSTANT; + int all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - // set up roi - int roicols; - int roirows; - int src1x; - int src1y; - int src2x; - int src2y; - int dstx; - int dsty; - int maskx; - int masky; - - //src mat with roi - cv::Mat mat1_roi; - cv::Mat mat2_roi; - cv::Mat mask_roi; - cv::Mat dst_roi; - cv::Mat dst1_roi; //bak - //std::vector oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - cv::ocl::oclMat gdst1_whole; //bak - - //ocl mat with roi - cv::ocl::oclMat gmat1; - cv::ocl::oclMat gmat2; - cv::ocl::oclMat gdst; - cv::ocl::oclMat gdst1; //bak - cv::ocl::oclMat gmask; - - virtual void SetUp() + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - type = GET_PARAM(0); + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) + { + SUBTEST << size << 'x' << size << "; " << type_name[j] ; - cv::RNG &rng = TS::ptr()->get_rng(); - cv::Size size(MWIDTH, MHEIGHT); + gen(src1, size, size, all_type[j], 0, 256); + gen(dst, size, size, all_type[j], 0, 256); - mat1 = randomMat(rng, size, type, 5, 16, false); - mat2 = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); - dst1 = randomMat(rng, size, type, 5, 16, false); - mask = randomMat(rng, size, CV_8UC1, 0, 2, false); - cv::threshold(mask, mask, 0.5, 255., CV_8UC1); + blur(src1, dst, ksize, Point(-1, -1), bordertype); + + CPU_ON; + blur(src1, dst, ksize, Point(-1, -1), bordertype); + CPU_OFF; + + d_src1.upload(src1); + + WARMUP_ON; + ocl::blur(d_src1, d_dst, ksize, Point(-1, -1), bordertype); + WARMUP_OFF; + + GPU_ON; + ocl::blur(d_src1, d_dst, ksize, Point(-1, -1), bordertype); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src1.upload(src1); + ocl::blur(d_src1, d_dst, ksize, Point(-1, -1), bordertype); + d_dst.download(dst); + GPU_FULL_OFF; + } - val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0)); } - - void random_roi() - { - cv::RNG &rng = TS::ptr()->get_rng(); - - //randomize ROI - roicols = rng.uniform(1, mat1.cols); - roirows = rng.uniform(1, mat1.rows); - src1x = rng.uniform(0, mat1.cols - roicols); - src1y = rng.uniform(0, mat1.rows - roirows); - src2x = rng.uniform(0, mat2.cols - roicols); - src2y = rng.uniform(0, mat2.rows - roirows); - dstx = rng.uniform(0, dst.cols - roicols); - dsty = rng.uniform(0, dst.rows - roirows); - maskx = rng.uniform(0, mask.cols - roicols); - masky = rng.uniform(0, mask.rows - roirows); - - mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows)); - mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows)); - mask_roi = mask(Rect(maskx, masky, roicols, roirows)); - dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); - dst1_roi = dst1(Rect(dstx, dsty, roicols, roirows)); - - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gdst1_whole = dst1; - gdst1 = gdst1_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat1 = mat1_roi; - gmat2 = mat2_roi; - gmask = mask_roi; - } - -}; - -///////////////////////////////////////////////////////////////////////////////////////////////// -// blur - -PARAM_TEST_CASE(Blur, MatType, cv::Size, int) +} +///////////// Laplacian//////////////////////// +TEST(Laplacian) { - int type; - cv::Size ksize; - int bordertype; + Mat src1, dst; + ocl::oclMat d_src1, d_dst; - //src mat - cv::Mat mat1; - cv::Mat dst; + int ksize = 3; + int all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - // set up roi - int roicols; - int roirows; - int src1x; - int src1y; - int dstx; - int dsty; - - //src mat with roi - cv::Mat mat1_roi; - cv::Mat dst_roi; - //std::vector oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - - //ocl mat with roi - cv::ocl::oclMat gmat1; - cv::ocl::oclMat gdst; - - virtual void SetUp() + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - type = GET_PARAM(0); - ksize = GET_PARAM(1); - bordertype = GET_PARAM(2); - - cv::RNG &rng = TS::ptr()->get_rng(); - cv::Size size(MWIDTH, MHEIGHT); - - mat1 = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //cv::ocl::setBinpath(CLBINPATH); - } - - - void Has_roi(int b) - { - if(b) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - roicols = mat1.cols - 1; - roirows = mat1.rows - 1; - src1x = 1; - src1y = 1; - dstx = 1; - dsty = 1; + SUBTEST << size << 'x' << size << "; " << type_name[j] ; + + gen(src1, size, size, all_type[j], 0, 256); + gen(dst, size, size, all_type[j], 0, 256); + + + Laplacian(src1, dst, -1, ksize, 1); + + CPU_ON; + Laplacian(src1, dst, -1, ksize, 1); + CPU_OFF; + + d_src1.upload(src1); + + WARMUP_ON; + ocl::Laplacian(d_src1, d_dst, -1, ksize, 1); + WARMUP_OFF; + + GPU_ON; + ocl::Laplacian(d_src1, d_dst, -1, ksize, 1); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src1.upload(src1); + ocl::Laplacian(d_src1, d_dst, -1, ksize, 1); + d_dst.download(dst); + GPU_FULL_OFF; } - else - { - roicols = mat1.cols; - roirows = mat1.rows; - src1x = 0; - src1y = 0; - dstx = 0; - dsty = 0; - }; - - mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows)); - dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); } - -}; - -TEST_P(Blur, Mat) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - cv::blur(mat1_roi, dst_roi, ksize, Point(-1, -1), bordertype); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat1 = mat1_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::blur(gmat1, gdst, ksize, Point(-1, -1), bordertype); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if(j == 0) - continue; - - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::blur(gmat1, gdst, ksize, Point(-1, -1), bordertype); - }; -#endif - } -///////////////////////////////////////////////////////////////////////////////////////////////// -//Laplacian - -PARAM_TEST_CASE(LaplacianTestBase, MatType, int) +///////////// Erode //////////////////// +TEST(Erode) { - int type; - int ksize; + Mat src, dst, ker; + ocl::oclMat d_src, d_dst; - //src mat - cv::Mat mat; - cv::Mat dst; + int all_type[] = {CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4", "CV_32FC1", "CV_32FC4"}; - // set up roi - int roicols; - int roirows; - int srcx; - int srcy; - int dstx; - int dsty; - - //src mat with roi - cv::Mat mat_roi; - cv::Mat dst_roi; - std::vector oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - - //ocl mat with roi - cv::ocl::oclMat gmat; - cv::ocl::oclMat gdst; - - virtual void SetUp() + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - type = GET_PARAM(0); - ksize = GET_PARAM(1); - - cv::RNG &rng = TS::ptr()->get_rng(); - cv::Size size = cv::Size(MWIDTH, MHEIGHT); - - mat = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //cv::ocl::setBinpath(CLBINPATH); - } - - void Has_roi(int b) - { - if(b) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - roicols = mat.cols - 1; - roirows = mat.rows - 1; - srcx = 1; - srcy = 1; - dstx = 1; - dsty = 1; + SUBTEST << size << 'x' << size << "; " << type_name[j] ; + + gen(src, size, size, all_type[j], Scalar::all(0), Scalar::all(256)); + ker = getStructuringElement(MORPH_RECT, Size(3, 3)); + + erode(src, dst, ker); + + CPU_ON; + erode(src, dst, ker); + CPU_OFF; + + d_src.upload(src); + + WARMUP_ON; + ocl::erode(d_src, d_dst, ker); + WARMUP_OFF; + + GPU_ON; + ocl::erode(d_src, d_dst, ker); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::erode(d_src, d_dst, ker); + d_dst.download(dst); + GPU_FULL_OFF; } - else - { - roicols = mat.cols; - roirows = mat.rows; - srcx = 0; - srcy = 0; - dstx = 0; - dsty = 0; - }; - - mat_roi = mat(Rect(srcx, srcy, roicols, roirows)); - dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); } - -}; - -struct Laplacian : LaplacianTestBase {}; - -TEST_P(Laplacian, Accuracy) -{ - -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - cv::Laplacian(mat_roi, dst_roi, -1, ksize, 1); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat = mat_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::Laplacian(gmat, gdst, -1, ksize, 1); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if(j == 0) - continue; - - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat = mat_roi; - - - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::Laplacian(gmat, gdst, -1, ksize, 1); - }; -#endif } - -///////////////////////////////////////////////////////////////////////////////////////////////// -// erode & dilate - -PARAM_TEST_CASE(ErodeDilateBase, MatType, bool) +///////////// Sobel //////////////////////// +TEST(Sobel) { - int type; - //int iterations; + Mat src, dst; + ocl::oclMat d_src, d_dst; - //erode or dilate kernel - cv::Mat kernel; + int dx = 1; + int dy = 1; + int all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - //src mat - cv::Mat mat1; - cv::Mat dst; - - // set up roi - int roicols; - int roirows; - int src1x; - int src1y; - int dstx; - int dsty; - - //src mat with roi - cv::Mat mat1_roi; - cv::Mat dst_roi; - std::vector oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - - //ocl mat with roi - cv::ocl::oclMat gmat1; - cv::ocl::oclMat gdst; - - virtual void SetUp() + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - type = GET_PARAM(0); - // iterations = GET_PARAM(1); - - cv::RNG &rng = TS::ptr()->get_rng(); - cv::Size size = cv::Size(MWIDTH, MHEIGHT); - - mat1 = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); - // rng.fill(kernel, cv::RNG::UNIFORM, cv::Scalar::all(0), cv::Scalar::all(3)); - kernel = randomMat(rng, Size(3, 3), CV_8UC1, 0, 3, false); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //cv::ocl::setBinpath(CLBINPATH); - } - - void Has_roi(int b) - { - if(b) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - roicols = mat1.cols - 1; - roirows = mat1.rows - 1; - src1x = 1; - src1y = 1; - dstx = 1; - dsty = 1; + SUBTEST << size << 'x' << size << "; " << type_name[j] ; + + gen(src, size, size, all_type[j], 0, 256); + + Sobel(src, dst, -1, dx, dy); + + CPU_ON; + Sobel(src, dst, -1, dx, dy); + CPU_OFF; + + d_src.upload(src); + + WARMUP_ON; + ocl::Sobel(d_src, d_dst, -1, dx, dy); + WARMUP_OFF; + + GPU_ON; + ocl::Sobel(d_src, d_dst, -1, dx, dy); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::Sobel(d_src, d_dst, -1, dx, dy); + d_dst.download(dst); + GPU_FULL_OFF; } - else - { - roicols = mat1.cols; - roirows = mat1.rows; - src1x = 0; - src1y = 0; - dstx = 0; - dsty = 0; - }; - - mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows)); - dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); } - -}; - -// erode - -struct Erode : ErodeDilateBase {}; - -TEST_P(Erode, Mat) +} +///////////// Scharr //////////////////////// +TEST(Scharr) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src, dst; + ocl::oclMat d_src, d_dst; + + int dx = 1; + int dy = 0; + int all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[j] ; - t0 = (double)cvGetTickCount();//cpu start - cv::erode(mat1_roi, dst_roi, kernel); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src, size, size, all_type[j], 0, 256); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); + Scharr(src, dst, -1, dx, dy); - gmat1 = mat1_roi; + CPU_ON; + Scharr(src, dst, -1, dx, dy); + CPU_OFF; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::erode(gmat1, gdst, kernel); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 + d_src.upload(src); - if(j == 0) - continue; + WARMUP_ON; + ocl::Scharr(d_src, d_dst, -1, dx, dy); + WARMUP_OFF; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + GPU_ON; + ocl::Scharr(d_src, d_dst, -1, dx, dy); + ; + GPU_OFF; + GPU_FULL_ON; + d_src.upload(src); + ocl::Scharr(d_src, d_dst, -1, dx, dy); + d_dst.download(dst); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::erode(gmat1, gdst, kernel); - }; -#endif - } -// dilate - -struct Dilate : ErodeDilateBase {}; - -TEST_P(Dilate, Mat) +///////////// GaussianBlur //////////////////////// +TEST(GaussianBlur) { + Mat src, dst; + int all_type[] = {CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4", "CV_32FC1", "CV_32FC4"}; -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); - t0 = (double)cvGetTickCount();//cpu start - cv::dilate(mat1_roi, dst_roi, kernel); - t0 = (double)cvGetTickCount() - t0;//cpu end + SUBTEST << size << 'x' << size << "; " << type_name[j] ; - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); + gen(src, size, size, all_type[j], 0, 256); - gmat1 = mat1_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::dilate(gmat1, gdst, kernel); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 + GaussianBlur(src, dst, Size(9, 9), 0); - if(j == 0) - continue; + CPU_ON; + GaussianBlur(src, dst, Size(9, 9), 0); + CPU_OFF; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + ocl::oclMat d_src(src); + ocl::oclMat d_dst(src.size(), src.type()); + ocl::oclMat d_buf; + WARMUP_ON; + ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0); + WARMUP_OFF; + + GPU_ON; + ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0); + d_dst.download(dst); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::dilate(gmat1, gdst, kernel); - }; -#endif - } -///////////////////////////////////////////////////////////////////////////////////////////////// -// Sobel - -PARAM_TEST_CASE(Sobel, MatType, int, int, int, int) +///////////// filter2D//////////////////////// +TEST(filter2D) { - int type; - int dx, dy, ksize, bordertype; + Mat src; - //src mat - cv::Mat mat1; - cv::Mat dst; - - // set up roi - int roicols; - int roirows; - int src1x; - int src1y; - int dstx; - int dsty; - - //src mat with roi - cv::Mat mat1_roi; - cv::Mat dst_roi; - //std::vector oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - - //ocl mat with roi - cv::ocl::oclMat gmat1; - cv::ocl::oclMat gdst; - - virtual void SetUp() + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - type = GET_PARAM(0); - dx = GET_PARAM(1); - dy = GET_PARAM(2); - ksize = GET_PARAM(3); - bordertype = GET_PARAM(4); - dx = 2; - dy = 0; + int all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - cv::RNG &rng = TS::ptr()->get_rng(); - cv::Size size = cv::Size(MWIDTH, MHEIGHT); - - mat1 = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //cv::ocl::setBinpath(CLBINPATH); - } - - void Has_roi(int b) - { - if(b) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - roicols = mat1.cols - 1; - roirows = mat1.rows - 1; - src1x = 1; - src1y = 1; - dstx = 1; - dsty = 1; + gen(src, size, size, all_type[j], 0, 256); + + for (int ksize = 3; ksize <= 15; ksize = 2*ksize+1) + { + SUBTEST << "ksize = " << ksize << "; " << size << 'x' << size << "; " << type_name[j] ; + + Mat kernel; + gen(kernel, ksize, ksize, CV_32FC1, 0.0, 1.0); + + Mat dst; + cv::filter2D(src, dst, -1, kernel); + + CPU_ON; + cv::filter2D(src, dst, -1, kernel); + CPU_OFF; + + ocl::oclMat d_src(src); + ocl::oclMat d_dst; + + WARMUP_ON; + ocl::filter2D(d_src, d_dst, -1, kernel); + WARMUP_OFF; + + GPU_ON; + ocl::filter2D(d_src, d_dst, -1, kernel); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::filter2D(d_src, d_dst, -1, kernel); + d_dst.download(dst); + GPU_FULL_OFF; + } + } - else - { - roicols = mat1.cols; - roirows = mat1.rows; - src1x = 0; - src1y = 0; - dstx = 0; - dsty = 0; - }; - mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows)); - dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); } - -}; - -TEST_P(Sobel, Mat) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - cv::Sobel(mat1_roi, dst_roi, -1, dx, dy, ksize, /*scale*/0.00001,/*delta*/0, bordertype); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat1 = mat1_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::Sobel(gmat1, gdst, -1, dx, dy, ksize,/*scale*/0.00001,/*delta*/0, bordertype); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if(j == 0) - continue; - - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::Sobel(gmat1, gdst, -1, dx, dy, ksize,/*scale*/0.00001,/*delta*/0, bordertype); - }; -#endif - -} - -///////////////////////////////////////////////////////////////////////////////////////////////// -// Scharr - -PARAM_TEST_CASE(Scharr, MatType, int, int, int) -{ - int type; - int dx, dy, bordertype; - - //src mat - cv::Mat mat1; - cv::Mat dst; - - // set up roi - int roicols; - int roirows; - int src1x; - int src1y; - int dstx; - int dsty; - - //src mat with roi - cv::Mat mat1_roi; - cv::Mat dst_roi; - //std::vector oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - - //ocl mat with roi - cv::ocl::oclMat gmat1; - cv::ocl::oclMat gdst; - - virtual void SetUp() - { - type = GET_PARAM(0); - dx = GET_PARAM(1); - dy = GET_PARAM(2); - bordertype = GET_PARAM(3); - dx = 1; - dy = 0; - - cv::RNG &rng = TS::ptr()->get_rng(); - cv::Size size = cv::Size(MWIDTH, MHEIGHT); - - mat1 = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //cv::ocl::setBinpath(CLBINPATH); - } - - void Has_roi(int b) - { - if(b) - { - roicols = mat1.cols - 1; - roirows = mat1.rows - 1; - src1x = 1; - src1y = 1; - dstx = 1; - dsty = 1; - } - else - { - roicols = mat1.cols; - roirows = mat1.rows; - src1x = 0; - src1y = 0; - dstx = 0; - dsty = 0; - }; - - mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows)); - dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); - - } -}; - -TEST_P(Scharr, Mat) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - cv::Scharr(mat1_roi, dst_roi, -1, dx, dy, /*scale*/1,/*delta*/0, bordertype); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat1 = mat1_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::Scharr(gmat1, gdst, -1, dx, dy,/*scale*/1,/*delta*/0, bordertype); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if(j == 0) - continue; - - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::Scharr(gmat1, gdst, -1, dx, dy,/*scale*/1,/*delta*/0, bordertype); - }; -#endif - -} - -///////////////////////////////////////////////////////////////////////////////////////////////// -// GaussianBlur - -PARAM_TEST_CASE(GaussianBlur, MatType, cv::Size, int) -{ - int type; - cv::Size ksize; - int bordertype; - - double sigma1, sigma2; - - //src mat - cv::Mat mat1; - cv::Mat dst; - - // set up roi - int roicols; - int roirows; - int src1x; - int src1y; - int dstx; - int dsty; - - //src mat with roi - cv::Mat mat1_roi; - cv::Mat dst_roi; - //std::vector oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - - //ocl mat with roi - cv::ocl::oclMat gmat1; - cv::ocl::oclMat gdst; - - virtual void SetUp() - { - type = GET_PARAM(0); - ksize = GET_PARAM(1); - bordertype = GET_PARAM(2); - - cv::RNG &rng = TS::ptr()->get_rng(); - cv::Size size = cv::Size(MWIDTH, MHEIGHT); - - sigma1 = rng.uniform(0.1, 1.0); - sigma2 = rng.uniform(0.1, 1.0); - - mat1 = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //cv::ocl::setBinpath(CLBINPATH); - } - - void Has_roi(int b) - { - if(b) - { - roicols = mat1.cols - 1; - roirows = mat1.rows - 1; - src1x = 1; - src1y = 1; - dstx = 1; - dsty = 1; - } - else - { - roicols = mat1.cols; - roirows = mat1.rows; - src1x = 0; - src1y = 0; - dstx = 0; - dsty = 0; - }; - - mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows)); - dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); - - } - -}; - -TEST_P(GaussianBlur, Mat) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - cv::GaussianBlur(mat1_roi, dst_roi, ksize, sigma1, sigma2, bordertype); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat1 = mat1_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::GaussianBlur(gmat1, gdst, ksize, sigma1, sigma2, bordertype); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if(j == 0) - continue; - - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::GaussianBlur(gmat1, gdst, ksize, sigma1, sigma2, bordertype); - }; -#endif - -} - -//************test********** - -INSTANTIATE_TEST_CASE_P(Filter, Blur, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), - Values(cv::Size(3, 3)/*, cv::Size(5, 5), cv::Size(7, 7)*/), - Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE, (MatType)cv::BORDER_REFLECT, (MatType)cv::BORDER_REFLECT_101))); - - -INSTANTIATE_TEST_CASE_P(Filters, Laplacian, Combine( - Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), - Values(1/*, 3*/))); - -//INSTANTIATE_TEST_CASE_P(Filter, ErodeDilate, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(1, 2, 3))); - -INSTANTIATE_TEST_CASE_P(Filter, Erode, Combine(Values(CV_8UC1, CV_8UC1), Values(false))); - -//INSTANTIATE_TEST_CASE_P(Filter, ErodeDilate, Combine(Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(1, 2, 3))); - -INSTANTIATE_TEST_CASE_P(Filter, Dilate, Combine(Values(CV_8UC1, CV_8UC1), Values(false))); - - -INSTANTIATE_TEST_CASE_P(Filter, Sobel, Combine(Values(CV_8UC1, CV_32FC1), - Values(1, 2), Values(0, 1), Values(3, 5), Values((MatType)cv::BORDER_CONSTANT, - (MatType)cv::BORDER_REPLICATE))); - - -INSTANTIATE_TEST_CASE_P(Filter, Scharr, Combine( - Values(CV_8UC1, CV_32FC1), Values(0, 1), Values(0, 1), - Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE))); - -INSTANTIATE_TEST_CASE_P(Filter, GaussianBlur, Combine( - Values(CV_8UC1, CV_32FC1), - Values(cv::Size(3, 3), cv::Size(5, 5)), - Values((MatType)cv::BORDER_CONSTANT, (MatType)cv::BORDER_REPLICATE))); - - -#endif // HAVE_OPENCL +} \ No newline at end of file diff --git a/modules/ocl/perf/perf_gemm.cpp b/modules/ocl/perf/perf_gemm.cpp index c3dcab34f..930ecb046 100644 --- a/modules/ocl/perf/perf_gemm.cpp +++ b/modules/ocl/perf/perf_gemm.cpp @@ -16,6 +16,7 @@ // // @Authors // Fangfang Bai, fangfang@multicorewareinc.com +// // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // @@ -41,73 +42,47 @@ // the use of this software, even if advised of the possibility of such damage. // //M*/ - - #include "precomp.hpp" -using namespace std; -#ifdef HAVE_CLAMDBLAS -//////////////////////////////////////////////////////////////////////////// -// GEMM -PARAM_TEST_CASE(Gemm, int, cv::Size, int) + +///////////// gemm //////////////////////// +TEST(gemm) { - int type; - cv::Size mat_size; - int flags; - vector info; - virtual void SetUp() + Mat src1, src2, src3, dst; + ocl::oclMat d_src1, d_src2, d_src3, d_dst; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - type = GET_PARAM(0); - mat_size = GET_PARAM(1); - flags = GET_PARAM(2); + SUBTEST << size << 'x' << size; - cv::ocl::getDevice(info); + gen(src1, size, size, CV_32FC1, Scalar::all(-10), Scalar::all(10)); + gen(src2, size, size, CV_32FC1, Scalar::all(-10), Scalar::all(10)); + gen(src3, size, size, CV_32FC1, Scalar::all(-10), Scalar::all(10)); + + gemm(src1, src2, 1.0, src3, 1.0, dst); + + CPU_ON; + gemm(src1, src2, 1.0, src3, 1.0, dst); + CPU_OFF; + + d_src1.upload(src1); + d_src2.upload(src2); + d_src3.upload(src3); + + WARMUP_ON; + ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src1.upload(src1); + d_src2.upload(src2); + d_src3.upload(src3); + ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; } -}; - -TEST_P(Gemm, Performance) -{ - cv::Mat a = randomMat(mat_size, type, 0.0, 10.0); - cv::Mat b = randomMat(mat_size, type, 0.0, 10.0); - cv::Mat c = randomMat(mat_size, type, 0.0, 10.0); - cv::ocl::oclMat ocl_dst; - - double totalgputick = 0; - double totalgputick_kernel = 0; - double t1 = 0; - double t2 = 0; - - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - - t1 = (double)cvGetTickCount();//gpu start1 - - cv::ocl::oclMat ga = cv::ocl::oclMat(a);//upload - cv::ocl::oclMat gb = cv::ocl::oclMat(b);//upload - cv::ocl::oclMat gc = cv::ocl::oclMat(c);//upload - - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::gemm(ga, gb, 1.0, gc, 1.0, ocl_dst, flags); - t2 = (double)cvGetTickCount() - t2;//kernel - - cv::Mat cpu_dst; - ocl_dst.download (cpu_dst);//download - - t1 = (double)cvGetTickCount() - t1;//gpu end - - if(j == 0) - continue; - - totalgputick = t1 + totalgputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; -} - - -INSTANTIATE_TEST_CASE_P(ocl_gemm, Gemm, testing::Combine( - testing::Values(CV_32FC1, CV_32FC2/* , CV_64FC1, CV_64FC2*/), - testing::Values(cv::Size(512, 512), cv::Size(1024, 1024)), - testing::Values(0, (int)cv::GEMM_1_T, (int)cv::GEMM_2_T, (int)(cv::GEMM_1_T + cv::GEMM_2_T)))); -#endif \ No newline at end of file +} \ No newline at end of file diff --git a/modules/ocl/perf/perf_haar.cpp b/modules/ocl/perf/perf_haar.cpp index 525b8fb49..5a909ace4 100644 --- a/modules/ocl/perf/perf_haar.cpp +++ b/modules/ocl/perf/perf_haar.cpp @@ -10,12 +10,12 @@ // License Agreement // For Open Source Computer Vision Library // -// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. // Third party copyrights are property of their respective owners. // // @Authors -// Jia Haipeng, jiahaipeng95@gmail.com +// Fangfang Bai, fangfang@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -30,7 +30,7 @@ // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // -// This software is provided by the copyright holders and contributors "as is" and +// This software is provided by the copyright holders and contributors as is and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, @@ -42,133 +42,97 @@ // the use of this software, even if advised of the possibility of such damage. // //M*/ - -#include "opencv2/objdetect/objdetect.hpp" #include "precomp.hpp" -#ifdef HAVE_OPENCL +///////////// Haar //////////////////////// +namespace cv +{ +namespace ocl +{ -using namespace cvtest; -using namespace testing; -using namespace std; -using namespace cv; -extern std::string workdir; struct getRect { - Rect operator ()(const CvAvgComp &e) const + Rect operator()(const CvAvgComp &e) const { return e.rect; } }; -PARAM_TEST_CASE(HaarTestBase, int, int) +class CascadeClassifier_GPU : public OclCascadeClassifier { - //std::vector oclinfo; - cv::ocl::OclCascadeClassifier cascade, nestedCascade; - cv::CascadeClassifier cpucascade, cpunestedCascade; - // Mat img; - - double scale; - int index; - - virtual void SetUp() +public: + void detectMultiScale(oclMat &image, + CV_OUT std::vector& faces, + double scaleFactor = 1.1, + int minNeighbors = 3, int flags = 0, + Size minSize = Size(), + Size maxSize = Size()) { - scale = 1.0; - index = 0; - string cascadeName = "../../../data/haarcascades/haarcascade_frontalface_alt.xml"; - - if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName))) - { - cout << "ERROR: Could not load classifier cascade" << endl; - return; - } - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums>0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //cv::ocl::setBinpath("E:\\"); + (void)maxSize; + MemStorage storage(cvCreateMemStorage(0)); + //CvMat img=image; + CvSeq *objs = oclHaarDetectObjects(image, storage, scaleFactor, minNeighbors, flags, minSize); + vector vecAvgComp; + Seq(objs).copyTo(vecAvgComp); + faces.resize(vecAvgComp.size()); + std::transform(vecAvgComp.begin(), vecAvgComp.end(), faces.begin(), getRect()); } + }; -////////////////////////////////faceDetect///////////////////////////////////////////////// - -struct Haar : HaarTestBase {}; - -TEST_F(Haar, FaceDetect) -{ - string imgName = workdir + "lena.jpg"; - Mat img = imread( imgName, 1 ); - - if(img.empty()) - { - std::cout << imgName << std::endl; - return ; - } - - //int i = 0; - double t = 0; - vector faces, oclfaces; - - // const static Scalar colors[] = { CV_RGB(0, 0, 255), - // CV_RGB(0, 128, 255), - // CV_RGB(0, 255, 255), - // CV_RGB(0, 255, 0), - // CV_RGB(255, 128, 0), - // CV_RGB(255, 255, 0), - // CV_RGB(255, 0, 0), - // CV_RGB(255, 0, 255) - // } ; - - Mat gray, smallImg(cvRound (img.rows / scale), cvRound(img.cols / scale), CV_8UC1 ); - MemStorage storage(cvCreateMemStorage(0)); - cvtColor( img, gray, CV_BGR2GRAY ); - resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR ); - equalizeHist( smallImg, smallImg ); - - t = (double)cvGetTickCount(); - for(int k = 0; k < LOOP_TIMES; k++) - { - cpucascade.detectMultiScale( smallImg, faces, 1.1, - 3, 0 - | CV_HAAR_SCALE_IMAGE - , Size(30, 30), Size(0, 0) ); - } - t = (double)cvGetTickCount() - t ; - printf( "cpudetection time = %g ms\n", t / (LOOP_TIMES * (double)cvGetTickFrequency() * 1000.) ); - - cv::ocl::oclMat image; - CvSeq *_objects=NULL; - t = (double)cvGetTickCount(); - for(int k = 0; k < LOOP_TIMES; k++) - { - image.upload(smallImg); - _objects = cascade.oclHaarDetectObjects( image, storage, 1.1, - 3, 0 - | CV_HAAR_SCALE_IMAGE - , Size(30, 30), Size(0, 0) ); - } - t = (double)cvGetTickCount() - t ; - printf( "ocldetection time = %g ms\n", t / (LOOP_TIMES * (double)cvGetTickFrequency() * 1000.) ); - vector vecAvgComp; - Seq(_objects).copyTo(vecAvgComp); - oclfaces.resize(vecAvgComp.size()); - std::transform(vecAvgComp.begin(), vecAvgComp.end(), oclfaces.begin(), getRect()); - - //for( vector::const_iterator r = faces.begin(); r != faces.end(); r++, i++ ) - //{ - // Mat smallImgROI; - // Point center; - // Scalar color = colors[i%8]; - // int radius; - // center.x = cvRound((r->x + r->width*0.5)*scale); - // center.y = cvRound((r->y + r->height*0.5)*scale); - // radius = cvRound((r->width + r->height)*0.25*scale); - // circle( img, center, radius, color, 3, 8, 0 ); - //} - //namedWindow("result"); - //imshow("result",img); - //waitKey(0); - //destroyAllWindows(); - } -#endif // HAVE_OPENCL +} +TEST(Haar) +{ + Mat img = imread(abspath("basketball1.png"), CV_LOAD_IMAGE_GRAYSCALE); + + if (img.empty()) + { + throw runtime_error("can't open basketball1.png"); + } + + CascadeClassifier faceCascadeCPU; + + if (!faceCascadeCPU.load(abspath("haarcascade_frontalface_alt.xml"))) + { + throw runtime_error("can't load haarcascade_frontalface_alt.xml"); + } + + vector faces; + + SUBTEST << img.cols << "x" << img.rows << "; scale image"; + CPU_ON; + faceCascadeCPU.detectMultiScale(img, faces, + 1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30)); + CPU_OFF; + + ocl::CascadeClassifier_GPU faceCascade; + + if (!faceCascade.load(abspath("haarcascade_frontalface_alt.xml"))) + { + throw runtime_error("can't load haarcascade_frontalface_alt.xml"); + } + + ocl::oclMat d_img(img); + + faces.clear(); + + WARMUP_ON; + faceCascade.detectMultiScale(d_img, faces, + 1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30)); + WARMUP_OFF; + + faces.clear(); + + GPU_ON; + faceCascade.detectMultiScale(d_img, faces, + 1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30)); + ; + GPU_OFF; + + GPU_FULL_ON; + d_img.upload(img); + faceCascade.detectMultiScale(d_img, faces, + 1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30)); + GPU_FULL_OFF; +} \ No newline at end of file diff --git a/modules/ocl/perf/perf_hog.cpp b/modules/ocl/perf/perf_hog.cpp index fd58808a8..b74077ff4 100644 --- a/modules/ocl/perf/perf_hog.cpp +++ b/modules/ocl/perf/perf_hog.cpp @@ -15,7 +15,7 @@ // Third party copyrights are property of their respective owners. // // @Authors -// Peng Xiao, pengxiao@multicorewareinc.com +// Fangfang Bai, fangfang@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -42,125 +42,47 @@ // the use of this software, even if advised of the possibility of such damage. // //M*/ - #include "precomp.hpp" -#include -#ifdef HAVE_OPENCL - -using namespace cv; -using namespace cv::ocl; -using namespace cvtest; -using namespace testing; -using namespace std; -extern std::string workdir; - -#ifndef MWC_TEST_UTILITY -#define MWC_TEST_UTILITY - -// Param class -#ifndef IMPLEMENT_PARAM_CLASS -#define IMPLEMENT_PARAM_CLASS(name, type) \ -class name \ - { \ - public: \ - name ( type arg = type ()) : val_(arg) {} \ - operator type () const {return val_;} \ - private: \ - type val_; \ - }; \ - inline void PrintTo( name param, std::ostream* os) \ - { \ - *os << #name << "(" << testing::PrintToString(static_cast< type >(param)) << ")"; \ - } - -#endif // IMPLEMENT_PARAM_CLASS -#endif // MWC_TEST_UTILITY - -IMPLEMENT_PARAM_CLASS(WinSizw48, bool); - -PARAM_TEST_CASE(HOG, WinSizw48, bool) +///////////// HOG//////////////////////// +TEST(HOG) { - bool is48; - vector detector; - virtual void SetUp() + Mat src = imread(abspath("road.png"), cv::IMREAD_GRAYSCALE); + + if (src.empty()) { - is48 = GET_PARAM(0); - if(is48) - { - detector = cv::ocl::HOGDescriptor::getPeopleDetector48x96(); - } - else - { - detector = cv::ocl::HOGDescriptor::getPeopleDetector64x128(); - } - } -}; - -TEST_P(HOG, Performance) -{ - cv::Mat img = readImage(workdir + "lena.jpg", cv::IMREAD_GRAYSCALE); - ASSERT_FALSE(img.empty()); - - // define HOG related arguments - float scale = 1.05f; - //int nlevels = 13; - int gr_threshold = 8; - float hit_threshold = 1.4f; - //bool hit_threshold_auto = true; - - int win_width = is48 ? 48 : 64; - int win_stride_width = 8; - int win_stride_height = 8; - - bool gamma_corr = true; - - Size win_size(win_width, win_width * 2); //(64, 128) or (48, 96) - Size win_stride(win_stride_width, win_stride_height); - - cv::ocl::HOGDescriptor gpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, - cv::ocl::HOGDescriptor::DEFAULT_WIN_SIGMA, 0.2, gamma_corr, - cv::ocl::HOGDescriptor::DEFAULT_NLEVELS); - - gpu_hog.setSVMDetector(detector); - - double totalgputick = 0; - double totalgputick_kernel = 0; - - double t1 = 0; - double t2 = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - t1 = (double)cvGetTickCount();//gpu start1 - - ocl::oclMat d_src(img);//upload - - t2 = (double)cvGetTickCount(); //kernel - - vector found; - gpu_hog.detectMultiScale(d_src, found, hit_threshold, win_stride, - Size(0, 0), scale, gr_threshold); - - t2 = (double)cvGetTickCount() - t2;//kernel - - // no download time for HOG - - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if(j == 0) - continue; - - totalgputick = t1 + totalgputick; - - totalgputick_kernel = t2 + totalgputick_kernel; - + throw runtime_error("can't open road.png"); } - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; -} + cv::HOGDescriptor hog; + hog.setSVMDetector(hog.getDefaultPeopleDetector()); + std::vector found_locations; -INSTANTIATE_TEST_CASE_P(GPU_ObjDetect, HOG, testing::Combine(testing::Values(WinSizw48(false), WinSizw48(true)), testing::Values(false))); + SUBTEST << 768 << 'x' << 576 << "; road.png"; -#endif //Have opencl \ No newline at end of file + hog.detectMultiScale(src, found_locations); + + CPU_ON; + hog.detectMultiScale(src, found_locations); + CPU_OFF; + + cv::ocl::HOGDescriptor ocl_hog; + ocl_hog.setSVMDetector(ocl_hog.getDefaultPeopleDetector()); + ocl::oclMat d_src; + d_src.upload(src); + + WARMUP_ON; + ocl_hog.detectMultiScale(d_src, found_locations); + WARMUP_OFF; + + GPU_ON; + ocl_hog.detectMultiScale(d_src, found_locations); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl_hog.detectMultiScale(d_src, found_locations); + GPU_FULL_OFF; +} \ No newline at end of file diff --git a/modules/ocl/perf/perf_imgproc.cpp b/modules/ocl/perf/perf_imgproc.cpp index bc54cb275..756f69556 100644 --- a/modules/ocl/perf/perf_imgproc.cpp +++ b/modules/ocl/perf/perf_imgproc.cpp @@ -10,18 +10,12 @@ // License Agreement // For Open Source Computer Vision Library // -// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. // Third party copyrights are property of their respective owners. // // @Authors -// Niko Li, newlife20080214@gmail.com -// Jia Haipeng, jiahaipeng95@gmail.com -// Shengen Yan, yanshengen@gmail.com -// Jiang Liyuan, lyuan001.good@163.com -// Rock Li, Rock.Li@amd.com -// Zailong Wu, bullet@yeah.net -// Xu Pang, pangxu010@163.com +// Fangfang Bai, fangfang@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -36,7 +30,7 @@ // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // -// This software is provided by the copyright holders and contributors "as is" and +// This software is provided by the copyright holders and contributors as is and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, @@ -48,949 +42,290 @@ // the use of this software, even if advised of the possibility of such damage. // //M*/ - #include "precomp.hpp" -#ifdef HAVE_OPENCL - -using namespace cvtest; -using namespace testing; -using namespace std; - - -MatType nulltype = -1; - -#define ONE_TYPE(type) testing::ValuesIn(typeVector(type)) -#define NULL_TYPE testing::ValuesIn(typeVector(nulltype)) - - -vector typeVector(MatType type) +///////////// equalizeHist //////////////////////// +TEST(equalizeHist) { - vector v; - v.push_back(type); - return v; -} + Mat src, dst; + int all_type[] = {CV_8UC1}; + std::string type_name[] = {"CV_8UC1"}; - -PARAM_TEST_CASE(ImgprocTestBase, MatType, MatType, MatType, MatType, MatType, bool) -{ - int type1, type2, type3, type4, type5; - cv::Scalar val; - // set up roi - int roicols; - int roirows; - int src1x; - int src1y; - int src2x; - int src2y; - int dstx; - int dsty; - int dst1x; - int dst1y; - int maskx; - int masky; - - //mat - cv::Mat mat1; - cv::Mat mat2; - cv::Mat mask; - cv::Mat dst; - cv::Mat dst1; //bak, for two outputs - - //mat with roi - cv::Mat mat1_roi; - cv::Mat mat2_roi; - cv::Mat mask_roi; - cv::Mat dst_roi; - cv::Mat dst1_roi; //bak - //std::vector oclinfo; - //ocl mat - cv::ocl::oclMat clmat1; - cv::ocl::oclMat clmat2; - cv::ocl::oclMat clmask; - cv::ocl::oclMat cldst; - cv::ocl::oclMat cldst1; //bak - - //ocl mat with roi - cv::ocl::oclMat clmat1_roi; - cv::ocl::oclMat clmat2_roi; - cv::ocl::oclMat clmask_roi; - cv::ocl::oclMat cldst_roi; - cv::ocl::oclMat cldst1_roi; - - virtual void SetUp() + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - type1 = GET_PARAM(0); - type2 = GET_PARAM(1); - type3 = GET_PARAM(2); - type4 = GET_PARAM(3); - type5 = GET_PARAM(4); - cv::RNG &rng = TS::ptr()->get_rng(); - cv::Size size(MWIDTH, MHEIGHT); - double min = 1, max = 20; - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums>0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //cv::ocl::setBinpath(CLBINPATH); - if(type1 != nulltype) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - mat1 = randomMat(rng, size, type1, min, max, false); - clmat1 = mat1; + SUBTEST << size << 'x' << size << "; " << type_name[j] ; + + gen(src, size, size, all_type[j], 0, 256); + + equalizeHist(src, dst); + + CPU_ON; + equalizeHist(src, dst); + CPU_OFF; + + ocl::oclMat d_src(src); + ocl::oclMat d_dst; + ocl::oclMat d_hist; + ocl::oclMat d_buf; + + WARMUP_ON; + ocl::equalizeHist(d_src, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::equalizeHist(d_src, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::equalizeHist(d_src, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; } - if(type2 != nulltype) - { - mat2 = randomMat(rng, size, type2, min, max, false); - clmat2 = mat2; - } - if(type3 != nulltype) - { - dst = randomMat(rng, size, type3, min, max, false); - cldst = dst; - } - if(type4 != nulltype) - { - dst1 = randomMat(rng, size, type4, min, max, false); - cldst1 = dst1; - } - if(type5 != nulltype) - { - mask = randomMat(rng, size, CV_8UC1, 0, 2, false); - cv::threshold(mask, mask, 0.5, 255., type5); - clmask = mask; - } - val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0)); - } - - - void Has_roi(int b) - { - //cv::RNG& rng = TS::ptr()->get_rng(); - if(b) - { - //randomize ROI - roicols = mat1.cols - 1; //start - roirows = mat1.rows - 1; - src1x = 1; - src2x = 1; - src1y = 1; - src2y = 1; - dstx = 1; - dsty = 1; - dst1x = 1; - dst1y = 1; - maskx = 1; - masky = 1; - } - else - { - roicols = mat1.cols; - roirows = mat1.rows; - src1x = 0; - src2x = 0; - src1y = 0; - src2y = 0; - dstx = 0; - dsty = 0; - dst1x = 0; - dst1y = 0; - maskx = 0; - masky = 0; - }; - - if(type1 != nulltype) - { - mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows)); - //clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows)); - } - if(type2 != nulltype) - { - mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows)); - //clmat2_roi = clmat2(Rect(src2x,src2y,roicols,roirows)); - } - if(type3 != nulltype) - { - dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); - //cldst_roi = cldst(Rect(dstx,dsty,roicols,roirows)); - } - if(type4 != nulltype) - { - dst1_roi = dst1(Rect(dst1x, dst1y, roicols, roirows)); - //cldst1_roi = cldst1(Rect(dst1x,dst1y,roicols,roirows)); - } - if(type5 != nulltype) - { - mask_roi = mask(Rect(maskx, masky, roicols, roirows)); - //clmask_roi = clmask(Rect(maskx,masky,roicols,roirows)); - } - } - - void random_roi() - { - cv::RNG &rng = TS::ptr()->get_rng(); - - //randomize ROI - roicols = rng.uniform(1, mat1.cols); - roirows = rng.uniform(1, mat1.rows); - src1x = rng.uniform(0, mat1.cols - roicols); - src1y = rng.uniform(0, mat1.rows - roirows); - src2x = rng.uniform(0, mat2.cols - roicols); - src2y = rng.uniform(0, mat2.rows - roirows); - dstx = rng.uniform(0, dst.cols - roicols); - dsty = rng.uniform(0, dst.rows - roirows); - dst1x = rng.uniform(0, dst1.cols - roicols); - dst1y = rng.uniform(0, dst1.rows - roirows); - maskx = rng.uniform(0, mask.cols - roicols); - masky = rng.uniform(0, mask.rows - roirows); - - if(type1 != nulltype) - { - mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows)); - //clmat1_roi = clmat1(Rect(src1x,src1y,roicols,roirows)); - } - if(type2 != nulltype) - { - mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows)); - //clmat2_roi = clmat2(Rect(src2x,src2y,roicols,roirows)); - } - if(type3 != nulltype) - { - dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); - //cldst_roi = cldst(Rect(dstx,dsty,roicols,roirows)); - } - if(type4 != nulltype) - { - dst1_roi = dst1(Rect(dst1x, dst1y, roicols, roirows)); - //cldst1_roi = cldst1(Rect(dst1x,dst1y,roicols,roirows)); - } - if(type5 != nulltype) - { - mask_roi = mask(Rect(maskx, masky, roicols, roirows)); - //clmask_roi = clmask(Rect(maskx,masky,roicols,roirows)); - } - } -}; -////////////////////////////////equalizeHist////////////////////////////////////////// - -struct equalizeHist : ImgprocTestBase {}; - -TEST_P(equalizeHist, MatType) -{ - if (mat1.type() != CV_8UC1 || mat1.type() != dst.type()) - { - cout << "Unsupported type" << endl; - EXPECT_DOUBLE_EQ(0.0, 0.0); - } - else - { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - cv::equalizeHist(mat1_roi, dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - if(type1 != nulltype) - { - clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows)); - } - cldst_roi = cldst(Rect(dstx, dsty, roicols, roirows)); - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::equalizeHist(clmat1_roi, cldst_roi); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_cldst; - //cldst.download(cpu_cldst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if(j == 0) - continue; - - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - if(type1 != nulltype) - { - clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows)); - } - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::equalizeHist(clmat1_roi, cldst_roi); - }; -#endif - } -} - - -////////////////////////////////bilateralFilter//////////////////////////////////////////// - -struct bilateralFilter : ImgprocTestBase {}; - -TEST_P(bilateralFilter, Mat) -{ - double sigmacolor = 50.0; - int radius = 9; - int d = 2 * radius + 1; - double sigmaspace = 20.0; - int bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE/*,cv::BORDER_REFLECT,cv::BORDER_WRAP,cv::BORDER_REFLECT_101*/}; - const char *borderstr[] = {"BORDER_CONSTANT", "BORDER_REPLICATE"/*, "BORDER_REFLECT","BORDER_WRAP","BORDER_REFLECT_101"*/}; - - if (mat1.depth() != CV_8U || mat1.type() != dst.type()) - { - cout << "Unsupported type" << endl; - EXPECT_DOUBLE_EQ(0.0, 0.0); - } - else - { - for(size_t i = 0; i < sizeof(bordertype) / sizeof(int); i++) - { - cout << borderstr[i] << endl; -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - if(((bordertype[i] != cv::BORDER_CONSTANT) && (bordertype[i] != cv::BORDER_REPLICATE) && (mat1_roi.cols <= radius)) || (mat1_roi.cols <= radius) || (mat1_roi.rows <= radius) || (mat1_roi.rows <= radius)) - { - continue; - } - t0 = (double)cvGetTickCount();//cpu start - cv::bilateralFilter(mat1_roi, dst_roi, d, sigmacolor, sigmaspace, bordertype[i]); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - if(type1 != nulltype) - { - clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows)); - } - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::bilateralFilter(clmat1_roi, cldst_roi, d, sigmacolor, sigmaspace, bordertype[i]); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_cldst; - cldst.download(cpu_cldst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if(j == 0) - continue; - - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } - -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - if(type1 != nulltype) - { - clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows)); - }; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::bilateralFilter(clmat1_roi, cldst_roi, d, sigmacolor, sigmaspace, bordertype[i]); - }; - -#endif - }; } } - -////////////////////////////////copyMakeBorder//////////////////////////////////////////// - -struct CopyMakeBorder : ImgprocTestBase {}; - -TEST_P(CopyMakeBorder, Mat) +/////////// CopyMakeBorder ////////////////////// +TEST(CopyMakeBorder) { - int bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE, cv::BORDER_REFLECT, cv::BORDER_WRAP, cv::BORDER_REFLECT_101}; - //const char* borderstr[]={"BORDER_CONSTANT", "BORDER_REPLICATE"/*, "BORDER_REFLECT","BORDER_WRAP","BORDER_REFLECT_101"*/}; - int top = 5; - int bottom = 5; - int left = 6; - int right = 6; - if (mat1.type() != dst.type()) + Mat src, dst; + ocl::oclMat d_dst; + + int bordertype = BORDER_CONSTANT; + int all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - cout << "Unsupported type" << endl; - EXPECT_DOUBLE_EQ(0.0, 0.0); - } - else - { - for(size_t i = 0; i < sizeof(bordertype) / sizeof(int); i++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < 1; k++) //don't support roi perf test - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[j] ; - t0 = (double)cvGetTickCount();//cpu start - cv::copyMakeBorder(mat1_roi, dst_roi, top, bottom, left, right, bordertype[i] | cv::BORDER_ISOLATED, cv::Scalar(1.0)); - t0 = (double)cvGetTickCount() - t0;//cpu end - t1 = (double)cvGetTickCount();//gpu start1 - if(type1 != nulltype) - { - clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows)); - } - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::copyMakeBorder(clmat1_roi, cldst_roi, top, bottom, left, right, bordertype[i] | cv::BORDER_ISOLATED, cv::Scalar(1.0)); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_cldst; - cldst.download(cpu_cldst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 + gen(src, size, size, all_type[j], 0, 256); - if(j == 0) - continue; + copyMakeBorder(src, dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0)); - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + CPU_ON; + copyMakeBorder(src, dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0)); + CPU_OFF; + + ocl::oclMat d_src(src); + + WARMUP_ON; + ocl::copyMakeBorder(d_src, d_dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0)); + WARMUP_OFF; + + GPU_ON; + ocl::copyMakeBorder(d_src, d_dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0)); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::copyMakeBorder(d_src, d_dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0)); + d_dst.download(dst); + GPU_FULL_OFF; + } - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - if(type1 != nulltype) - { - clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows)); - }; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::copyMakeBorder(clmat1_roi, cldst_roi, top, bottom, left, right, bordertype[i] | cv::BORDER_ISOLATED, cv::Scalar(1.0)); - }; -#endif - }; } } - -////////////////////////////////cornerMinEigenVal////////////////////////////////////////// - -struct cornerMinEigenVal : ImgprocTestBase {}; - -TEST_P(cornerMinEigenVal, Mat) +///////////// cornerMinEigenVal //////////////////////// +TEST(cornerMinEigenVal) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src, dst; + ocl::oclMat d_dst; + + int blockSize = 7, apertureSize = 1 + 2 * (rand() % 4); + int borderType = BORDER_REFLECT; + int all_type[] = {CV_8UC1, CV_32FC1}; + std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); - int blockSize = 7, apertureSize = 3; //1 + 2 * (rand() % 4); - int borderType = cv::BORDER_REFLECT; - t0 = (double)cvGetTickCount();//cpu start - cv::cornerMinEigenVal(mat1_roi, dst_roi, blockSize, apertureSize, borderType); - t0 = (double)cvGetTickCount() - t0;//cpu end + SUBTEST << size << 'x' << size << "; " << type_name[j] ; - t1 = (double)cvGetTickCount();//gpu start1 - if(type1 != nulltype) - { - clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows)); - } - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::cornerMinEigenVal(clmat1_roi, cldst_roi, blockSize, apertureSize, borderType); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_cldst; - cldst.download(cpu_cldst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; + gen(src, size, size, all_type[j], 0, 256); - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + cornerMinEigenVal(src, dst, blockSize, apertureSize, borderType); + CPU_ON; + cornerMinEigenVal(src, dst, blockSize, apertureSize, borderType); + CPU_OFF; + + ocl::oclMat d_src(src); + + WARMUP_ON; + ocl::cornerMinEigenVal(d_src, d_dst, blockSize, apertureSize, borderType); + WARMUP_OFF; + + GPU_ON; + ocl::cornerMinEigenVal(d_src, d_dst, blockSize, apertureSize, borderType); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::cornerMinEigenVal(d_src, d_dst, blockSize, apertureSize, borderType); + d_dst.download(dst); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - int blockSize = 7, apertureSize = 1 + 2 * (rand() % 4); - int borderType = cv::BORDER_REFLECT; - if(type1 != nulltype) - { - clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows)); - }; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::cornerMinEigenVal(clmat1_roi, cldst_roi, blockSize, apertureSize, borderType); - }; -#endif } - - -////////////////////////////////cornerHarris////////////////////////////////////////// - -struct cornerHarris : ImgprocTestBase {}; - -TEST_P(cornerHarris, Mat) +///////////// cornerHarris //////////////////////// +TEST(cornerHarris) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src, dst; + ocl::oclMat d_src, d_dst; + + int all_type[] = {CV_8UC1, CV_32FC1}; + std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); - int blockSize = 7, apertureSize = 3; - int borderType = cv::BORDER_REFLECT; - double kk = 2; - t0 = (double)cvGetTickCount();//cpu start - cv::cornerHarris(mat1_roi, dst_roi, blockSize, apertureSize, kk, borderType); - t0 = (double)cvGetTickCount() - t0;//cpu end + SUBTEST << size << 'x' << size << "; " << type_name[j] << " ; BORDER_REFLECT"; - t1 = (double)cvGetTickCount();//gpu start1 - if(type1 != nulltype) - { - clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows)); - } - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::cornerHarris(clmat1_roi, cldst_roi, blockSize, apertureSize, kk, borderType); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_cldst; - cldst.download(cpu_cldst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 + gen(src, size, size, all_type[j], 0, 1); - if(j == 0) - continue; + cornerHarris(src, dst, 5, 7, 0.1, BORDER_REFLECT); - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + CPU_ON; + cornerHarris(src, dst, 5, 7, 0.1, BORDER_REFLECT); + CPU_OFF; + d_src.upload(src); + + WARMUP_ON; + ocl::cornerHarris(d_src, d_dst, 5, 7, 0.1, BORDER_REFLECT); + WARMUP_OFF; + + GPU_ON; + ocl::cornerHarris(d_src, d_dst, 5, 7, 0.1, BORDER_REFLECT); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::cornerHarris(d_src, d_dst, 5, 7, 0.1, BORDER_REFLECT); + d_dst.download(dst); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - double kk = 2; - int blockSize = 7, apertureSize = 3; - int borderType = cv::BORDER_REFLECT; - if(type1 != nulltype) - { - clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows)); - }; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::cornerHarris(clmat1_roi, cldst_roi, blockSize, apertureSize, kk, borderType); - }; -#endif - } - - -////////////////////////////////integral///////////////////////////////////////////////// - -struct integral : ImgprocTestBase {}; - -TEST_P(integral, Mat) +///////////// integral //////////////////////// +TEST(integral) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src, sum; + ocl::oclMat d_src, d_sum, d_buf; + + int all_type[] = {CV_8UC1}; + std::string type_name[] = {"CV_8UC1"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); - t0 = (double)cvGetTickCount();//cpu start - cv::integral(mat1_roi, dst_roi, dst1_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end + SUBTEST << size << 'x' << size << "; " << type_name[j] ; - t1 = (double)cvGetTickCount();//gpu start1 - if(type1 != nulltype) - { - clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows)); - } - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::integral(clmat1_roi, cldst_roi, cldst1_roi); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_cldst; - cv::Mat cpu_cldst1; - cldst.download(cpu_cldst);//download - cldst1.download(cpu_cldst1); - t1 = (double)cvGetTickCount() - t1;//gpu end1 + gen(src, size, size, all_type[j], 0, 256); - if(j == 0) - continue; + integral(src, sum); - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + CPU_ON; + integral(src, sum); + CPU_OFF; + d_src.upload(src); + + WARMUP_ON; + ocl::integral(d_src, d_sum); + WARMUP_OFF; + + GPU_ON; + ocl::integral(d_src, d_sum); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::integral(d_src, d_sum); + d_sum.download(sum); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - if(type1 != nulltype) - { - clmat1_roi = clmat1(Rect(src1x, src1y, roicols, roirows)); - }; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::integral(clmat1_roi, cldst_roi, cldst1_roi); - }; -#endif } - - -///////////////////////////////////////////////////////////////////////////////////////////////// -// warpAffine & warpPerspective - -PARAM_TEST_CASE(WarpTestBase, MatType, int) +///////////// WarpAffine //////////////////////// +TEST(WarpAffine) { - int type; - cv::Size size; - int interpolation; + Mat src, dst; + ocl::oclMat d_src, d_dst; - //src mat - cv::Mat mat1; - cv::Mat dst; - - // set up roi - int src_roicols; - int src_roirows; - int dst_roicols; - int dst_roirows; - int src1x; - int src1y; - int dstx; - int dsty; - - - //src mat with roi - cv::Mat mat1_roi; - cv::Mat dst_roi; - //std::vector oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - - //ocl mat with roi - cv::ocl::oclMat gmat1; - cv::ocl::oclMat gdst; - - virtual void SetUp() - { - type = GET_PARAM(0); - //dsize = GET_PARAM(1); - interpolation = GET_PARAM(1); - - cv::RNG &rng = TS::ptr()->get_rng(); - size = cv::Size(MWIDTH, MHEIGHT); - - mat1 = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); - - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //cv::ocl::setBinpath(CLBINPATH); - } - void Has_roi(int b) - { - //cv::RNG& rng = TS::ptr()->get_rng(); - if(b) - { - //randomize ROI - src_roicols = mat1.cols - 1; //start - src_roirows = mat1.rows - 1; - dst_roicols = dst.cols - 1; - dst_roirows = dst.rows - 1; - src1x = 1; - src1y = 1; - dstx = 1; - dsty = 1; - - } - else - { - src_roicols = mat1.cols; - src_roirows = mat1.rows; - dst_roicols = dst.cols; - dst_roirows = dst.rows; - src1x = 0; - src1y = 0; - dstx = 0; - dsty = 0; - - }; - mat1_roi = mat1(Rect(src1x, src1y, src_roicols, src_roirows)); - dst_roi = dst(Rect(dstx, dsty, dst_roicols, dst_roirows)); - - - } - -}; - -/////warpAffine - -struct WarpAffine : WarpTestBase {}; - -TEST_P(WarpAffine, Mat) -{ static const double coeffs[2][3] = { {cos(3.14 / 6), -sin(3.14 / 6), 100.0}, {sin(3.14 / 6), cos(3.14 / 6), -100.0} }; Mat M(2, 3, CV_64F, (void *)coeffs); + int interpolation = INTER_NEAREST; -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + int all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; + + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[j] ; - t0 = (double)cvGetTickCount();//cpu start - cv::warpAffine(mat1_roi, dst_roi, M, size, interpolation); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src, size, size, all_type[j], 0, 256); + gen(dst, size, size, all_type[j], 0, 256); + Size size1 = Size(size, size); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, dst_roicols, dst_roirows)); + warpAffine(src, dst, M, size1, interpolation); - gmat1 = mat1_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::warpAffine(gmat1, gdst, M, size, interpolation); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 + CPU_ON; + warpAffine(src, dst, M, size1, interpolation); + CPU_OFF; - if(j == 0) - continue; + d_src.upload(src); - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + WARMUP_ON; + ocl::warpAffine(d_src, d_dst, M, size1, interpolation); + WARMUP_OFF; + GPU_ON; + ocl::warpAffine(d_src, d_dst, M, size1, interpolation); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::warpAffine(d_src, d_dst, M, size1, interpolation); + d_dst.download(dst); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, dst_roicols, dst_roirows)); - gmat1 = mat1_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::warpAffine(gmat1, gdst, M, size, interpolation); - }; -#endif - } - - -// warpPerspective - -struct WarpPerspective : WarpTestBase {}; - -TEST_P(WarpPerspective, Mat) +///////////// WarpPerspective //////////////////////// +TEST(WarpPerspective) { + Mat src, dst; + ocl::oclMat d_src, d_dst; + static const double coeffs[3][3] = { {cos(3.14 / 6), -sin(3.14 / 6), 100.0}, @@ -998,1154 +333,576 @@ TEST_P(WarpPerspective, Mat) {0.0, 0.0, 1.0} }; Mat M(3, 3, CV_64F, (void *)coeffs); + int interpolation = INTER_NEAREST; -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + int all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[j] ; - t0 = (double)cvGetTickCount();//cpu start - cv::warpPerspective(mat1_roi, dst_roi, M, size, interpolation); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src, size, size, all_type[j], 0, 256); + gen(dst, size, size, all_type[j], 0, 256); + Size size1 = Size(size, size); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, dst_roicols, dst_roirows)); + warpPerspective(src, dst, M, size1, interpolation); - gmat1 = mat1_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::warpPerspective(gmat1, gdst, M, size, interpolation); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 + CPU_ON; + warpPerspective(src, dst, M, size1, interpolation); + CPU_OFF; - if(j == 0) - continue; + d_src.upload(src); - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + WARMUP_ON; + ocl::warpPerspective(d_src, d_dst, M, size1, interpolation); + WARMUP_OFF; + GPU_ON; + ocl::warpPerspective(d_src, d_dst, M, size1, interpolation); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::warpPerspective(d_src, d_dst, M, size1, interpolation); + d_dst.download(dst); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, dst_roicols, dst_roirows)); - gmat1 = mat1_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::warpPerspective(gmat1, gdst, M, size, interpolation); - }; -#endif - } -///////////////////////////////////////////////////////////////////////////////////////////////// -// remap -////////////////////////////////////////////////////////////////////////////////////////////////// - -PARAM_TEST_CASE(Remap, MatType, MatType, MatType, int, int) +///////////// resize //////////////////////// +TEST(resize) { - int srcType; - int map1Type; - int map2Type; - cv::Scalar val; - - int interpolation; - int bordertype; - - cv::Mat src; - cv::Mat dst; - cv::Mat map1; - cv::Mat map2; + Mat src, dst; + ocl::oclMat d_src, d_dst; - int src_roicols; - int src_roirows; - int dst_roicols; - int dst_roirows; - int map1_roicols; - int map1_roirows; - int map2_roicols; - int map2_roirows; - int srcx; - int srcy; - int dstx; - int dsty; - int map1x; - int map1y; - int map2x; - int map2y; + int all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - cv::Mat src_roi; - cv::Mat dst_roi; - cv::Mat map1_roi; - cv::Mat map2_roi; - - //ocl mat for testing - cv::ocl::oclMat gdst; - - //ocl mat with roi - cv::ocl::oclMat gsrc_roi; - cv::ocl::oclMat gdst_roi; - cv::ocl::oclMat gmap1_roi; - cv::ocl::oclMat gmap2_roi; - - virtual void SetUp() + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - srcType = GET_PARAM(0); - map1Type = GET_PARAM(1); - map2Type = GET_PARAM(2); - interpolation = GET_PARAM(3); - bordertype = GET_PARAM(4); - - cv::RNG &rng = TS::ptr()->get_rng(); - cv::Size srcSize = cv::Size(MWIDTH, MHEIGHT); - cv::Size map1Size = cv::Size(MWIDTH, MHEIGHT); - double min = 5, max = 16; - - if(srcType != nulltype) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - src = randomMat(rng, srcSize, srcType, min, max, false); - } - if((map1Type == CV_16SC2 && map2Type == nulltype) || (map1Type == CV_32FC2 && map2Type == nulltype)) - { - map1 = randomMat(rng, map1Size, map1Type, min, max, false); + SUBTEST << size << 'x' << size << "; " << type_name[j] << " ; up"; - } - else if (map1Type == CV_32FC1 && map2Type == CV_32FC1) - { - map1 = randomMat(rng, map1Size, map1Type, min, max, false); - map2 = randomMat(rng, map1Size, map1Type, min, max, false); + gen(src, size, size, all_type[j], 0, 256); + + resize(src, dst, Size(), 2.0, 2.0); + + CPU_ON; + resize(src, dst, Size(), 2.0, 2.0); + CPU_OFF; + + d_src.upload(src); + + WARMUP_ON; + ocl::resize(d_src, d_dst, Size(), 2.0, 2.0); + WARMUP_OFF; + + GPU_ON; + ocl::resize(d_src, d_dst, Size(), 2.0, 2.0); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::resize(d_src, d_dst, Size(), 2.0, 2.0); + d_dst.download(dst); + GPU_FULL_OFF; } + } + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) + { + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) + { + SUBTEST << size << 'x' << size << "; " << type_name[j] << " ; down"; + + gen(src, size, size, all_type[j], 0, 256); + + resize(src, dst, Size(), 0.5, 0.5); + + CPU_ON; + resize(src, dst, Size(), 0.5, 0.5); + CPU_OFF; + + d_src.upload(src); + + WARMUP_ON; + ocl::resize(d_src, d_dst, Size(), 0.5, 0.5); + WARMUP_OFF; + + GPU_ON; + ocl::resize(d_src, d_dst, Size(), 0.5, 0.5); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::resize(d_src, d_dst, Size(), 0.5, 0.5); + d_dst.download(dst); + GPU_FULL_OFF; + } + + } +} +///////////// threshold//////////////////////// +TEST(threshold) +{ + Mat src, dst; + ocl::oclMat d_src, d_dst; + + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) + { + SUBTEST << size << 'x' << size << "; 8UC1; THRESH_BINARY"; + + gen(src, size, size, CV_8U, 0, 100); + + threshold(src, dst, 50.0, 0.0, THRESH_BINARY); + + CPU_ON; + threshold(src, dst, 50.0, 0.0, THRESH_BINARY); + CPU_OFF; + + d_src.upload(src); + + WARMUP_ON; + ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY); + WARMUP_OFF; + + GPU_ON; + ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY); + d_dst.download(dst); + GPU_FULL_OFF; + + } + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) + { + SUBTEST << size << 'x' << size << "; 32FC1; THRESH_TRUNC [NPP]"; + + gen(src, size, size, CV_32FC1, 0, 100); + + threshold(src, dst, 50.0, 0.0, THRESH_TRUNC); + + CPU_ON; + threshold(src, dst, 50.0, 0.0, THRESH_TRUNC); + CPU_OFF; + + d_src.upload(src); + + WARMUP_ON; + ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC); + WARMUP_OFF; + + GPU_ON; + ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC); + d_dst.download(dst); + GPU_FULL_OFF; + } +} +///////////// meanShiftFiltering//////////////////////// +TEST(meanShiftFiltering) +{ + int sp = 10, sr = 10; + Mat src, dst; + + ocl::oclMat d_src, d_dst; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) + { + SUBTEST << size << 'x' << size << "; 8UC3 vs 8UC4"; + + gen(src, size, size, CV_8UC3, Scalar::all(0), Scalar::all(256)); + + pyrMeanShiftFiltering(src, dst, sp, sr); + + CPU_ON; + pyrMeanShiftFiltering(src, dst, sp, sr); + CPU_OFF; + + gen(src, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256)); + + d_src.upload(src); + + WARMUP_ON; + ocl::meanShiftFiltering(d_src, d_dst, sp, sr); + WARMUP_OFF; + + GPU_ON; + ocl::meanShiftFiltering(d_src, d_dst, sp, sr); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::meanShiftFiltering(d_src, d_dst, sp, sr); + d_dst.download(dst); + GPU_FULL_OFF; + } +} +///////////// meanShiftProc//////////////////////// +COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size size, int sp, int sr, int maxIter, float eps, int *tab) +{ + + int isr2 = sr * sr; + int c0, c1, c2, c3; + int iter; + uchar *ptr = NULL; + uchar *pstart = NULL; + int revx = 0, revy = 0; + c0 = sptr[0]; + c1 = sptr[1]; + c2 = sptr[2]; + c3 = sptr[3]; + + // iterate meanshift procedure + for (iter = 0; iter < maxIter; iter++) + { + int count = 0; + int s0 = 0, s1 = 0, s2 = 0, sx = 0, sy = 0; + + //mean shift: process pixels in window (p-sigmaSp)x(p+sigmaSp) + int minx = x0 - sp; + int miny = y0 - sp; + int maxx = x0 + sp; + int maxy = y0 + sp; + + //deal with the image boundary + if (minx < 0) + { + minx = 0; + } + + if (miny < 0) + { + miny = 0; + } + + if (maxx >= size.width) + { + maxx = size.width - 1; + } + + if (maxy >= size.height) + { + maxy = size.height - 1; + } + + if (iter == 0) + { + pstart = sptr; + } else - cout << "The wrong input type" << endl; - - dst = randomMat(rng, map1Size, srcType, min, max, false); - switch (src.channels()) { - case 1: - val = cv::Scalar(rng.uniform(0.0, 10.0), 0, 0, 0); - break; - case 2: - val = cv::Scalar(rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), 0, 0); - break; - case 3: - val = cv::Scalar(rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), 0); - break; - case 4: - val = cv::Scalar(rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0), rng.uniform(0.0, 10.0)); + pstart = pstart + revy * sstep + (revx << 2); //point to the new position + } + + ptr = pstart; + ptr = ptr + (miny - y0) * sstep + ((minx - x0) << 2); //point to the start in the row + + for (int y = miny; y <= maxy; y++, ptr += sstep - ((maxx - minx + 1) << 2)) + { + int rowCount = 0; + int x = minx; +#if CV_ENABLE_UNROLLED + + for (; x + 4 <= maxx; x += 4, ptr += 16) + { + int t0, t1, t2; + t0 = ptr[0], t1 = ptr[1], t2 = ptr[2]; + + if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) + { + s0 += t0; + s1 += t1; + s2 += t2; + sx += x; + rowCount++; + } + + t0 = ptr[4], t1 = ptr[5], t2 = ptr[6]; + + if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) + { + s0 += t0; + s1 += t1; + s2 += t2; + sx += x + 1; + rowCount++; + } + + t0 = ptr[8], t1 = ptr[9], t2 = ptr[10]; + + if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) + { + s0 += t0; + s1 += t1; + s2 += t2; + sx += x + 2; + rowCount++; + } + + t0 = ptr[12], t1 = ptr[13], t2 = ptr[14]; + + if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) + { + s0 += t0; + s1 += t1; + s2 += t2; + sx += x + 3; + rowCount++; + } + } + +#endif + + for (; x <= maxx; x++, ptr += 4) + { + int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2]; + + if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) + { + s0 += t0; + s1 += t1; + s2 += t2; + sx += x; + rowCount++; + } + } + + if (rowCount == 0) + { + continue; + } + + count += rowCount; + sy += y * rowCount; + } + + if (count == 0) + { break; } - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - //if you want to use undefault device, set it here - //setDevice(oclinfo[0]); - //cv::ocl::setBinpath(CLBINPATH); - } - void Has_roi(int b) - { - if(b) + int x1 = sx / count; + int y1 = sy / count; + s0 = s0 / count; + s1 = s1 / count; + s2 = s2 / count; + + bool stopFlag = (x0 == x1 && y0 == y1) || (abs(x1 - x0) + abs(y1 - y0) + + tab[s0 - c0 + 255] + tab[s1 - c1 + 255] + tab[s2 - c2 + 255] <= eps); + + //revise the pointer corresponding to the new (y0,x0) + revx = x1 - x0; + revy = y1 - y0; + + x0 = x1; + y0 = y1; + c0 = s0; + c1 = s1; + c2 = s2; + + if (stopFlag) { - //randomize ROI - dst_roicols = dst.cols - 1; - dst_roirows = dst.rows - 1; - - src_roicols = src.cols - 1; - src_roirows = src.rows - 1; - - - srcx = 1; - srcy = 1; - dstx = 1; - dsty = 1; + break; } - else - { - dst_roicols = dst.cols; - dst_roirows = dst.rows; + } //for iter - src_roicols = src.cols; - src_roirows = src.rows; - - - srcx = 0; - srcy = 0; - dstx = 0; - dsty = 0; - } - map1_roicols = dst_roicols; - map1_roirows = dst_roirows; - map2_roicols = dst_roicols; - map2_roirows = dst_roirows; - map1x = dstx; - map1y = dsty; - map2x = dstx; - map2y = dsty; - - if((map1Type == CV_16SC2 && map2Type == nulltype) || (map1Type == CV_32FC2 && map2Type == nulltype)) - { - map1_roi = map1(Rect(map1x, map1y, map1_roicols, map1_roirows)); - gmap1_roi = map1_roi; - } - - else if (map1Type == CV_32FC1 && map2Type == CV_32FC1) - { - map1_roi = map1(Rect(map1x, map1y, map1_roicols, map1_roirows)); - map2_roi = map2(Rect(map2x, map2y, map2_roicols, map2_roirows)); - gmap1_roi = map1_roi; - gmap2_roi = map2_roi; - } - dst_roi = dst(Rect(dstx, dsty, dst_roicols, dst_roirows)); - src_roi = dst(Rect(srcx, srcy, src_roicols, src_roirows)); - - } -}; - -TEST_P(Remap, Mat) -{ - if((interpolation == 1 && map1Type == CV_16SC2) || (map1Type == CV_32FC1 && map2Type == nulltype) || (map1Type == CV_16SC2 && map2Type == CV_32FC1) || (map1Type == CV_32FC2 && map2Type == CV_32FC1)) - { - cout << "LINEAR don't support the map1Type and map2Type" << endl; - return; - } - int bordertype[] = {cv::BORDER_CONSTANT, cv::BORDER_REPLICATE/*,BORDER_REFLECT,BORDER_WRAP,BORDER_REFLECT_101*/}; - const char *borderstr[] = {"BORDER_CONSTANT", "BORDER_REPLICATE"/*, "BORDER_REFLECT","BORDER_WRAP","BORDER_REFLECT_101"*/}; - cout << borderstr[0] << endl; -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = 0; k < 2; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - cv::remap(src_roi, dst_roi, map1_roi, map2_roi, interpolation, bordertype[0], val); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start - gsrc_roi = src_roi; - gdst = dst; - gdst_roi = gdst(Rect(dstx, dsty, dst_roicols, dst_roirows)); - - t2 = (double)cvGetTickCount();//kernel - cv::ocl::remap(gsrc_roi, gdst_roi, gmap1_roi, gmap2_roi, interpolation, bordertype[0], val); - t2 = (double)cvGetTickCount() - t2;//kernel - - cv::Mat cpu_dst; - gdst.download(cpu_dst); - - t1 = (double)cvGetTickCount() - t1;//gpu end - - if (j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = 0; j < 2; j ++) - { - Has_roi(j); - gdst = dst; - gdst_roi = gdst(Rect(dstx, dsty, dst_roicols, dst_roirows)); - gsrc_roi = src_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::remap(gsrc_roi, gdst_roi, gmap1_roi, gmap2_roi, interpolation, bordertype[0], val); - }; -#endif + dptr[0] = (uchar)c0; + dptr[1] = (uchar)c1; + dptr[2] = (uchar)c2; + dptr[3] = (uchar)c3; + COOR coor; + coor.x = static_cast(x0); + coor.y = static_cast(y0); + return coor; } - -///////////////////////////////////////////////////////////////////////////////////////////////// -// resize - -PARAM_TEST_CASE(Resize, MatType, cv::Size, double, double, int) +void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi, int sp, int sr, cv::TermCriteria crit) { - int type; - cv::Size dsize; - double fx, fy; - int interpolation; - //src mat - cv::Mat mat1; - cv::Mat dst; - - // set up roi - int src_roicols; - int src_roirows; - int dst_roicols; - int dst_roirows; - int src1x; - int src1y; - int dstx; - int dsty; - - - //src mat with roi - cv::Mat mat1_roi; - cv::Mat dst_roi; - //std::vector oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - - //ocl mat with roi - cv::ocl::oclMat gmat1; - cv::ocl::oclMat gdst; - - virtual void SetUp() + if (src_roi.empty()) { - type = GET_PARAM(0); - dsize = GET_PARAM(1); - fx = GET_PARAM(2); - fy = GET_PARAM(3); - interpolation = GET_PARAM(4); - - cv::RNG &rng = TS::ptr()->get_rng(); - cv::Size size(MWIDTH, MHEIGHT); - - if(dsize == cv::Size() && !(fx > 0 && fy > 0)) - { - cout << "invalid dsize and fx fy" << endl; - return; - } - - if(dsize == cv::Size()) - { - dsize.width = (int)(size.width * fx); - dsize.height = (int)(size.height * fy); - } - - mat1 = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, dsize, type, 5, 16, false); - - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //cv::ocl::setBinpath(CLBINPATH); - } - void Has_roi(int b) - { - //cv::RNG& rng = TS::ptr()->get_rng(); - if(b) - { - //randomize ROI - src_roicols = mat1.cols - 1; //start - src_roirows = mat1.rows - 1; - dst_roicols = dst.cols - 1; - dst_roirows = dst.rows - 1; - src1x = 1; - src1y = 1; - dstx = 1; - dsty = 1; - - } - else - { - src_roicols = mat1.cols; - src_roirows = mat1.rows; - dst_roicols = dst.cols; - dst_roirows = dst.rows; - src1x = 0; - src1y = 0; - dstx = 0; - dsty = 0; - - }; - mat1_roi = mat1(Rect(src1x, src1y, src_roicols, src_roirows)); - dst_roi = dst(Rect(dstx, dsty, dst_roicols, dst_roirows)); - - + CV_Error(CV_StsBadArg, "The input image is empty"); } -}; - -TEST_P(Resize, Mat) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + if (src_roi.depth() != CV_8U || src_roi.channels() != 4) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - cv::resize(mat1_roi, dst_roi, dsize, fx, fy, interpolation); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, dst_roicols, dst_roirows)); - - gmat1 = mat1_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::resize(gmat1, gdst, dsize, fx, fy, interpolation); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if(j == 0) - continue; - - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + CV_Error(CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported"); } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) + + CV_Assert((src_roi.cols == dst_roi.cols) && (src_roi.rows == dst_roi.rows) && + (src_roi.cols == dstCoor_roi.cols) && (src_roi.rows == dstCoor_roi.rows)); + CV_Assert(!(dstCoor_roi.step & 0x3)); + + if (!(crit.type & cv::TermCriteria::MAX_ITER)) { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, dst_roicols, dst_roirows)); - gmat1 = mat1_roi; - if(j == 0) + crit.maxCount = 5; + } + + int maxIter = std::min(std::max(crit.maxCount, 1), 100); + float eps; + + if (!(crit.type & cv::TermCriteria::EPS)) + { + eps = 1.f; + } + + eps = (float)std::max(crit.epsilon, 0.0); + + int tab[512]; + + for (int i = 0; i < 512; i++) + { + tab[i] = (i - 255) * (i - 255); + } + + uchar *sptr = src_roi.data; + uchar *dptr = dst_roi.data; + short *dCoorptr = (short *)dstCoor_roi.data; + int sstep = (int)src_roi.step; + int dstep = (int)dst_roi.step; + int dCoorstep = (int)dstCoor_roi.step >> 1; + cv::Size size = src_roi.size(); + + for (int i = 0; i < size.height; i++, sptr += sstep - (size.width << 2), + dptr += dstep - (size.width << 2), dCoorptr += dCoorstep - (size.width << 1)) + { + for (int j = 0; j < size.width; j++, sptr += 4, dptr += 4, dCoorptr += 2) { - cout << "no roi:"; + *((COOR *)dCoorptr) = do_meanShift(j, i, sptr, dptr, sstep, size, sp, sr, maxIter, eps, tab); } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::resize(gmat1, gdst, dsize, fx, fy, interpolation); - }; -#endif + } } - -///////////////////////////////////////////////////////////////////////////////////////////////// -//threshold - -PARAM_TEST_CASE(Threshold, MatType, ThreshOp) +TEST(meanShiftProc) { - int type; - int threshOp; + Mat src, dst, dstCoor_roi; + ocl::oclMat d_src, d_dst, d_dstCoor_roi; - //src mat - cv::Mat mat1; - cv::Mat dst; + TermCriteria crit(TermCriteria::COUNT + TermCriteria::EPS, 5, 1); - // set up roi - int roicols; - int roirows; - int src1x; - int src1y; - int dstx; - int dsty; - - //src mat with roi - cv::Mat mat1_roi; - cv::Mat dst_roi; - //std::vector oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - - //ocl mat with roi - cv::ocl::oclMat gmat1; - cv::ocl::oclMat gdst; - - virtual void SetUp() + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - type = GET_PARAM(0); - threshOp = GET_PARAM(1); + SUBTEST << size << 'x' << size << "; 8UC4 and CV_16SC2 "; - cv::RNG &rng = TS::ptr()->get_rng(); - cv::Size size(MWIDTH, MHEIGHT); + gen(src, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256)); + gen(dst, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256)); + gen(dstCoor_roi, size, size, CV_16SC2, Scalar::all(0), Scalar::all(256)); - mat1 = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); + meanShiftProc_(src, dst, dstCoor_roi, 5, 6, crit); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //cv::ocl::setBinpath(CLBINPATH); - } - void Has_roi(int b) - { - //cv::RNG& rng = TS::ptr()->get_rng(); - if(b) - { - //randomize ROI - roicols = mat1.cols - 1; //start - roirows = mat1.rows - 1; - src1x = 1; - src1y = 1; - dstx = 1; - dsty = 1; + CPU_ON; + meanShiftProc_(src, dst, dstCoor_roi, 5, 6, crit); + CPU_OFF; - } - else - { - roicols = mat1.cols; - roirows = mat1.rows; - src1x = 0; - src1y = 0; - dstx = 0; - dsty = 0; + d_src.upload(src); - }; - mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows)); - dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); + WARMUP_ON; + ocl::meanShiftProc(d_src, d_dst, d_dstCoor_roi, 5, 6, crit); + WARMUP_OFF; + GPU_ON; + ocl::meanShiftProc(d_src, d_dst, d_dstCoor_roi, 5, 6, crit); + ; + GPU_OFF; - } -}; + GPU_FULL_ON; + d_src.upload(src); + ocl::meanShiftProc(d_src, d_dst, d_dstCoor_roi, 5, 6, crit); + d_dst.download(dst); + d_dstCoor_roi.download(dstCoor_roi); + GPU_FULL_OFF; -TEST_P(Threshold, Mat) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - double maxVal = randomDouble(20.0, 127.0); - double thresh = randomDouble(0.0, maxVal); - t0 = (double)cvGetTickCount();//cpu start - cv::threshold(mat1_roi, dst_roi, thresh, maxVal, threshOp); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::threshold(gmat1, gdst, thresh, maxVal, threshOp); - t2 = (double)cvGetTickCount() - t2;//kernel - - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if(j == 0) - continue; - - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - double maxVal = randomDouble(20.0, 127.0); - double thresh = randomDouble(0.0, maxVal); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - gmat1 = mat1_roi; - - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::threshold(gmat1, gdst, thresh, maxVal, threshOp); - }; -#endif - -} -/////////////////////////////////////////////////////////////////////////////////////////////////// -//meanShift - -PARAM_TEST_CASE(meanShiftTestBase, MatType, MatType, int, int, cv::TermCriteria) -{ - int type, typeCoor; - int sp, sr; - cv::TermCriteria crit; - //src mat - cv::Mat src; - cv::Mat dst; - cv::Mat dstCoor; - - //set up roi - int roicols; - int roirows; - int srcx; - int srcy; - int dstx; - int dsty; - - //src mat with roi - cv::Mat src_roi; - cv::Mat dst_roi; - cv::Mat dstCoor_roi; - - //ocl dst mat - cv::ocl::oclMat gdst; - cv::ocl::oclMat gdstCoor; - - //std::vector oclinfo; - //ocl mat with roi - cv::ocl::oclMat gsrc_roi; - cv::ocl::oclMat gdst_roi; - cv::ocl::oclMat gdstCoor_roi; - - virtual void SetUp() - { - type = GET_PARAM(0); - typeCoor = GET_PARAM(1); - sp = GET_PARAM(2); - sr = GET_PARAM(3); - crit = GET_PARAM(4); - - cv::RNG &rng = TS::ptr()->get_rng(); - - // MWIDTH=256, MHEIGHT=256. defined in utility.hpp - cv::Size size = cv::Size(MWIDTH, MHEIGHT); - - src = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); - dstCoor = randomMat(rng, size, typeCoor, 5, 16, false); - - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //cv::ocl::setBinpath(CLBINPATH); - } - - void Has_roi(int b) - { - if(b) - { - //randomize ROI - roicols = src.cols - 1; - roirows = src.rows - 1; - srcx = 1; - srcy = 1; - dstx = 1; - dsty = 1; - } - else - { - roicols = src.cols; - roirows = src.rows; - srcx = 0; - srcy = 0; - dstx = 0; - dsty = 0; - }; - - src_roi = src(Rect(srcx, srcy, roicols, roirows)); - dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); - dstCoor_roi = dstCoor(Rect(dstx, dsty, roicols, roirows)); - - gdst = dst; - gdstCoor = dstCoor; - } -}; - -/////////////////////////meanShiftFiltering///////////////////////////// -struct meanShiftFiltering : meanShiftTestBase {}; - -TEST_P(meanShiftFiltering, Mat) -{ - -#ifndef PRINT_KERNEL_RUN_TIME - double t1 = 0; - double t2 = 0; - for(int k = 0; k < 2; k++) - { - double totalgputick = 0; - double totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t1 = (double)cvGetTickCount();//gpu start1 - - gsrc_roi = src_roi; - gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows)); //gdst_roi - - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::meanShiftFiltering(gsrc_roi, gdst_roi, sp, sr, crit); - t2 = (double)cvGetTickCount() - t2;//kernel - - cv::Mat cpu_gdst; - gdst.download(cpu_gdst);//download - - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if(j == 0) - continue; - - totalgputick = t1 + totalgputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - - gsrc_roi = src_roi; - gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows)); //gdst_roi - - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::meanShiftFiltering(gsrc_roi, gdst_roi, sp, sr, crit); - }; -#endif - -} - -///////////////////////////meanShiftProc////////////////////////////////// -struct meanShiftProc : meanShiftTestBase {}; - -TEST_P(meanShiftProc, Mat) -{ - -#ifndef PRINT_KERNEL_RUN_TIME - double t1 = 0; - double t2 = 0; - for(int k = 0; k < 2; k++) - { - double totalgputick = 0; - double totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t1 = (double)cvGetTickCount();//gpu start1 - - gsrc_roi = src_roi; - gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows)); //gdst_roi - gdstCoor_roi = gdstCoor(Rect(dstx, dsty, roicols, roirows)); - - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::meanShiftProc(gsrc_roi, gdst_roi, gdstCoor_roi, sp, sr, crit); - t2 = (double)cvGetTickCount() - t2;//kernel - - cv::Mat cpu_gdstCoor; - gdstCoor.download(cpu_gdstCoor);//download - - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if(j == 0) - continue; - - totalgputick = t1 + totalgputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - - gsrc_roi = src_roi; - gdst_roi = gdst(Rect(dstx, dsty, roicols, roirows)); //gdst_roi - gdstCoor_roi = gdstCoor(Rect(dstx, dsty, roicols, roirows)); - - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::meanShiftProc(gsrc_roi, gdst_roi, gdstCoor_roi, sp, sr, crit); - }; -#endif - -} - -/////////////////////////////////////////////////////////////////////////////////////////// -//hist - -void calcHistGold(const cv::Mat &src, cv::Mat &hist) -{ - hist.create(1, 256, CV_32SC1); - hist.setTo(cv::Scalar::all(0)); - - int *hist_row = hist.ptr(); - for (int y = 0; y < src.rows; ++y) - { - const uchar *src_row = src.ptr(y); - - for (int x = 0; x < src.cols; ++x) - ++hist_row[src_row[x]]; } } -PARAM_TEST_CASE(histTestBase, MatType, MatType) +///////////// remap//////////////////////// +TEST(remap) { - int type_src; + Mat src, dst, xmap, ymap; + ocl::oclMat d_src, d_dst, d_xmap, d_ymap; - //src mat - cv::Mat src; - cv::Mat dst_hist; - //set up roi - int roicols; - int roirows; - int srcx; - int srcy; - //src mat with roi - cv::Mat src_roi; - //ocl dst mat, dst_hist and gdst_hist don't have roi - cv::ocl::oclMat gdst_hist; + int all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - //ocl mat with roi - cv::ocl::oclMat gsrc_roi; + int interpolation = INTER_LINEAR; + int borderMode = BORDER_CONSTANT; - // std::vector oclinfo; - - virtual void SetUp() + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - type_src = GET_PARAM(0); + for (size_t t = 0; t < sizeof(all_type) / sizeof(int); t++) + { + SUBTEST << size << 'x' << size << "; src " << type_name[t] << "; map CV_32FC1"; - cv::RNG &rng = TS::ptr()->get_rng(); - cv::Size size = cv::Size(MWIDTH, MHEIGHT); + gen(src, size, size, all_type[t], 0, 256); - src = randomMat(rng, size, type_src, 0, 256, false); + xmap.create(size, size, CV_32FC1); + dst.create(size, size, CV_32FC1); + ymap.create(size, size, CV_32FC1); + + for (int i = 0; i < size; ++i) + { + float *xmap_row = xmap.ptr(i); + float *ymap_row = ymap.ptr(i); + + for (int j = 0; j < size; ++j) + { + xmap_row[j] = (j - size * 0.5f) * 0.75f + size * 0.5f; + ymap_row[j] = (i - size * 0.5f) * 0.75f + size * 0.5f; + } + } + + + remap(src, dst, xmap, ymap, interpolation, borderMode); + + CPU_ON; + remap(src, dst, xmap, ymap, interpolation, borderMode); + CPU_OFF; + + d_src.upload(src); + d_dst.upload(dst); + d_xmap.upload(xmap); + d_ymap.upload(ymap); + + WARMUP_ON; + ocl::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode); + WARMUP_OFF; + + GPU_ON; + ocl::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode); + d_dst.download(dst); + GPU_FULL_OFF; + } - // int devnums = getDevice(oclinfo); - // CV_Assert(devnums > 0); - //if you want to use undefault device, set it here - //setDevice(oclinfo[0]); } - - void Has_roi(int b) - { - if(b) - { - //randomize ROI - roicols = src.cols - 1; - roirows = src.rows - 1; - srcx = 1; - srcy = 1; - } - else - { - roicols = src.cols; - roirows = src.rows; - srcx = 0; - srcy = 0; - }; - src_roi = src(Rect(srcx, srcy, roicols, roirows)); - } -}; - -///////////////////////////calcHist/////////////////////////////////////// -struct calcHist : histTestBase {}; - -TEST_P(calcHist, Mat) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = 0; k < 2; k++) - { - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - calcHistGold(src_roi, dst_hist); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - - gsrc_roi = src_roi; - - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::calcHist(gsrc_roi, gdst_hist); - t2 = (double)cvGetTickCount() - t2;//kernel - - cv::Mat cpu_hist; - gdst_hist.download(cpu_hist);//download - - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if(j == 0) - continue; - - totalcputick = t0 + totalcputick; - totalgputick = t1 + totalgputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = 0; j < 2; j ++) - { - Has_roi(j); - - gsrc_roi = src_roi; - - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::calcHist(gsrc_roi, gdst_hist); - }; -#endif -} - - -//************test******************* - -INSTANTIATE_TEST_CASE_P(ImgprocTestBase, equalizeHist, Combine( - ONE_TYPE(CV_8UC1), - NULL_TYPE, - ONE_TYPE(CV_8UC1), - NULL_TYPE, - NULL_TYPE, - Values(false))); // Values(false) is the reserved parameter - -INSTANTIATE_TEST_CASE_P(ImgprocTestBase, bilateralFilter, Combine( - Values(CV_8UC1, CV_8UC3), - NULL_TYPE, - Values(CV_8UC1, CV_8UC3), - NULL_TYPE, - NULL_TYPE, - Values(false))); // Values(false) is the reserved parameter - - -INSTANTIATE_TEST_CASE_P(ImgprocTestBase, CopyMakeBorder, Combine( - Values(CV_8UC1, CV_8UC4/*, CV_32SC1*/), - NULL_TYPE, - Values(CV_8UC1, CV_8UC4/*,CV_32SC1*/), - NULL_TYPE, - NULL_TYPE, - Values(false))); // Values(false) is the reserved parameter -INSTANTIATE_TEST_CASE_P(ImgprocTestBase, cornerMinEigenVal, Combine( - Values(CV_8UC1, CV_32FC1), - NULL_TYPE, - ONE_TYPE(CV_32FC1), - NULL_TYPE, - NULL_TYPE, - Values(false))); // Values(false) is the reserved parameter - -INSTANTIATE_TEST_CASE_P(ImgprocTestBase, cornerHarris, Combine( - Values(CV_8UC1, CV_32FC1), - NULL_TYPE, - ONE_TYPE(CV_32FC1), - NULL_TYPE, - NULL_TYPE, - Values(false))); // Values(false) is the reserved parameter - - -INSTANTIATE_TEST_CASE_P(ImgprocTestBase, integral, Combine( - ONE_TYPE(CV_8UC1), - NULL_TYPE, - ONE_TYPE(CV_32SC1), - ONE_TYPE(CV_32FC1), - NULL_TYPE, - Values(false))); // Values(false) is the reserved parameter - -INSTANTIATE_TEST_CASE_P(Imgproc, WarpAffine, Combine( - Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), - Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR, - (MatType)cv::INTER_CUBIC, (MatType)(cv::INTER_NEAREST | cv::WARP_INVERSE_MAP), - (MatType)(cv::INTER_LINEAR | cv::WARP_INVERSE_MAP), (MatType)(cv::INTER_CUBIC | cv::WARP_INVERSE_MAP)))); - - -INSTANTIATE_TEST_CASE_P(Imgproc, WarpPerspective, Combine - (Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), - Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR, - (MatType)cv::INTER_CUBIC, (MatType)(cv::INTER_NEAREST | cv::WARP_INVERSE_MAP), - (MatType)(cv::INTER_LINEAR | cv::WARP_INVERSE_MAP), (MatType)(cv::INTER_CUBIC | cv::WARP_INVERSE_MAP)))); - - -INSTANTIATE_TEST_CASE_P(Imgproc, Resize, Combine( - Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(cv::Size()), - Values(0.5/*, 1.5, 2*/), Values(0.5/*, 1.5, 2*/), Values((MatType)cv::INTER_NEAREST, (MatType)cv::INTER_LINEAR))); - - -INSTANTIATE_TEST_CASE_P(Imgproc, Threshold, Combine( - Values(CV_8UC1, CV_32FC1), Values(ThreshOp(cv::THRESH_BINARY), - ThreshOp(cv::THRESH_BINARY_INV), ThreshOp(cv::THRESH_TRUNC), - ThreshOp(cv::THRESH_TOZERO), ThreshOp(cv::THRESH_TOZERO_INV)))); - -INSTANTIATE_TEST_CASE_P(Imgproc, meanShiftFiltering, Combine( - ONE_TYPE(CV_8UC4), - ONE_TYPE(CV_16SC2),//it is no use in meanShiftFiltering - Values(5), - Values(6), - Values(cv::TermCriteria(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 5, 1)) - )); - -INSTANTIATE_TEST_CASE_P(Imgproc, meanShiftProc, Combine( - ONE_TYPE(CV_8UC4), - ONE_TYPE(CV_16SC2), - Values(5), - Values(6), - Values(cv::TermCriteria(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 5, 1)) - )); - -INSTANTIATE_TEST_CASE_P(Imgproc, Remap, Combine( - Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), - Values(CV_32FC1, CV_16SC2, CV_32FC2), Values(-1, CV_32FC1), - Values((int)cv::INTER_NEAREST, (int)cv::INTER_LINEAR), - Values((int)cv::BORDER_CONSTANT))); - -INSTANTIATE_TEST_CASE_P(histTestBase, calcHist, Combine( - ONE_TYPE(CV_8UC1), - ONE_TYPE(CV_32SC1) //no use - )); - -#endif // HAVE_OPENCL +} \ No newline at end of file diff --git a/modules/ocl/perf/perf_match_template.cpp b/modules/ocl/perf/perf_match_template.cpp index cb5e86bab..2828efe01 100644 --- a/modules/ocl/perf/perf_match_template.cpp +++ b/modules/ocl/perf/perf_match_template.cpp @@ -42,191 +42,105 @@ // the use of this software, even if advised of the possibility of such damage. // //M*/ - #include "precomp.hpp" -#include -#ifdef HAVE_OPENCL -using namespace cv; -using namespace cv::ocl; -using namespace cvtest; -using namespace testing; -using namespace std; -#ifndef MWC_TEST_UTILITY -#define MWC_TEST_UTILITY -//////// Utility -#ifndef DIFFERENT_SIZES -#else -#undef DIFFERENT_SIZES -#endif -#define DIFFERENT_SIZES testing::Values(cv::Size(256, 256), cv::Size(3000, 3000)) - -// Param class -#ifndef IMPLEMENT_PARAM_CLASS -#define IMPLEMENT_PARAM_CLASS(name, type) \ -class name \ -{ \ -public: \ - name ( type arg = type ()) : val_(arg) {} \ - operator type () const {return val_;} \ -private: \ - type val_; \ -}; \ - inline void PrintTo( name param, std::ostream* os) \ -{ \ - *os << #name << "(" << testing::PrintToString(static_cast< type >(param)) << ")"; \ -} - -IMPLEMENT_PARAM_CLASS(Channels, int) -#endif // IMPLEMENT_PARAM_CLASS -#endif // MWC_TEST_UTILITY - -//////////////////////////////////////////////////////////////////////////////// -// MatchTemplate -#define ALL_TEMPLATE_METHODS testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR), TemplateMethod(cv::TM_CCOEFF), TemplateMethod(cv::TM_SQDIFF_NORMED), TemplateMethod(cv::TM_CCORR_NORMED), TemplateMethod(cv::TM_CCOEFF_NORMED)) - -IMPLEMENT_PARAM_CLASS(TemplateSize, cv::Size); - -const char *TEMPLATE_METHOD_NAMES[6] = {"TM_SQDIFF", "TM_SQDIFF_NORMED", "TM_CCORR", "TM_CCORR_NORMED", "TM_CCOEFF", "TM_CCOEFF_NORMED"}; - -PARAM_TEST_CASE(MatchTemplate, cv::Size, TemplateSize, Channels, TemplateMethod) +/////////// matchTemplate //////////////////////// +//void InitMatchTemplate() +//{ +// Mat src; gen(src, 500, 500, CV_32F, 0, 1); +// Mat templ; gen(templ, 500, 500, CV_32F, 0, 1); +// ocl::oclMat d_src(src), d_templ(templ), d_dst; +// ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR); +//} +TEST(matchTemplate) { - cv::Size size; - cv::Size templ_size; - int cn; - int method; - //vector oclinfo; + //InitMatchTemplate(); - virtual void SetUp() + Mat src, templ, dst; + int templ_size = 5; + + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - size = GET_PARAM(0); - templ_size = GET_PARAM(1); - cn = GET_PARAM(2); - method = GET_PARAM(3); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); + int all_type[] = {CV_32FC1, CV_32FC4}; + std::string type_name[] = {"CV_32FC1", "CV_32FC4"}; + + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) + { + for(templ_size = 5; templ_size <= 5; templ_size *= 5) + { + gen(src, size, size, all_type[j], 0, 1); + + SUBTEST << src.cols << 'x' << src.rows << "; " << type_name[j] << "; templ " << templ_size << 'x' << templ_size << "; CCORR"; + + gen(templ, templ_size, templ_size, all_type[j], 0, 1); + + matchTemplate(src, templ, dst, CV_TM_CCORR); + + CPU_ON; + matchTemplate(src, templ, dst, CV_TM_CCORR); + CPU_OFF; + + ocl::oclMat d_src(src), d_templ, d_dst; + + d_templ.upload(templ); + + WARMUP_ON; + ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR); + WARMUP_OFF; + + GPU_ON; + ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + d_templ.upload(templ); + ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR); + d_dst.download(dst); + GPU_FULL_OFF; + } + } + + int all_type_8U[] = {CV_8UC1}; + std::string type_name_8U[] = {"CV_8UC1"}; + + for (size_t j = 0; j < sizeof(all_type_8U) / sizeof(int); j++) + { + for(templ_size = 5; templ_size <= 5; templ_size *= 5) + { + SUBTEST << src.cols << 'x' << src.rows << "; " << type_name_8U[j] << "; templ " << templ_size << 'x' << templ_size << "; CCORR_NORMED"; + + gen(src, size, size, all_type_8U[j], 0, 255); + + gen(templ, templ_size, templ_size, all_type_8U[j], 0, 255); + + matchTemplate(src, templ, dst, CV_TM_CCORR_NORMED); + + CPU_ON; + matchTemplate(src, templ, dst, CV_TM_CCORR_NORMED); + CPU_OFF; + + ocl::oclMat d_src(src); + ocl::oclMat d_templ(templ), d_dst; + + WARMUP_ON; + ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR_NORMED); + WARMUP_OFF; + + GPU_ON; + ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR_NORMED); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + d_templ.upload(templ); + ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR_NORMED); + d_dst.download(dst); + GPU_FULL_OFF; + } + } } -}; -struct MatchTemplate8U : MatchTemplate {}; - -TEST_P(MatchTemplate8U, Performance) -{ - std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl; - std::cout << "Image Size: (" << size.width << ", " << size.height << ")" << std::endl; - std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")" << std::endl; - std::cout << "Channels: " << cn << std::endl; - - cv::Mat image = randomMat(size, CV_MAKETYPE(CV_8U, cn)); - cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_8U, cn)); - cv::Mat dst_gold; - cv::ocl::oclMat dst; - - - - - - double totalgputick = 0; - double totalgputick_kernel = 0; - - double t1 = 0; - double t2 = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - - t1 = (double)cvGetTickCount();//gpu start1 - - cv::ocl::oclMat ocl_image = cv::ocl::oclMat(image);//upload - cv::ocl::oclMat ocl_templ = cv::ocl::oclMat(templ);//upload - - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method); - t2 = (double)cvGetTickCount() - t2;//kernel - - cv::Mat cpu_dst; - dst.download (cpu_dst);//download - - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if(j == 0) - continue; - - totalgputick = t1 + totalgputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - - -} - - -struct MatchTemplate32F : MatchTemplate {}; -TEST_P(MatchTemplate32F, Performance) -{ - std::cout << "Method: " << TEMPLATE_METHOD_NAMES[method] << std::endl; - std::cout << "Image Size: (" << size.width << ", " << size.height << ")" << std::endl; - std::cout << "Template Size: (" << templ_size.width << ", " << templ_size.height << ")" << std::endl; - std::cout << "Channels: " << cn << std::endl; - cv::Mat image = randomMat(size, CV_MAKETYPE(CV_32F, cn)); - cv::Mat templ = randomMat(templ_size, CV_MAKETYPE(CV_32F, cn)); - - cv::Mat dst_gold; - cv::ocl::oclMat dst; - - - - - double totalgputick = 0; - double totalgputick_kernel = 0; - - double t1 = 0; - double t2 = 0; - for(int j = 0; j < LOOP_TIMES; j ++) - { - - t1 = (double)cvGetTickCount();//gpu start1 - - cv::ocl::oclMat ocl_image = cv::ocl::oclMat(image);//upload - cv::ocl::oclMat ocl_templ = cv::ocl::oclMat(templ);//upload - - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::matchTemplate(ocl_image, ocl_templ, dst, method); - t2 = (double)cvGetTickCount() - t2;//kernel - - cv::Mat cpu_dst; - dst.download (cpu_dst);//download - - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - totalgputick = t1 + totalgputick; - - totalgputick_kernel = t2 + totalgputick_kernel; - - } - - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - - - -} - - -INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate8U, - testing::Combine( - testing::Values(cv::Size(1280, 1024), cv::Size(MWIDTH, MHEIGHT), cv::Size(1800, 1500)), - testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/), - testing::Values(Channels(1), Channels(4)/*, Channels(3)*/), - ALL_TEMPLATE_METHODS - ) - ); - -INSTANTIATE_TEST_CASE_P(GPU_ImgProc, MatchTemplate32F, testing::Combine( - testing::Values(cv::Size(1280, 1024), cv::Size(MWIDTH, MHEIGHT), cv::Size(1800, 1500)), - testing::Values(TemplateSize(cv::Size(5, 5)), TemplateSize(cv::Size(16, 16))/*, TemplateSize(cv::Size(30, 30))*/), - testing::Values(Channels(1), Channels(4) /*, Channels(3)*/), - testing::Values(TemplateMethod(cv::TM_SQDIFF), TemplateMethod(cv::TM_CCORR)))); - -#endif //HAVE_OPENCL \ No newline at end of file +} \ No newline at end of file diff --git a/modules/ocl/perf/perf_matrix_operation.cpp b/modules/ocl/perf/perf_matrix_operation.cpp index ba011f8df..495b2b82c 100644 --- a/modules/ocl/perf/perf_matrix_operation.cpp +++ b/modules/ocl/perf/perf_matrix_operation.cpp @@ -10,12 +10,12 @@ // License Agreement // For Open Source Computer Vision Library // -// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. // Third party copyrights are property of their respective owners. // // @Authors -// Jia Haipeng, jiahaipeng95@gmail.com +// Fangfang Bai, fangfang@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -30,7 +30,7 @@ // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // -// This software is provided by the copyright holders and contributors "as is" and +// This software is provided by the copyright holders and contributors as is and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, @@ -42,697 +42,140 @@ // the use of this software, even if advised of the possibility of such damage. // //M*/ - #include "precomp.hpp" -#ifdef HAVE_OPENCL - -using namespace cvtest; -using namespace testing; -using namespace std; -using namespace cv::ocl; -////////////////////////////////converto///////////////////////////////////////////////// -PARAM_TEST_CASE(ConvertToTestBase, MatType, MatType) +///////////// ConvertTo//////////////////////// +TEST(ConvertTo) { - int type; - int dst_type; + Mat src, dst; + ocl::oclMat d_src, d_dst; - //src mat - cv::Mat mat; - cv::Mat dst; + int all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - // set up roi - int roicols; - int roirows; - int srcx; - int srcy; - int dstx; - int dsty; - - //src mat with roi - cv::Mat mat_roi; - cv::Mat dst_roi; - //std::vector oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - - //ocl mat with roi - cv::ocl::oclMat gmat; - cv::ocl::oclMat gdst; - - virtual void SetUp() + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - type = GET_PARAM(0); - dst_type = GET_PARAM(1); + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) + { + SUBTEST << size << 'x' << size << "; " << type_name[j] << " to 32FC1"; - cv::RNG &rng = TS::ptr()->get_rng(); - cv::Size size(MWIDTH, MHEIGHT); + gen(src, size, size, all_type[j], 0, 256); + //gen(dst, size, size, all_type[j], 0, 256); + + //d_dst.upload(dst); + + src.convertTo(dst, CV_32FC1); + + CPU_ON; + src.convertTo(dst, CV_32FC1); + CPU_OFF; + + d_src.upload(src); + + WARMUP_ON; + d_src.convertTo(d_dst, CV_32FC1); + WARMUP_OFF; + + GPU_ON; + d_src.convertTo(d_dst, CV_32FC1); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + d_src.convertTo(d_dst, CV_32FC1); + d_dst.download(dst); + GPU_FULL_OFF; + } - mat = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //setBinpath(CLBINPATH); } - - void Has_roi(int b) - { - //cv::RNG& rng = TS::ptr()->get_rng(); - if(b) - { - //randomize ROI - roicols = mat.cols - 1; //start - roirows = mat.rows - 1; - srcx = 1; - srcy = 1; - dstx = 1; - dsty = 1; - } - else - { - roicols = mat.cols; - roirows = mat.rows; - srcx = 0; - srcy = 0; - dstx = 0; - dsty = 0; - }; - - mat_roi = mat(Rect(srcx, srcy, roicols, roirows)); - dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); - - //gdst_whole = dst; - //gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); - - //gmat = mat_roi; - } -}; - - -struct ConvertTo : ConvertToTestBase {}; - -TEST_P(ConvertTo, Accuracy) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - mat_roi.convertTo(dst_roi, dst_type); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat = mat_roi; - t2 = (double)cvGetTickCount(); //kernel - gmat.convertTo(gdst, dst_type); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat = mat_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - gmat.convertTo(gdst, dst_type); - }; -#endif - } - - -///////////////////////////////////////////copyto///////////////////////////////////////////////////////////// - -PARAM_TEST_CASE(CopyToTestBase, MatType, bool) +///////////// copyTo//////////////////////// +TEST(copyTo) { - int type; + Mat src, dst; + ocl::oclMat d_src, d_dst; - cv::Mat mat; - cv::Mat mask; - cv::Mat dst; + int all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - // set up roi - int roicols; - int roirows; - int srcx; - int srcy; - int dstx; - int dsty; - int maskx; - int masky; - - //src mat with roi - cv::Mat mat_roi; - cv::Mat mask_roi; - cv::Mat dst_roi; - //std::vector oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - - //ocl mat with roi - cv::ocl::oclMat gmat; - cv::ocl::oclMat gdst; - cv::ocl::oclMat gmask; - - virtual void SetUp() + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - type = GET_PARAM(0); + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) + { + SUBTEST << size << 'x' << size << "; " << type_name[j] ; - cv::RNG &rng = TS::ptr()->get_rng(); - cv::Size size(MWIDTH, MHEIGHT); + gen(src, size, size, all_type[j], 0, 256); + //gen(dst, size, size, all_type[j], 0, 256); - mat = randomMat(rng, size, type, 5, 16, false); - dst = randomMat(rng, size, type, 5, 16, false); - mask = randomMat(rng, size, CV_8UC1, 0, 2, false); + //d_dst.upload(dst); + + src.copyTo(dst); + + CPU_ON; + src.copyTo(dst); + CPU_OFF; + + d_src.upload(src); + + WARMUP_ON; + d_src.copyTo(d_dst); + WARMUP_OFF; + + GPU_ON; + d_src.copyTo(d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + d_src.copyTo(d_dst); + d_dst.download(dst); + GPU_FULL_OFF; + } - cv::threshold(mask, mask, 0.5, 255., CV_8UC1); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //setBinpath(CLBINPATH); } - - void Has_roi(int b) - { - //cv::RNG& rng = TS::ptr()->get_rng(); - if(b) - { - //randomize ROI - roicols = mat.cols - 1; //start - roirows = mat.rows - 1; - srcx = 1; - srcy = 1; - dstx = 1; - dsty = 1; - maskx = 1; - masky = 1; - } - else - { - roicols = mat.cols; - roirows = mat.rows; - srcx = 0; - srcy = 0; - dstx = 0; - dsty = 0; - maskx = 0; - masky = 0; - }; - - mat_roi = mat(Rect(srcx, srcy, roicols, roirows)); - mask_roi = mask(Rect(maskx, masky, roicols, roirows)); - dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); - - //gdst_whole = dst; - //gdst = gdst_whole(Rect(dstx,dsty,roicols,roirows)); - - //gmat = mat_roi; - //gmask = mask_roi; - } -}; - -struct CopyTo : CopyToTestBase {}; - -TEST_P(CopyTo, Without_mask) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - mat_roi.copyTo(dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat = mat_roi; - t2 = (double)cvGetTickCount(); //kernel - gmat.copyTo(gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat = mat_roi; - - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - gmat.copyTo(gdst); - }; -#endif } - -TEST_P(CopyTo, With_mask) +///////////// setTo//////////////////////// +TEST(setTo) { -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) + Mat src, dst; + Scalar val(1, 2, 3, 4); + ocl::oclMat d_src, d_dst; + + int all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - Has_roi(k); + SUBTEST << size << 'x' << size << "; " << type_name[j] ; - t0 = (double)cvGetTickCount();//cpu start - mat_roi.copyTo(dst_roi, mask_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end + gen(src, size, size, all_type[j], 0, 256); - t1 = (double)cvGetTickCount();//gpu start1 - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); + src.setTo(val); - gmat = mat_roi; - gmask = mask_roi; - t2 = (double)cvGetTickCount(); //kernel - gmat.copyTo(gdst, gmask); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; + CPU_ON; + src.setTo(val); + CPU_OFF; + d_src.upload(src); + + WARMUP_ON; + d_src.setTo(val); + WARMUP_OFF; + + GPU_ON; + d_src.setTo(val); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + d_src.setTo(val); + GPU_FULL_OFF; } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; + } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - - gmat = mat_roi; - gmask = mask_roi; - - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - gmat.copyTo(gdst, gmask); - }; -#endif -} - -///////////////////////////////////////////copyto///////////////////////////////////////////////////////////// - -PARAM_TEST_CASE(SetToTestBase, MatType, bool) -{ - int type; - cv::Scalar val; - - cv::Mat mat; - cv::Mat mask; - - // set up roi - int roicols; - int roirows; - int srcx; - int srcy; - int maskx; - int masky; - - //src mat with roi - cv::Mat mat_roi; - cv::Mat mask_roi; - //std::vector oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gmat_whole; - - //ocl mat with roi - cv::ocl::oclMat gmat; - cv::ocl::oclMat gmask; - - virtual void SetUp() - { - type = GET_PARAM(0); - - cv::RNG &rng = TS::ptr()->get_rng(); - cv::Size size(MWIDTH, MHEIGHT); - - mat = randomMat(rng, size, type, 5, 16, false); - mask = randomMat(rng, size, CV_8UC1, 0, 2, false); - - cv::threshold(mask, mask, 0.5, 255., CV_8UC1); - val = cv::Scalar(rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0), rng.uniform(-10.0, 10.0)); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //setBinpath(CLBINPATH); - } - - void Has_roi(int b) - { - //cv::RNG& rng = TS::ptr()->get_rng(); - if(b) - { - //randomize ROI - roicols = mat.cols - 1; //start - roirows = mat.rows - 1; - srcx = 1; - srcy = 1; - maskx = 1; - masky = 1; - } - else - { - roicols = mat.cols; - roirows = mat.rows; - srcx = 0; - srcy = 0; - maskx = 0; - masky = 0; - }; - - mat_roi = mat(Rect(srcx, srcy, roicols, roirows)); - mask_roi = mask(Rect(maskx, masky, roicols, roirows)); - - //gmat_whole = mat; - //gmat = gmat_whole(Rect(srcx,srcy,roicols,roirows)); - - //gmask = mask_roi; - } -}; - -struct SetTo : SetToTestBase {}; - -TEST_P(SetTo, Without_mask) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - mat_roi.setTo(val); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gmat_whole = mat; - gmat = gmat_whole(Rect(srcx, srcy, roicols, roirows)); - t2 = (double)cvGetTickCount(); //kernel - gmat.setTo(val); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gmat_whole.download(cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gmat_whole = mat; - gmat = gmat_whole(Rect(srcx, srcy, roicols, roirows)); - - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - gmat.setTo(val); - }; -#endif -} - -TEST_P(SetTo, With_mask) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - - t0 = (double)cvGetTickCount();//cpu start - mat_roi.setTo(val, mask_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gmat_whole = mat; - gmat = gmat_whole(Rect(srcx, srcy, roicols, roirows)); - - gmask = mask_roi; - t2 = (double)cvGetTickCount(); //kernel - gmat.setTo(val, gmask); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gmat_whole.download(cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gmat_whole = mat; - gmat = gmat_whole(Rect(srcx, srcy, roicols, roirows)); - - gmask = mask_roi; - - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - gmat.setTo(val, gmask); - }; -#endif -} -PARAM_TEST_CASE(DataTransfer, MatType, bool) -{ - int type; - cv::Mat mat; - cv::ocl::oclMat gmat_whole; - - virtual void SetUp() - { - type = GET_PARAM(0); - cv::RNG &rng = TS::ptr()->get_rng(); - cv::Size size(MWIDTH, MHEIGHT); - mat = randomMat(rng, size, type, 5, 16, false); - } -}; -TEST_P(DataTransfer, perf) -{ - double totaluploadtick = 0; - double totaldownloadtick = 0; - double totaltick = 0; - double t0 = 0; - double t1 = 0; - cv::Mat cpu_dst; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - t0 = (double)cvGetTickCount(); - gmat_whole.upload(mat);//upload - t0 = (double)cvGetTickCount() - t0; - - t1 = (double)cvGetTickCount(); - gmat_whole.download(cpu_dst);//download - t1 = (double)cvGetTickCount() - t1; - - if(j == 0) - continue; - totaluploadtick = t0 + totaluploadtick; - totaldownloadtick = t1 + totaldownloadtick; - } - totaltick = totaluploadtick + totaldownloadtick; - cout << "average upload time is " << totaluploadtick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average download time is " << totaldownloadtick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average data transfer time is " << totaltick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; -} -//**********test************ - -INSTANTIATE_TEST_CASE_P(MatrixOperation, ConvertTo, Combine( - Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), - Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4))); - -INSTANTIATE_TEST_CASE_P(MatrixOperation, CopyTo, Combine( - Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), - Values(false))); // Values(false) is the reserved parameter - -INSTANTIATE_TEST_CASE_P(MatrixOperation, SetTo, Combine( - Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), - Values(false))); // Values(false) is the reserved parameter -INSTANTIATE_TEST_CASE_P(MatrixOperation, DataTransfer, Combine( - Values(CV_8UC1, CV_8UC3, CV_8UC4, CV_32FC1, CV_32FC3, CV_32FC4), - Values(false))); // Values(false) is the reserved parameter -#endif +} \ No newline at end of file diff --git a/modules/ocl/perf/perf_norm.cpp b/modules/ocl/perf/perf_norm.cpp new file mode 100644 index 000000000..8b7118a6e --- /dev/null +++ b/modules/ocl/perf/perf_norm.cpp @@ -0,0 +1,84 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Fangfang Bai, fangfang@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ +#include "precomp.hpp" + +///////////// norm//////////////////////// +TEST(norm) +{ + Mat src, buf; + ocl::oclMat d_src, d_buf; + + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) + { + SUBTEST << size << 'x' << size << "; CV_8UC1; NORM_INF"; + + gen(src, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1)); + gen(buf, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1)); + + norm(src, NORM_INF); + + CPU_ON; + norm(src, NORM_INF); + CPU_OFF; + + d_src.upload(src); + d_buf.upload(buf); + + WARMUP_ON; + ocl::norm(d_src, d_buf, NORM_INF); + WARMUP_OFF; + + GPU_ON; + ocl::norm(d_src, d_buf, NORM_INF); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::norm(d_src, d_buf, NORM_INF); + GPU_FULL_OFF; + } +} \ No newline at end of file diff --git a/modules/ocl/perf/perf_pyrdown.cpp b/modules/ocl/perf/perf_pyrdown.cpp index 2cea4df4a..1d1d2dec1 100644 --- a/modules/ocl/perf/perf_pyrdown.cpp +++ b/modules/ocl/perf/perf_pyrdown.cpp @@ -1,4 +1,4 @@ -/////////////////////////////////////////////////////////////////////////////////////// +/*M/////////////////////////////////////////////////////////////////////////////////////// // // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. // @@ -15,7 +15,7 @@ // Third party copyrights are property of their respective owners. // // @Authors -// fangfang bai, fangfang@multicorewareinc.com +// Fangfang Bai, fangfang@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -30,7 +30,7 @@ // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // -// This software is provided by the copyright holders and contributors "as is" and +// This software is provided by the copyright holders and contributors as is and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, @@ -42,96 +42,46 @@ // the use of this software, even if advised of the possibility of such damage. // //M*/ - #include "precomp.hpp" -#include -#ifdef HAVE_OPENCL - -using namespace cv; -using namespace cv::ocl; -using namespace cvtest; -using namespace testing; -using namespace std; - -PARAM_TEST_CASE(PyrDown, MatType, int) +///////////// pyrDown ////////////////////// +TEST(pyrDown) { - int type; - int channels; - //src mat - cv::Mat mat1; - cv::Mat dst; + Mat src, dst; + int all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - //std::vector oclinfo; - //ocl dst mat for testing - - cv::ocl::oclMat gmat1; - cv::ocl::oclMat gdst; - - - virtual void SetUp() + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - type = GET_PARAM(0); - channels = GET_PARAM(1); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - } - - -}; - -#define VARNAME(A) string(#A); - -////////////////////////////////PyrDown///////////////////////////////////////////////// -TEST_P(PyrDown, Mat) -{ - cv::Size size(MWIDTH, MHEIGHT); - cv::RNG &rng = TS::ptr()->get_rng(); - mat1 = randomMat(rng, size, CV_MAKETYPE(type, channels), 5, 16, false); - - - cv::ocl::oclMat gdst; - double totalgputick = 0; - double totalgputick_kernel = 0; - - double t1 = 0; - double t2 = 0; - - for (int j = 0; j < LOOP_TIMES + 1; j ++) - { - - t1 = (double)cvGetTickCount();//gpu start1 - - cv::ocl::oclMat gmat1(mat1); - - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::pyrDown(gmat1, gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - - cv::Mat cpu_dst; - gdst.download(cpu_dst); - - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if (j == 0) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - continue; + SUBTEST << size << 'x' << size << "; " << type_name[j] ; + + gen(src, size, size, all_type[j], 0, 256); + + pyrDown(src, dst); + + CPU_ON; + pyrDown(src, dst); + CPU_OFF; + + ocl::oclMat d_src(src); + ocl::oclMat d_dst; + + WARMUP_ON; + ocl::pyrDown(d_src, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::pyrDown(d_src, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::pyrDown(d_src, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; } - - totalgputick = t1 + totalgputick; - - totalgputick_kernel = t2 + totalgputick_kernel; - } - - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - -} - -//********test**************** -INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrDown, Combine( - Values(CV_8U, CV_32F), Values(1, 4))); - - -#endif // HAVE_OPENCL +} \ No newline at end of file diff --git a/modules/ocl/perf/perf_pyrlk.cpp b/modules/ocl/perf/perf_pyrlk.cpp new file mode 100644 index 000000000..f7fc22b9d --- /dev/null +++ b/modules/ocl/perf/perf_pyrlk.cpp @@ -0,0 +1,143 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Fangfang Bai, fangfang@multicorewareinc.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ +#include "precomp.hpp" + +///////////// PyrLKOpticalFlow //////////////////////// +TEST(PyrLKOpticalFlow) +{ + std::string images1[] = {"rubberwhale1.png", "aloeL.jpg"}; + std::string images2[] = {"rubberwhale2.png", "aloeR.jpg"}; + + for (size_t i = 0; i < sizeof(images1) / sizeof(std::string); i++) + { + Mat frame0 = imread(abspath(images1[i]), i == 0 ? IMREAD_COLOR : IMREAD_GRAYSCALE); + + if (frame0.empty()) + { + std::string errstr = "can't open " + images1[i]; + throw runtime_error(errstr); + } + + Mat frame1 = imread(abspath(images2[i]), i == 0 ? IMREAD_COLOR : IMREAD_GRAYSCALE); + + if (frame1.empty()) + { + std::string errstr = "can't open " + images2[i]; + throw runtime_error(errstr); + } + + Mat gray_frame; + + if (i == 0) + { + cvtColor(frame0, gray_frame, COLOR_BGR2GRAY); + } + + for (int points = Min_Size; points <= Max_Size; points *= Multiple) + { + if (i == 0) + SUBTEST << frame0.cols << "x" << frame0.rows << "; color; " << points << " points"; + else + SUBTEST << frame0.cols << "x" << frame0.rows << "; gray; " << points << " points"; + Mat nextPts_cpu; + Mat status_cpu; + + vector pts; + goodFeaturesToTrack(i == 0 ? gray_frame : frame0, pts, points, 0.01, 0.0); + + vector nextPts; + vector status; + + vector err; + + calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, err); + + CPU_ON; + calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, err); + CPU_OFF; + + ocl::PyrLKOpticalFlow d_pyrLK; + + ocl::oclMat d_frame0(frame0); + ocl::oclMat d_frame1(frame1); + + ocl::oclMat d_pts; + Mat pts_mat(1, (int)pts.size(), CV_32FC2, (void *)&pts[0]); + d_pts.upload(pts_mat); + + ocl::oclMat d_nextPts; + ocl::oclMat d_status; + ocl::oclMat d_err; + + WARMUP_ON; + d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err); + WARMUP_OFF; + + GPU_ON; + d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err); + ; + GPU_OFF; + + GPU_FULL_ON; + d_frame0.upload(frame0); + d_frame1.upload(frame1); + d_pts.upload(pts_mat); + d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err); + + if (!d_nextPts.empty()) + { + d_nextPts.download(nextPts_cpu); + } + + if (!d_status.empty()) + { + d_status.download(status_cpu); + } + + GPU_FULL_OFF; + } + + } +} diff --git a/modules/ocl/perf/perf_pyrup.cpp b/modules/ocl/perf/perf_pyrup.cpp index a023353ed..d3b3003a2 100644 --- a/modules/ocl/perf/perf_pyrup.cpp +++ b/modules/ocl/perf/perf_pyrup.cpp @@ -15,7 +15,7 @@ // Third party copyrights are property of their respective owners. // // @Authors -// fangfang bai fangfang@multicorewareinc.com +// Fangfang Bai, fangfang@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -30,7 +30,7 @@ // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // -// This software is provided by the copyright holders and contributors "as is" and +// This software is provided by the copyright holders and contributors as is and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, @@ -42,81 +42,46 @@ // the use of this software, even if advised of the possibility of such damage. // //M*/ - -#include "opencv2/core/core.hpp" #include "precomp.hpp" -#include -#ifdef HAVE_OPENCL -using namespace cv; -using namespace cv::ocl; -using namespace cvtest; -using namespace testing; -using namespace std; - -PARAM_TEST_CASE(PyrUp, MatType, int) +///////////// pyrUp //////////////////////// +TEST(pyrUp) { - int type; - int channels; - //std::vector oclinfo; + Mat src, dst; + int all_type[] = {CV_8UC1, CV_8UC4}; + std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - virtual void SetUp() + for (int size = 500; size <= 2000; size *= 2) { - type = GET_PARAM(0); - channels = GET_PARAM(1); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - } -}; - -TEST_P(PyrUp, Performance) -{ - cv::Size size(MWIDTH, MHEIGHT); - cv::Mat src = randomMat(size, CV_MAKETYPE(type, channels)); - cv::Mat dst_gold; - cv::ocl::oclMat dst; - - - double totalgputick = 0; - double totalgputick_kernel = 0; - - double t1 = 0; - double t2 = 0; - - for (int j = 0; j < LOOP_TIMES + 1; j ++) - { - t1 = (double)cvGetTickCount();//gpu start1 - - cv::ocl::oclMat srcMat = cv::ocl::oclMat(src);//upload - - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::pyrUp(srcMat, dst); - t2 = (double)cvGetTickCount() - t2;//kernel - - cv::Mat cpu_dst; - dst.download(cpu_dst); //download - - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if (j == 0) + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) { - continue; + SUBTEST << size << 'x' << size << "; " << type_name[j] ; + + gen(src, size, size, all_type[j], 0, 256); + + pyrUp(src, dst); + + CPU_ON; + pyrUp(src, dst); + CPU_OFF; + + ocl::oclMat d_src(src); + ocl::oclMat d_dst; + + WARMUP_ON; + ocl::pyrUp(d_src, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::pyrUp(d_src, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::pyrUp(d_src, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; } - - totalgputick = t1 + totalgputick; - - totalgputick_kernel = t2 + totalgputick_kernel; - } - - - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - - -} - -INSTANTIATE_TEST_CASE_P(GPU_ImgProc, PyrUp, Combine( - Values(CV_8U, CV_32F), Values(1, 4))); - -#endif // HAVE_OPENCL \ No newline at end of file +} \ No newline at end of file diff --git a/modules/ocl/perf/perf_split_merge.cpp b/modules/ocl/perf/perf_split_merge.cpp index 67a3d24ae..48ff1ff15 100644 --- a/modules/ocl/perf/perf_split_merge.cpp +++ b/modules/ocl/perf/perf_split_merge.cpp @@ -10,12 +10,12 @@ // License Agreement // For Open Source Computer Vision Library // -// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. // Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. // Third party copyrights are property of their respective owners. // // @Authors -// Jia Haipeng, jiahaipeng95@gmail.com +// Fangfang Bai, fangfang@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -30,7 +30,7 @@ // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // -// This software is provided by the copyright holders and contributors "as is" and +// This software is provided by the copyright holders and contributors as is and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, @@ -42,446 +42,109 @@ // the use of this software, even if advised of the possibility of such damage. // //M*/ - #include "precomp.hpp" -#ifdef HAVE_OPENCL - -using namespace cvtest; -using namespace testing; -using namespace std; -using namespace cv::ocl; -PARAM_TEST_CASE(MergeTestBase, MatType, int) +///////////// Merge//////////////////////// +TEST(Merge) { - int type; - int channels; + Mat dst; + ocl::oclMat d_dst; - //src mat - cv::Mat mat1; - cv::Mat mat2; - cv::Mat mat3; - cv::Mat mat4; + int channels = 4; + int all_type[] = {CV_8UC1, CV_32FC1}; + std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; - //dst mat - cv::Mat dst; - - // set up roi - int roicols; - int roirows; - int src1x; - int src1y; - int src2x; - int src2y; - int src3x; - int src3y; - int src4x; - int src4y; - int dstx; - int dsty; - - //src mat with roi - cv::Mat mat1_roi; - cv::Mat mat2_roi; - cv::Mat mat3_roi; - cv::Mat mat4_roi; - - //dst mat with roi - cv::Mat dst_roi; - //std::vector oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst_whole; - - //ocl mat with roi - cv::ocl::oclMat gmat1; - cv::ocl::oclMat gmat2; - cv::ocl::oclMat gmat3; - cv::ocl::oclMat gmat4; - cv::ocl::oclMat gdst; - - virtual void SetUp() + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - type = GET_PARAM(0); - channels = GET_PARAM(1); + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) + { + SUBTEST << size << 'x' << size << "; " << type_name[j] ; + Size size1 = Size(size, size); + std::vector src(channels); - cv::RNG &rng = TS::ptr()->get_rng(); - cv::Size size(MWIDTH, MHEIGHT); + for (int i = 0; i < channels; ++i) + { + src[i] = Mat(size1, all_type[j], cv::Scalar::all(i)); + } + + merge(src, dst); + + CPU_ON; + merge(src, dst); + CPU_OFF; + + std::vector d_src(channels); + + for (int i = 0; i < channels; ++i) + { + d_src[i] = ocl::oclMat(size1, all_type[j], cv::Scalar::all(i)); + } + + WARMUP_ON; + ocl::merge(d_src, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::merge(d_src, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + + for (int i = 0; i < channels; ++i) + { + d_src[i] = ocl::oclMat(size1, CV_8U, cv::Scalar::all(i)); + } + + ocl::merge(d_src, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; + } - mat1 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); - mat2 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); - mat3 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); - mat4 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); - dst = randomMat(rng, size, CV_MAKETYPE(type, channels), 5, 16, false); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //setBinpath(CLBINPATH); } - void Has_roi(int b) - { - //cv::RNG& rng = TS::ptr()->get_rng(); - if(b) - { - //randomize ROI - roicols = mat1.cols - 1; //start - roirows = mat1.rows - 1; - src1x = 1; - src1y = 1; - src2x = 1; - src2y = 1; - src3x = 1; - src3y = 1; - src4x = 1; - src4y = 1; - dstx = 1; - dsty = 1; - - } - else - { - roicols = mat1.cols; - roirows = mat1.rows; - src1x = 0; - src1y = 0; - src2x = 0; - src2y = 0; - src3x = 0; - src3y = 0; - src4x = 0; - src4y = 0; - dstx = 0; - dsty = 0; - }; - - mat1_roi = mat1(Rect(src1x, src1y, roicols, roirows)); - mat2_roi = mat2(Rect(src2x, src2y, roicols, roirows)); - mat3_roi = mat3(Rect(src3x, src3y, roicols, roirows)); - mat4_roi = mat4(Rect(src4x, src4y, roicols, roirows)); - - - dst_roi = dst(Rect(dstx, dsty, roicols, roirows)); - } - -}; - -struct Merge : MergeTestBase {}; - -TEST_P(Merge, Accuracy) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - std::vector dev_src; - dev_src.push_back(mat1_roi); - dev_src.push_back(mat2_roi); - dev_src.push_back(mat3_roi); - dev_src.push_back(mat4_roi); - t0 = (double)cvGetTickCount();//cpu start - cv::merge(dev_src, dst_roi); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 ] - gmat1 = mat1_roi; - gmat2 = mat2_roi; - gmat3 = mat3_roi; - gmat4 = mat4_roi; - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - std::vector dev_gsrc; - dev_gsrc.push_back(gmat1); - dev_gsrc.push_back(gmat2); - dev_gsrc.push_back(gmat3); - dev_gsrc.push_back(gmat4); - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::merge(dev_gsrc, gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst; - gdst_whole.download (cpu_dst);//download - t1 = (double)cvGetTickCount() - t1;//gpu end1 - - if(j == 0) - continue; - - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - gmat1 = mat1_roi; - gmat2 = mat2_roi; - gmat3 = mat3_roi; - gmat4 = mat4_roi; - gdst_whole = dst; - gdst = gdst_whole(Rect(dstx, dsty, roicols, roirows)); - std::vector dev_gsrc; - dev_gsrc.push_back(gmat1); - dev_gsrc.push_back(gmat2); - dev_gsrc.push_back(gmat3); - dev_gsrc.push_back(gmat4); - - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::merge(dev_gsrc, gdst); - }; -#endif } - -PARAM_TEST_CASE(SplitTestBase, MatType, int) +///////////// Split//////////////////////// +TEST(Split) { - int type; - int channels; + //int channels = 4; + int all_type[] = {CV_8UC1, CV_32FC1}; + std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; - //src mat - cv::Mat mat; - - //dstmat - cv::Mat dst1; - cv::Mat dst2; - cv::Mat dst3; - cv::Mat dst4; - - // set up roi - int roicols; - int roirows; - int srcx; - int srcy; - int dst1x; - int dst1y; - int dst2x; - int dst2y; - int dst3x; - int dst3y; - int dst4x; - int dst4y; - - //src mat with roi - cv::Mat mat_roi; - - //dst mat with roi - cv::Mat dst1_roi; - cv::Mat dst2_roi; - cv::Mat dst3_roi; - cv::Mat dst4_roi; - //std::vector oclinfo; - //ocl dst mat for testing - cv::ocl::oclMat gdst1_whole; - cv::ocl::oclMat gdst2_whole; - cv::ocl::oclMat gdst3_whole; - cv::ocl::oclMat gdst4_whole; - - //ocl mat with roi - cv::ocl::oclMat gmat; - cv::ocl::oclMat gdst1; - cv::ocl::oclMat gdst2; - cv::ocl::oclMat gdst3; - cv::ocl::oclMat gdst4; - - virtual void SetUp() + for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - type = GET_PARAM(0); - channels = GET_PARAM(1); + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) + { + SUBTEST << size << 'x' << size << "; " << type_name[j]; + Size size1 = Size(size, size); - cv::RNG &rng = TS::ptr()->get_rng(); - cv::Size size(MWIDTH, MHEIGHT); + Mat src(size1, CV_MAKE_TYPE(all_type[j], 4), cv::Scalar(1, 2, 3, 4)); + + std::vector dst; + + split(src, dst); + + CPU_ON; + split(src, dst); + CPU_OFF; + + ocl::oclMat d_src(size1, CV_MAKE_TYPE(all_type[j], 4), cv::Scalar(1, 2, 3, 4)); + std::vector d_dst; + + WARMUP_ON; + ocl::split(d_src, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::split(d_src, d_dst); + ; + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::split(d_src, d_dst); + GPU_FULL_OFF; + } - mat = randomMat(rng, size, CV_MAKETYPE(type, channels), 5, 16, false); - dst1 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); - dst2 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); - dst3 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); - dst4 = randomMat(rng, size, CV_MAKETYPE(type, 1), 5, 16, false); - //int devnums = getDevice(oclinfo); - //CV_Assert(devnums > 0); - ////if you want to use undefault device, set it here - ////setDevice(oclinfo[0]); - //setBinpath(CLBINPATH); } - - void Has_roi(int b) - { - //cv::RNG& rng = TS::ptr()->get_rng(); - if(b) - { - //randomize ROI - roicols = mat.cols - 1; //start - roirows = mat.rows - 1; - srcx = 1; - srcx = 1; - dst1x = 1; - dst1y = 1; - dst2x = 1; - dst2y = 1; - dst3x = 1; - dst3y = 1; - dst4x = 1; - dst4y = 1; - } - else - { - roicols = mat.cols; - roirows = mat.rows; - srcx = 0; - srcy = 0; - dst1x = 0; - dst1y = 0; - dst2x = 0; - dst2y = 0; - dst3x = 0; - dst3y = 0; - dst4x = 0; - dst4y = 0; - }; - - mat_roi = mat(Rect(srcx, srcy, roicols, roirows)); - - dst1_roi = dst1(Rect(dst1x, dst1y, roicols, roirows)); - dst2_roi = dst2(Rect(dst2x, dst2y, roicols, roirows)); - dst3_roi = dst3(Rect(dst3x, dst3y, roicols, roirows)); - dst4_roi = dst4(Rect(dst4x, dst4y, roicols, roirows)); - } - -}; - -struct Split : SplitTestBase {}; - -TEST_P(Split, Accuracy) -{ -#ifndef PRINT_KERNEL_RUN_TIME - double totalcputick = 0; - double totalgputick = 0; - double totalgputick_kernel = 0; - double t0 = 0; - double t1 = 0; - double t2 = 0; - for(int k = LOOPROISTART; k < LOOPROIEND; k++) - { - totalcputick = 0; - totalgputick = 0; - totalgputick_kernel = 0; - for(int j = 0; j < LOOP_TIMES + 1; j ++) - { - Has_roi(k); - cv::Mat dev_dst[4] = {dst1_roi, dst2_roi, dst3_roi, dst4_roi}; - cv::ocl::oclMat dev_gdst[4] = {gdst1, gdst2, gdst3, gdst4}; - t0 = (double)cvGetTickCount();//cpu start - cv::split(mat_roi, dev_dst); - t0 = (double)cvGetTickCount() - t0;//cpu end - - t1 = (double)cvGetTickCount();//gpu start1 - gdst1_whole = dst1; - gdst1 = gdst1_whole(Rect(dst1x, dst1y, roicols, roirows)); - - gdst2_whole = dst2; - gdst2 = gdst2_whole(Rect(dst2x, dst2y, roicols, roirows)); - - gdst3_whole = dst3; - gdst3 = gdst3_whole(Rect(dst3x, dst3y, roicols, roirows)); - - gdst4_whole = dst4; - gdst4 = gdst4_whole(Rect(dst4x, dst4y, roicols, roirows)); - - gmat = mat_roi; - t2 = (double)cvGetTickCount(); //kernel - cv::ocl::split(gmat, dev_gdst); - t2 = (double)cvGetTickCount() - t2;//kernel - cv::Mat cpu_dst1; - cv::Mat cpu_dst2; - cv::Mat cpu_dst3; - cv::Mat cpu_dst4; - gdst1_whole.download(cpu_dst1); - gdst2_whole.download(cpu_dst2); - gdst3_whole.download(cpu_dst3); - gdst4_whole.download(cpu_dst4); - t1 = (double)cvGetTickCount() - t1;//gpu end1 - if(j == 0) - continue; - totalgputick = t1 + totalgputick; - totalcputick = t0 + totalcputick; - totalgputick_kernel = t2 + totalgputick_kernel; - - } - if(k == 0) - { - cout << "no roi\n"; - } - else - { - cout << "with roi\n"; - }; - cout << "average cpu runtime is " << totalcputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime is " << totalgputick / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - cout << "average gpu runtime without data transfer is " << totalgputick_kernel / ((double)cvGetTickFrequency()* LOOP_TIMES * 1000.) << "ms" << endl; - } -#else - for(int j = LOOPROISTART; j < LOOPROIEND; j ++) - { - Has_roi(j); - //cv::Mat dev_dst[4] = {dst1_roi, dst2_roi, dst3_roi, dst4_roi}; - cv::ocl::oclMat dev_gdst[4] = {gdst1, gdst2, gdst3, gdst4}; - gdst1_whole = dst1; - gdst1 = gdst1_whole(Rect(dst1x, dst1y, roicols, roirows)); - - gdst2_whole = dst2; - gdst2 = gdst2_whole(Rect(dst2x, dst2y, roicols, roirows)); - - gdst3_whole = dst3; - gdst3 = gdst3_whole(Rect(dst3x, dst3y, roicols, roirows)); - - gdst4_whole = dst4; - gdst4 = gdst4_whole(Rect(dst4x, dst4y, roicols, roirows)); - gmat = mat_roi; - if(j == 0) - { - cout << "no roi:"; - } - else - { - cout << "\nwith roi:"; - }; - cv::ocl::split(gmat, dev_gdst); - }; -#endif } - -//*************test***************** -INSTANTIATE_TEST_CASE_P(SplitMerge, Merge, Combine( - Values(CV_8UC4, CV_32FC4), Values(1, 4))); - -INSTANTIATE_TEST_CASE_P(SplitMerge, Split , Combine( - Values(CV_8U, CV_32S, CV_32F), Values(1, 4))); - -#endif // HAVE_OPENCL diff --git a/modules/ocl/perf/precomp.cpp b/modules/ocl/perf/precomp.cpp index 7d287004e..e35a07145 100644 --- a/modules/ocl/perf/precomp.cpp +++ b/modules/ocl/perf/precomp.cpp @@ -7,12 +7,13 @@ // copy or use the software. // // -// Intel License Agreement +// License Agreement // For Open Source Computer Vision Library // -// Copyright (C) 2000, Intel Corporation, all rights reserved. +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. // Third party copyrights are property of their respective owners. -// + // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // @@ -21,12 +22,12 @@ // // * Redistribution's in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. +// and/or other oclMaterials provided with the distribution. // -// * The name of Intel Corporation may not be used to endorse or promote products +// * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // -// This software is provided by the copyright holders and contributors "as is" and +// This software is provided by the copyright holders and contributors as is and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, @@ -41,4 +42,321 @@ #include "precomp.hpp" +// This program test most of the functions in ocl module and generate data metrix of x-factor in .csv files +// All images needed in this test are in samples/gpu folder. +// For haar template, haarcascade_frontalface_alt.xml shouold be in working directory +void TestSystem::run() +{ + if (is_list_mode_) + { + for (vector::iterator it = tests_.begin(); it != tests_.end(); ++it) + { + cout << (*it)->name() << endl; + } + + return; + } + + // Run test initializers + for (vector::iterator it = inits_.begin(); it != inits_.end(); ++it) + { + if ((*it)->name().find(test_filter_, 0) != string::npos) + { + (*it)->run(); + } + } + + printHeading(); + writeHeading(); + + // Run tests + for (vector::iterator it = tests_.begin(); it != tests_.end(); ++it) + { + try + { + if ((*it)->name().find(test_filter_, 0) != string::npos) + { + cout << endl << (*it)->name() << ":\n"; + + setCurrentTest((*it)->name()); + //fprintf(record_,"%s\n",(*it)->name().c_str()); + + (*it)->run(); + finishCurrentSubtest(); + } + } + catch (const Exception &) + { + // Message is printed via callback + resetCurrentSubtest(); + } + catch (const runtime_error &e) + { + printError(e.what()); + resetCurrentSubtest(); + } + } + + printSummary(); + writeSummary(); +} + + +void TestSystem::finishCurrentSubtest() +{ + if (cur_subtest_is_empty_) + // There is no need to print subtest statistics + { + return; + } + + double cpu_time = cpu_elapsed_ / getTickFrequency() * 1000.0; + double gpu_time = gpu_elapsed_ / getTickFrequency() * 1000.0; + double gpu_full_time = gpu_full_elapsed_ / getTickFrequency() * 1000.0; + + double speedup = static_cast(cpu_elapsed_) / std::max(1.0, gpu_elapsed_); + speedup_total_ += speedup; + + double fullspeedup = static_cast(cpu_elapsed_) / std::max(1.0, gpu_full_elapsed_); + speedup_full_total_ += fullspeedup; + + if (speedup > top_) + { + speedup_faster_count_++; + } + else if (speedup < bottom_) + { + speedup_slower_count_++; + } + else + { + speedup_equal_count_++; + } + + if (fullspeedup > top_) + { + speedup_full_faster_count_++; + } + else if (fullspeedup < bottom_) + { + speedup_full_slower_count_++; + } + else + { + speedup_full_equal_count_++; + } + + // compute min, max and + std::sort(gpu_times_.begin(), gpu_times_.end()); + double gpu_min = gpu_times_.front() / getTickFrequency() * 1000.0; + double gpu_max = gpu_times_.back() / getTickFrequency() * 1000.0; + double deviation = 0; + + if (gpu_times_.size() > 1) + { + double sum = 0; + + for (size_t i = 0; i < gpu_times_.size(); i++) + { + int64 diff = gpu_times_[i] - static_cast(gpu_elapsed_); + double diff_time = diff * 1000 / getTickFrequency(); + sum += diff_time * diff_time; + } + + deviation = std::sqrt(sum / gpu_times_.size()); + } + + printMetrics(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup); + writeMetrics(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup, gpu_min, gpu_max, deviation); + + num_subtests_called_++; + resetCurrentSubtest(); +} + + +double TestSystem::meanTime(const vector &samples) +{ + double sum = accumulate(samples.begin(), samples.end(), 0.); + return sum / samples.size(); +} + + +void TestSystem::printHeading() +{ + cout << endl; + cout << setiosflags(ios_base::left); + cout << TAB << setw(10) << "CPU, ms" << setw(10) << "GPU, ms" + << setw(14) << "SPEEDUP" << setw(14) << "GPUTOTAL, ms" << setw(14) << "TOTALSPEEDUP" + << "DESCRIPTION\n"; + + cout << resetiosflags(ios_base::left); +} + +void TestSystem::writeHeading() +{ + if (!record_) + { + recordname_ += "_OCL.csv"; + record_ = fopen(recordname_.c_str(), "w"); + } + + fprintf(record_, "NAME,DESCRIPTION,CPU (ms),GPU (ms),SPEEDUP,GPUTOTAL (ms),TOTALSPEEDUP,GPU Min (ms),GPU Max (ms), Standard deviation (ms)\n"); + + fflush(record_); +} + +void TestSystem::printSummary() +{ + cout << setiosflags(ios_base::fixed); + cout << "\naverage GPU speedup: x" + << setprecision(3) << speedup_total_ / std::max(1, num_subtests_called_) + << endl; + cout << "\nGPU exceeded: " + << setprecision(3) << speedup_faster_count_ + << "\nGPU passed: " + << setprecision(3) << speedup_equal_count_ + << "\nGPU failed: " + << setprecision(3) << speedup_slower_count_ + << endl; + cout << "\nGPU exceeded rate: " + << setprecision(3) << (float)speedup_faster_count_ / std::max(1, num_subtests_called_) * 100 + << "%" + << "\nGPU passed rate: " + << setprecision(3) << (float)speedup_equal_count_ / std::max(1, num_subtests_called_) * 100 + << "%" + << "\nGPU failed rate: " + << setprecision(3) << (float)speedup_slower_count_ / std::max(1, num_subtests_called_) * 100 + << "%" + << endl; + cout << "\naverage GPUTOTAL speedup: x" + << setprecision(3) << speedup_full_total_ / std::max(1, num_subtests_called_) + << endl; + cout << "\nGPUTOTAL exceeded: " + << setprecision(3) << speedup_full_faster_count_ + << "\nGPUTOTAL passed: " + << setprecision(3) << speedup_full_equal_count_ + << "\nGPUTOTAL failed: " + << setprecision(3) << speedup_full_slower_count_ + << endl; + cout << "\nGPUTOTAL exceeded rate: " + << setprecision(3) << (float)speedup_full_faster_count_ / std::max(1, num_subtests_called_) * 100 + << "%" + << "\nGPUTOTAL passed rate: " + << setprecision(3) << (float)speedup_full_equal_count_ / std::max(1, num_subtests_called_) * 100 + << "%" + << "\nGPUTOTAL failed rate: " + << setprecision(3) << (float)speedup_full_slower_count_ / std::max(1, num_subtests_called_) * 100 + << "%" + << endl; + cout << resetiosflags(ios_base::fixed); +} + + +void TestSystem::printMetrics(double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup) +{ + cout << TAB << setiosflags(ios_base::left); + stringstream stream; + + stream << cpu_time; + cout << setw(10) << stream.str(); + + stream.str(""); + stream << gpu_time; + cout << setw(10) << stream.str(); + + stream.str(""); + stream << "x" << setprecision(3) << speedup; + cout << setw(14) << stream.str(); + + stream.str(""); + stream << gpu_full_time; + cout << setw(14) << stream.str(); + + stream.str(""); + stream << "x" << setprecision(3) << fullspeedup; + cout << setw(14) << stream.str(); + + cout << cur_subtest_description_.str(); + cout << resetiosflags(ios_base::left) << endl; +} + +void TestSystem::writeMetrics(double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup, double gpu_min, double gpu_max, double std_dev) +{ + if (!record_) + { + recordname_ += ".csv"; + record_ = fopen(recordname_.c_str(), "w"); + } + + fprintf(record_, "%s,%s,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f\n", itname_changed_ ? itname_.c_str() : "", + cur_subtest_description_.str().c_str(), + cpu_time, gpu_time, speedup, gpu_full_time, fullspeedup, + gpu_min, gpu_max, std_dev); + + if (itname_changed_) + { + itname_changed_ = false; + } + + fflush(record_); +} + +void TestSystem::writeSummary() +{ + if (!record_) + { + recordname_ += ".csv"; + record_ = fopen(recordname_.c_str(), "w"); + } + + fprintf(record_, "\nAverage GPU speedup: %.3f\n" + "exceeded: %d (%.3f%%)\n" + "passed: %d (%.3f%%)\n" + "failed: %d (%.3f%%)\n" + "\nAverage GPUTOTAL speedup: %.3f\n" + "exceeded: %d (%.3f%%)\n" + "passed: %d (%.3f%%)\n" + "failed: %d (%.3f%%)\n", + speedup_total_ / std::max(1, num_subtests_called_), + speedup_faster_count_, (float)speedup_faster_count_ / std::max(1, num_subtests_called_) * 100, + speedup_equal_count_, (float)speedup_equal_count_ / std::max(1, num_subtests_called_) * 100, + speedup_slower_count_, (float)speedup_slower_count_ / std::max(1, num_subtests_called_) * 100, + speedup_full_total_ / std::max(1, num_subtests_called_), + speedup_full_faster_count_, (float)speedup_full_faster_count_ / std::max(1, num_subtests_called_) * 100, + speedup_full_equal_count_, (float)speedup_full_equal_count_ / std::max(1, num_subtests_called_) * 100, + speedup_full_slower_count_, (float)speedup_full_slower_count_ / std::max(1, num_subtests_called_) * 100 + ); + fflush(record_); +} + +void TestSystem::printError(const std::string &msg) +{ + if(msg != "CL_INVALID_BUFFER_SIZE") + { + cout << TAB << "[error: " << msg << "] " << cur_subtest_description_.str() << endl; + } +} + +void gen(Mat &mat, int rows, int cols, int type, Scalar low, Scalar high) +{ + mat.create(rows, cols, type); + RNG rng(0); + rng.fill(mat, RNG::UNIFORM, low, high); +} + + +string abspath(const string &relpath) +{ + return TestSystem::instance().workingDir() + relpath; +} + + +int CV_CDECL cvErrorCallback(int /*status*/, const char * /*func_name*/, + const char *err_msg, const char * /*file_name*/, + int /*line*/, void * /*userdata*/) +{ + TestSystem::instance().printError(err_msg); + return 0; +} + diff --git a/modules/ocl/perf/precomp.hpp b/modules/ocl/perf/precomp.hpp index 34eea555f..819ac5925 100644 --- a/modules/ocl/perf/precomp.hpp +++ b/modules/ocl/perf/precomp.hpp @@ -7,12 +7,13 @@ // copy or use the software. // // -// Intel License Agreement +// License Agreement // For Open Source Computer Vision Library // -// Copyright (C) 2000, Intel Corporation, all rights reserved. +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. // Third party copyrights are property of their respective owners. -// + // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: // @@ -21,12 +22,12 @@ // // * Redistribution's in binary form must reproduce the above copyright notice, // this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. +// and/or other oclMaterials provided with the distribution. // -// * The name of Intel Corporation may not be used to endorse or promote products +// * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // -// This software is provided by the copyright holders and contributors "as is" and +// This software is provided by the copyright holders and contributors as is and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. // In no event shall the Intel Corporation or contributors be liable for any direct, @@ -39,43 +40,352 @@ // //M*/ -#ifdef __GNUC__ -# pragma GCC diagnostic ignored "-Wmissing-declarations" -# if defined __clang__ || defined __APPLE__ -# pragma GCC diagnostic ignored "-Wmissing-prototypes" -# pragma GCC diagnostic ignored "-Wextra" -# endif -#endif - -#ifndef __OPENCV_TEST_PRECOMP_HPP__ -#define __OPENCV_TEST_PRECOMP_HPP__ - -#include -#include +#include +#include +#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include "cvconfig.h" +#include +#include +#include #include "opencv2/core/core.hpp" -#include "opencv2/highgui/highgui.hpp" -//#include "opencv2/calib3d/calib3d.hpp" #include "opencv2/imgproc/imgproc.hpp" +#include "opencv2/highgui/highgui.hpp" #include "opencv2/video/video.hpp" -#include "opencv2/ts/ts.hpp" -#include "opencv2/ts/ts_perf.hpp" +#include "opencv2/objdetect/objdetect.hpp" +#include "opencv2/features2d/features2d.hpp" #include "opencv2/ocl/ocl.hpp" -//#include "opencv2/nonfree/nonfree.hpp" -#include "utility.hpp" -#include "interpolation.hpp" -//#include "add_test_info.h" -//#define PERF_TEST_OCL 1 +#define Min_Size 1000 +#define Max_Size 4000 +#define Multiple 2 +#define TAB " " -#endif +using namespace std; +using namespace cv; +void gen(Mat &mat, int rows, int cols, int type, Scalar low, Scalar high); +string abspath(const string &relpath); +int CV_CDECL cvErrorCallback(int, const char *, const char *, const char *, int, void *); +typedef struct +{ + short x; + short y; +} COOR; +COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, + cv::Size size, int sp, int sr, int maxIter, float eps, int *tab); +void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi, + int sp, int sr, cv::TermCriteria crit); + +class Runnable +{ +public: + explicit Runnable(const std::string &runname): name_(runname) {} + virtual ~Runnable() {} + + const std::string &name() const + { + return name_; + } + + virtual void run() = 0; + +private: + std::string name_; +}; + +class TestSystem +{ +public: + static TestSystem &instance() + { + static TestSystem me; + return me; + } + + void setWorkingDir(const std::string &val) + { + working_dir_ = val; + } + const std::string &workingDir() const + { + return working_dir_; + } + + void setTestFilter(const std::string &val) + { + test_filter_ = val; + } + const std::string &testFilter() const + { + return test_filter_; + } + + void setNumIters(int num_iters) + { + num_iters_ = num_iters; + } + void setGPUWarmupIters(int num_iters) + { + gpu_warmup_iters_ = num_iters; + } + void setCPUIters(int num_iters) + { + cpu_num_iters_ = num_iters; + } + + void setTopThreshold(double top) + { + top_ = top; + } + void setBottomThreshold(double bottom) + { + bottom_ = bottom; + } + + void addInit(Runnable *init) + { + inits_.push_back(init); + } + void addTest(Runnable *test) + { + tests_.push_back(test); + } + void run(); + + // It's public because OpenCV callback uses it + void printError(const std::string &msg); + + std::stringstream &startNewSubtest() + { + finishCurrentSubtest(); + return cur_subtest_description_; + } + + bool stop() const + { + return cur_iter_idx_ >= num_iters_; + } + + bool cpu_stop() const + { + return cur_iter_idx_ >= cpu_num_iters_; + } + + bool warmupStop() + { + return cur_warmup_idx_++ >= gpu_warmup_iters_; + } + + void warmupComplete() + { + cur_warmup_idx_ = 0; + } + + void cpuOn() + { + cpu_started_ = cv::getTickCount(); + } + void cpuOff() + { + int64 delta = cv::getTickCount() - cpu_started_; + cpu_times_.push_back(delta); + ++cur_iter_idx_; + } + void cpuComplete() + { + cpu_elapsed_ += meanTime(cpu_times_); + cur_subtest_is_empty_ = false; + cur_iter_idx_ = 0; + } + + void gpuOn() + { + gpu_started_ = cv::getTickCount(); + } + void gpuOff() + { + int64 delta = cv::getTickCount() - gpu_started_; + gpu_times_.push_back(delta); + ++cur_iter_idx_; + } + void gpuComplete() + { + gpu_elapsed_ += meanTime(gpu_times_); + cur_subtest_is_empty_ = false; + cur_iter_idx_ = 0; + } + + void gpufullOn() + { + gpu_full_started_ = cv::getTickCount(); + } + void gpufullOff() + { + int64 delta = cv::getTickCount() - gpu_full_started_; + gpu_full_times_.push_back(delta); + ++cur_iter_idx_; + } + void gpufullComplete() + { + gpu_full_elapsed_ += meanTime(gpu_full_times_); + cur_subtest_is_empty_ = false; + cur_iter_idx_ = 0; + } + + bool isListMode() const + { + return is_list_mode_; + } + void setListMode(bool value) + { + is_list_mode_ = value; + } + + void setRecordName(const std::string &name) + { + recordname_ = name; + } + + void setCurrentTest(const std::string &name) + { + itname_ = name; + itname_changed_ = true; + } + +private: + TestSystem(): + cur_subtest_is_empty_(true), cpu_elapsed_(0), + gpu_elapsed_(0), gpu_full_elapsed_(0), speedup_total_(0.0), + num_subtests_called_(0), + speedup_faster_count_(0), speedup_slower_count_(0), speedup_equal_count_(0), + speedup_full_faster_count_(0), speedup_full_slower_count_(0), speedup_full_equal_count_(0), is_list_mode_(false), + num_iters_(10), cpu_num_iters_(2), + gpu_warmup_iters_(1), cur_iter_idx_(0), cur_warmup_idx_(0), + record_(0), recordname_("performance"), itname_changed_(true) + { + cpu_times_.reserve(num_iters_); + gpu_times_.reserve(num_iters_); + gpu_full_times_.reserve(num_iters_); + } + + void finishCurrentSubtest(); + void resetCurrentSubtest() + { + cpu_elapsed_ = 0; + gpu_elapsed_ = 0; + gpu_full_elapsed_ = 0; + cur_subtest_description_.str(""); + cur_subtest_is_empty_ = true; + cur_iter_idx_ = 0; + cpu_times_.clear(); + gpu_times_.clear(); + gpu_full_times_.clear(); + } + + double meanTime(const std::vector &samples); + + void printHeading(); + void printSummary(); + void printMetrics(double cpu_time, double gpu_time = 0.0f, double gpu_full_time = 0.0f, double speedup = 0.0f, double fullspeedup = 0.0f); + + void writeHeading(); + void writeSummary(); + void writeMetrics(double cpu_time, double gpu_time = 0.0f, double gpu_full_time = 0.0f, + double speedup = 0.0f, double fullspeedup = 0.0f, + double gpu_min = 0.0f, double gpu_max = 0.0f, double std_dev = 0.0f); + + std::string working_dir_; + std::string test_filter_; + + std::vector inits_; + std::vector tests_; + + std::stringstream cur_subtest_description_; + bool cur_subtest_is_empty_; + + int64 cpu_started_; + int64 gpu_started_; + int64 gpu_full_started_; + double cpu_elapsed_; + double gpu_elapsed_; + double gpu_full_elapsed_; + + double speedup_total_; + double speedup_full_total_; + int num_subtests_called_; + + int speedup_faster_count_; + int speedup_slower_count_; + int speedup_equal_count_; + + int speedup_full_faster_count_; + int speedup_full_slower_count_; + int speedup_full_equal_count_; + + bool is_list_mode_; + + double top_; + double bottom_; + + int num_iters_; + int cpu_num_iters_; //there's no need to set cpu running same times with gpu + int gpu_warmup_iters_; //gpu warm up times, default is 1 + int cur_iter_idx_; + int cur_warmup_idx_; //current gpu warm up times + std::vector cpu_times_; + std::vector gpu_times_; + std::vector gpu_full_times_; + + FILE *record_; + std::string recordname_; + std::string itname_; + bool itname_changed_; +}; + + +#define GLOBAL_INIT(name) \ +struct name##_init: Runnable { \ + name##_init(): Runnable(#name) { \ + TestSystem::instance().addInit(this); \ +} \ + void run(); \ +} name##_init_instance; \ + void name##_init::run() + + +#define TEST(name) \ +struct name##_test: Runnable { \ + name##_test(): Runnable(#name) { \ + TestSystem::instance().addTest(this); \ +} \ + void run(); \ +} name##_test_instance; \ + void name##_test::run() + +#define SUBTEST TestSystem::instance().startNewSubtest() + +#define CPU_ON \ + while (!TestSystem::instance().cpu_stop()) { \ + TestSystem::instance().cpuOn() +#define CPU_OFF \ + TestSystem::instance().cpuOff(); \ + } TestSystem::instance().cpuComplete() + +#define GPU_ON \ + while (!TestSystem::instance().stop()) { \ + TestSystem::instance().gpuOn() +#define GPU_OFF \ + TestSystem::instance().gpuOff(); \ + } TestSystem::instance().gpuComplete() + +#define GPU_FULL_ON \ + while (!TestSystem::instance().stop()) { \ + TestSystem::instance().gpufullOn() +#define GPU_FULL_OFF \ + TestSystem::instance().gpufullOff(); \ + } TestSystem::instance().gpufullComplete() + +#define WARMUP_ON \ + while (!TestSystem::instance().warmupStop()) { +#define WARMUP_OFF \ + } TestSystem::instance().warmupComplete() diff --git a/modules/ocl/perf/utility.cpp b/modules/ocl/perf/utility.cpp deleted file mode 100644 index b7fbe4fa0..000000000 --- a/modules/ocl/perf/utility.cpp +++ /dev/null @@ -1,265 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// Intel License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000, Intel Corporation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of Intel Corporation may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#include "precomp.hpp" -#define VARNAME(A) #A -using namespace std; -using namespace cv; -using namespace cv::gpu; -using namespace cvtest; - - -//std::string generateVarList(int first,...) -//{ -// vector varname; -// -// va_list argp; -// string s; -// stringstream ss; -// va_start(argp,first); -// int i=first; -// while(i!=-1) -// { -// ss<get_rng(); - return rng.uniform(minVal, maxVal); -} - -double randomDouble(double minVal, double maxVal) -{ - RNG &rng = TS::ptr()->get_rng(); - return rng.uniform(minVal, maxVal); -} - -Size randomSize(int minVal, int maxVal) -{ - return cv::Size(randomInt(minVal, maxVal), randomInt(minVal, maxVal)); -} - -Scalar randomScalar(double minVal, double maxVal) -{ - return Scalar(randomDouble(minVal, maxVal), randomDouble(minVal, maxVal), randomDouble(minVal, maxVal), randomDouble(minVal, maxVal)); -} - -Mat randomMat(Size size, int type, double minVal, double maxVal) -{ - return randomMat(TS::ptr()->get_rng(), size, type, minVal, maxVal, false); -} - - - - - - - -/* -void showDiff(InputArray gold_, InputArray actual_, double eps) -{ - Mat gold; - if (gold_.kind() == _InputArray::MAT) - gold = gold_.getMat(); - else - gold_.getGpuMat().download(gold); - - Mat actual; - if (actual_.kind() == _InputArray::MAT) - actual = actual_.getMat(); - else - actual_.getGpuMat().download(actual); - - Mat diff; - absdiff(gold, actual, diff); - threshold(diff, diff, eps, 255.0, cv::THRESH_BINARY); - - namedWindow("gold", WINDOW_NORMAL); - namedWindow("actual", WINDOW_NORMAL); - namedWindow("diff", WINDOW_NORMAL); - - imshow("gold", gold); - imshow("actual", actual); - imshow("diff", diff); - - waitKey(); -} -*/ - -/* -bool supportFeature(const DeviceInfo& info, FeatureSet feature) -{ - return TargetArchs::builtWith(feature) && info.supports(feature); -} - -const vector& devices() -{ - static vector devs; - static bool first = true; - - if (first) - { - int deviceCount = getCudaEnabledDeviceCount(); - - devs.reserve(deviceCount); - - for (int i = 0; i < deviceCount; ++i) - { - DeviceInfo info(i); - if (info.isCompatible()) - devs.push_back(info); - } - - first = false; - } - - return devs; -} - -vector devices(FeatureSet feature) -{ - const vector& d = devices(); - - vector devs_filtered; - - if (TargetArchs::builtWith(feature)) - { - devs_filtered.reserve(d.size()); - - for (size_t i = 0, size = d.size(); i < size; ++i) - { - const DeviceInfo& info = d[i]; - - if (info.supports(feature)) - devs_filtered.push_back(info); - } - } - - return devs_filtered; -} -*/ - -vector types(int depth_start, int depth_end, int cn_start, int cn_end) -{ - vector v; - - v.reserve((depth_end - depth_start + 1) * (cn_end - cn_start + 1)); - - for (int depth = depth_start; depth <= depth_end; ++depth) - { - for (int cn = cn_start; cn <= cn_end; ++cn) - { - v.push_back(CV_MAKETYPE(depth, cn)); - } - } - - return v; -} - -const vector &all_types() -{ - static vector v = types(CV_8U, CV_64F, 1, 4); - - return v; -} - -Mat readImage(const string &fileName, int flags) -{ - return imread(string(cvtest::TS::ptr()->get_data_path()) + fileName, flags); -} - -Mat readImageType(const string &fname, int type) -{ - Mat src = readImage(fname, CV_MAT_CN(type) == 1 ? IMREAD_GRAYSCALE : IMREAD_COLOR); - if (CV_MAT_CN(type) == 4) - { - Mat temp; - cvtColor(src, temp, cv::COLOR_BGR2BGRA); - swap(src, temp); - } - src.convertTo(src, CV_MAT_DEPTH(type)); - return src; -} - -double checkNorm(const Mat &m) -{ - return norm(m, NORM_INF); -} - -double checkNorm(const Mat &m1, const Mat &m2) -{ - return norm(m1, m2, NORM_INF); -} - -double checkSimilarity(const Mat &m1, const Mat &m2) -{ - Mat diff; - matchTemplate(m1, m2, diff, CV_TM_CCORR_NORMED); - return std::abs(diff.at(0, 0) - 1.f); -} - -/* -void cv::ocl::PrintTo(const DeviceInfo& info, ostream* os) -{ - (*os) << info.name(); -} -*/ - -void PrintTo(const Inverse &inverse, std::ostream *os) -{ - if (inverse) - (*os) << "inverse"; - else - (*os) << "direct"; -} diff --git a/modules/ocl/perf/utility.hpp b/modules/ocl/perf/utility.hpp deleted file mode 100644 index 7d34b6731..000000000 --- a/modules/ocl/perf/utility.hpp +++ /dev/null @@ -1,182 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// Intel License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000, Intel Corporation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of Intel Corporation may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#ifndef __OPENCV_TEST_UTILITY_HPP__ -#define __OPENCV_TEST_UTILITY_HPP__ -//#define PRINT_KERNEL_RUN_TIME -#ifdef PRINT_KERNEL_RUN_TIME -#define LOOP_TIMES 1 -#else -#define LOOP_TIMES 1 -#endif -#define MWIDTH 1920 -#define MHEIGHT 1080 -#define CLBINPATH ".\\" -#define LOOPROISTART 0 -#define LOOPROIEND 1 -int randomInt(int minVal, int maxVal); -double randomDouble(double minVal, double maxVal); - -//std::string generateVarList(int first,...); -std::string generateVarList(int &p1, int &p2); -cv::Size randomSize(int minVal, int maxVal); -cv::Scalar randomScalar(double minVal, double maxVal); -cv::Mat randomMat(cv::Size size, int type, double minVal = 0.0, double maxVal = 255.0); - -void showDiff(cv::InputArray gold, cv::InputArray actual, double eps); - -//! return true if device supports specified feature and gpu module was built with support the feature. -//bool supportFeature(const cv::gpu::DeviceInfo& info, cv::gpu::FeatureSet feature); - -//! return all devices compatible with current gpu module build. -//const std::vector& devices(); -//! return all devices compatible with current gpu module build which support specified feature. -//std::vector devices(cv::gpu::FeatureSet feature); - -//! read image from testdata folder. -cv::Mat readImage(const std::string &fileName, int flags = cv::IMREAD_COLOR); -cv::Mat readImageType(const std::string &fname, int type); - -double checkNorm(const cv::Mat &m); -double checkNorm(const cv::Mat &m1, const cv::Mat &m2); -double checkSimilarity(const cv::Mat &m1, const cv::Mat &m2); - -#define EXPECT_MAT_NORM(mat, eps) \ -{ \ - EXPECT_LE(checkNorm(cv::Mat(mat)), eps) \ -} - -/*#define EXPECT_MAT_NEAR(mat1, mat2, eps) \ -{ \ - ASSERT_EQ(mat1.type(), mat2.type()); \ - ASSERT_EQ(mat1.size(), mat2.size()); \ - EXPECT_LE(checkNorm(cv::Mat(mat1), cv::Mat(mat2)), eps); \ -}*/ - -#define EXPECT_MAT_NEAR(mat1, mat2, eps,s) \ -{ \ - ASSERT_EQ(mat1.type(), mat2.type()); \ - ASSERT_EQ(mat1.size(), mat2.size()); \ - EXPECT_LE(checkNorm(cv::Mat(mat1), cv::Mat(mat2)), eps)< types(int depth_start, int depth_end, int cn_start, int cn_end); - -//! return vector with all types (depth: CV_8U-CV_64F, channels: 1-4). -const std::vector &all_types(); - -class Inverse -{ -public: - inline Inverse(bool val = false) : val_(val) {} - - inline operator bool() const - { - return val_; - } - -private: - bool val_; -}; - -void PrintTo(const Inverse &useRoi, std::ostream *os); - -CV_ENUM(CmpCode, cv::CMP_EQ, cv::CMP_GT, cv::CMP_GE, cv::CMP_LT, cv::CMP_LE, cv::CMP_NE) - -CV_ENUM(NormCode, cv::NORM_INF, cv::NORM_L1, cv::NORM_L2, cv::NORM_TYPE_MASK, cv::NORM_RELATIVE, cv::NORM_MINMAX) - -enum {FLIP_BOTH = 0, FLIP_X = 1, FLIP_Y = -1}; -CV_ENUM(FlipCode, FLIP_BOTH, FLIP_X, FLIP_Y) - -CV_ENUM(ReduceOp, CV_REDUCE_SUM, CV_REDUCE_AVG, CV_REDUCE_MAX, CV_REDUCE_MIN) - -CV_FLAGS(GemmFlags, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_3_T); - -CV_ENUM(MorphOp, cv::MORPH_OPEN, cv::MORPH_CLOSE, cv::MORPH_GRADIENT, cv::MORPH_TOPHAT, cv::MORPH_BLACKHAT) - -CV_ENUM(ThreshOp, cv::THRESH_BINARY, cv::THRESH_BINARY_INV, cv::THRESH_TRUNC, cv::THRESH_TOZERO, cv::THRESH_TOZERO_INV) - -CV_ENUM(Interpolation, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC) - -CV_ENUM(Border, cv::BORDER_REFLECT101, cv::BORDER_REPLICATE, cv::BORDER_CONSTANT, cv::BORDER_REFLECT, cv::BORDER_WRAP) - -CV_FLAGS(WarpFlags, cv::INTER_NEAREST, cv::INTER_LINEAR, cv::INTER_CUBIC, cv::WARP_INVERSE_MAP) - -CV_ENUM(TemplateMethod, cv::TM_SQDIFF, cv::TM_SQDIFF_NORMED, cv::TM_CCORR, cv::TM_CCORR_NORMED, cv::TM_CCOEFF, cv::TM_CCOEFF_NORMED) - -CV_FLAGS(DftFlags, cv::DFT_INVERSE, cv::DFT_SCALE, cv::DFT_ROWS, cv::DFT_COMPLEX_OUTPUT, cv::DFT_REAL_OUTPUT) - -void run_perf_test(); - -#define PARAM_TEST_CASE(name, ...) struct name : testing::TestWithParam< std::tr1::tuple< __VA_ARGS__ > > - -#define GET_PARAM(k) std::tr1::get< k >(GetParam()) - -#define ALL_DEVICES testing::ValuesIn(devices()) -#define DEVICES(feature) testing::ValuesIn(devices(feature)) - -#define ALL_TYPES testing::ValuesIn(all_types()) -#define TYPES(depth_start, depth_end, cn_start, cn_end) testing::ValuesIn(types(depth_start, depth_end, cn_start, cn_end)) - -#define DIFFERENT_SIZES testing::Values(cv::Size(128, 128), cv::Size(113, 113)) - -#define DIRECT_INVERSE testing::Values(Inverse(false), Inverse(true)) - -#endif // __OPENCV_TEST_UTILITY_HPP__ diff --git a/samples/ocl/performance.cpp b/samples/ocl/performance.cpp deleted file mode 100644 index 695516f14..000000000 --- a/samples/ocl/performance.cpp +++ /dev/null @@ -1,4397 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include -#include "opencv2/core/core.hpp" -#include "opencv2/imgproc/imgproc.hpp" -#include "opencv2/highgui/highgui.hpp" -#include "opencv2/calib3d/calib3d.hpp" -#include "opencv2/video/video.hpp" -#include "opencv2/nonfree/nonfree.hpp" -#include "opencv2/objdetect/objdetect.hpp" -#include "opencv2/features2d/features2d.hpp" -#define USE_OPENCL -#ifdef USE_OPENCL -#include "opencv2/ocl/ocl.hpp" -#include "opencv2/nonfree/ocl.hpp" -#endif - -#define TAB " " - -using namespace std; -using namespace cv; - -// This program test most of the functions in ocl module and generate data metrix of x-factor in .csv files -// All images needed in this test are in samples/gpu folder. -// For haar template, haarcascade_frontalface_alt.xml shouold be in working directory - -void gen(Mat &mat, int rows, int cols, int type, Scalar low, Scalar high); -string abspath(const string &relpath); -int CV_CDECL cvErrorCallback(int, const char *, const char *, const char *, int, void *); -typedef struct -{ - short x; - short y; -} COOR; -COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, - cv::Size size, int sp, int sr, int maxIter, float eps, int *tab); -void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi, - int sp, int sr, cv::TermCriteria crit); - -class Runnable -{ -public: - explicit Runnable(const std::string &runname): name_(runname) {} - virtual ~Runnable() {} - - const std::string &name() const - { - return name_; - } - - virtual void run() = 0; - -private: - std::string name_; -}; - -class TestSystem -{ -public: - static TestSystem &instance() - { - static TestSystem me; - return me; - } - - void setWorkingDir(const std::string &val) - { - working_dir_ = val; - } - const std::string &workingDir() const - { - return working_dir_; - } - - void setTestFilter(const std::string &val) - { - test_filter_ = val; - } - const std::string &testFilter() const - { - return test_filter_; - } - - void setNumIters(int num_iters) - { - num_iters_ = num_iters; - } - void setGPUWarmupIters(int num_iters) - { - gpu_warmup_iters_ = num_iters; - } - void setCPUIters(int num_iters) - { - cpu_num_iters_ = num_iters; - } - - void setTopThreshold(double top) - { - top_ = top; - } - void setBottomThreshold(double bottom) - { - bottom_ = bottom; - } - - void addInit(Runnable *init) - { - inits_.push_back(init); - } - void addTest(Runnable *test) - { - tests_.push_back(test); - } - void run(); - - // It's public because OpenCV callback uses it - void printError(const std::string &msg); - - std::stringstream &startNewSubtest() - { - finishCurrentSubtest(); - return cur_subtest_description_; - } - - bool stop() const - { - return cur_iter_idx_ >= num_iters_; - } - - bool cpu_stop() const - { - return cur_iter_idx_ >= cpu_num_iters_; - } - - bool warmupStop() - { - return cur_warmup_idx_++ >= gpu_warmup_iters_; - } - - void warmupComplete() - { - cur_warmup_idx_ = 0; - } - - void cpuOn() - { - cpu_started_ = cv::getTickCount(); - } - void cpuOff() - { - int64 delta = cv::getTickCount() - cpu_started_; - cpu_times_.push_back(delta); - ++cur_iter_idx_; - } - void cpuComplete() - { - cpu_elapsed_ += meanTime(cpu_times_); - cur_subtest_is_empty_ = false; - cur_iter_idx_ = 0; - } - - void gpuOn() - { - gpu_started_ = cv::getTickCount(); - } - void gpuOff() - { - int64 delta = cv::getTickCount() - gpu_started_; - gpu_times_.push_back(delta); - ++cur_iter_idx_; - } - void gpuComplete() - { - gpu_elapsed_ += meanTime(gpu_times_); - cur_subtest_is_empty_ = false; - cur_iter_idx_ = 0; - } - - void gpufullOn() - { - gpu_full_started_ = cv::getTickCount(); - } - void gpufullOff() - { - int64 delta = cv::getTickCount() - gpu_full_started_; - gpu_full_times_.push_back(delta); - ++cur_iter_idx_; - } - void gpufullComplete() - { - gpu_full_elapsed_ += meanTime(gpu_full_times_); - cur_subtest_is_empty_ = false; - cur_iter_idx_ = 0; - } - - bool isListMode() const - { - return is_list_mode_; - } - void setListMode(bool value) - { - is_list_mode_ = value; - } - - void setRecordName(const std::string &name) - { - recordname_ = name; - } - - void setCurrentTest(const std::string &name) - { - itname_ = name; - itname_changed_ = true; - } - -private: - TestSystem(): - cur_subtest_is_empty_(true), cpu_elapsed_(0), - gpu_elapsed_(0), gpu_full_elapsed_(0), speedup_total_(0.0), - num_subtests_called_(0), - speedup_faster_count_(0), speedup_slower_count_(0), speedup_equal_count_(0), - speedup_full_faster_count_(0), speedup_full_slower_count_(0), speedup_full_equal_count_(0), is_list_mode_(false), - num_iters_(10), cpu_num_iters_(2), - gpu_warmup_iters_(1), cur_iter_idx_(0), cur_warmup_idx_(0), - record_(0), recordname_("performance"), itname_changed_(true) - { - cpu_times_.reserve(num_iters_); - gpu_times_.reserve(num_iters_); - gpu_full_times_.reserve(num_iters_); - } - - void finishCurrentSubtest(); - void resetCurrentSubtest() - { - cpu_elapsed_ = 0; - gpu_elapsed_ = 0; - gpu_full_elapsed_ = 0; - cur_subtest_description_.str(""); - cur_subtest_is_empty_ = true; - cur_iter_idx_ = 0; - cpu_times_.clear(); - gpu_times_.clear(); - gpu_full_times_.clear(); - } - - double meanTime(const std::vector &samples); - - void printHeading(); - void printSummary(); - void printMetrics(double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup); - - void writeHeading(); - void writeSummary(); - void writeMetrics(double cpu_time, double gpu_time, double gpu_full_time, - double speedup, double fullspeedup, - double gpu_min, double gpu_max, double std_dev); - - std::string working_dir_; - std::string test_filter_; - - std::vector inits_; - std::vector tests_; - - std::stringstream cur_subtest_description_; - bool cur_subtest_is_empty_; - - int64 cpu_started_; - int64 gpu_started_; - int64 gpu_full_started_; - double cpu_elapsed_; - double gpu_elapsed_; - double gpu_full_elapsed_; - - double speedup_total_; - double speedup_full_total_; - int num_subtests_called_; - - int speedup_faster_count_; - int speedup_slower_count_; - int speedup_equal_count_; - - int speedup_full_faster_count_; - int speedup_full_slower_count_; - int speedup_full_equal_count_; - - bool is_list_mode_; - - double top_; - double bottom_; - - int num_iters_; - int cpu_num_iters_; //there's no need to set cpu running same times with gpu - int gpu_warmup_iters_; //gpu warm up times, default is 1 - int cur_iter_idx_; - int cur_warmup_idx_; //current gpu warm up times - std::vector cpu_times_; - std::vector gpu_times_; - std::vector gpu_full_times_; - - FILE *record_; - std::string recordname_; - std::string itname_; - bool itname_changed_; -}; - - -#define GLOBAL_INIT(name) \ - struct name##_init: Runnable { \ - name##_init(): Runnable(#name) { \ - TestSystem::instance().addInit(this); \ - } \ - void run(); \ - } name##_init_instance; \ - void name##_init::run() - - -#define TEST(name) \ - struct name##_test: Runnable { \ - name##_test(): Runnable(#name) { \ - TestSystem::instance().addTest(this); \ - } \ - void run(); \ - } name##_test_instance; \ - void name##_test::run() - -#define SUBTEST TestSystem::instance().startNewSubtest() - -#define CPU_ON \ - while (!TestSystem::instance().cpu_stop()) { \ - TestSystem::instance().cpuOn() -#define CPU_OFF \ - TestSystem::instance().cpuOff(); \ - } TestSystem::instance().cpuComplete() - -#define GPU_ON \ - while (!TestSystem::instance().stop()) { \ - TestSystem::instance().gpuOn() -#define GPU_OFF \ - TestSystem::instance().gpuOff(); \ - } TestSystem::instance().gpuComplete() - -#define GPU_FULL_ON \ - while (!TestSystem::instance().stop()) { \ - TestSystem::instance().gpufullOn() -#define GPU_FULL_OFF \ - TestSystem::instance().gpufullOff(); \ - } TestSystem::instance().gpufullComplete() - -#define WARMUP_ON \ - while (!TestSystem::instance().warmupStop()) { -#define WARMUP_OFF \ - } TestSystem::instance().warmupComplete() - -void TestSystem::run() -{ - if (is_list_mode_) - { - for (vector::iterator it = tests_.begin(); it != tests_.end(); ++it) - { - cout << (*it)->name() << endl; - } - - return; - } - - // Run test initializers - for (vector::iterator it = inits_.begin(); it != inits_.end(); ++it) - { - if ((*it)->name().find(test_filter_, 0) != string::npos) - { - (*it)->run(); - } - } - - printHeading(); - writeHeading(); - - // Run tests - for (vector::iterator it = tests_.begin(); it != tests_.end(); ++it) - { - try - { - if ((*it)->name().find(test_filter_, 0) != string::npos) - { - cout << endl << (*it)->name() << ":\n"; - - setCurrentTest((*it)->name()); - //fprintf(record_,"%s\n",(*it)->name().c_str()); - - (*it)->run(); - finishCurrentSubtest(); - } - } - catch (const Exception &) - { - // Message is printed via callback - resetCurrentSubtest(); - } - catch (const runtime_error &e) - { - printError(e.what()); - resetCurrentSubtest(); - } - } - -#ifdef USE_OPENCL - printSummary(); - writeSummary(); -#endif -} - - -void TestSystem::finishCurrentSubtest() -{ - if (cur_subtest_is_empty_) - // There is no need to print subtest statistics - { - return; - } - - double cpu_time = cpu_elapsed_ / getTickFrequency() * 1000.0; - double gpu_time = gpu_elapsed_ / getTickFrequency() * 1000.0; - double gpu_full_time = gpu_full_elapsed_ / getTickFrequency() * 1000.0; - - double speedup = static_cast(cpu_elapsed_) / std::max(1.0, gpu_elapsed_); - speedup_total_ += speedup; - - double fullspeedup = static_cast(cpu_elapsed_) / std::max(1.0, gpu_full_elapsed_); - speedup_full_total_ += fullspeedup; - - if (speedup > top_) - { - speedup_faster_count_++; - } - else if (speedup < bottom_) - { - speedup_slower_count_++; - } - else - { - speedup_equal_count_++; - } - - if (fullspeedup > top_) - { - speedup_full_faster_count_++; - } - else if (fullspeedup < bottom_) - { - speedup_full_slower_count_++; - } - else - { - speedup_full_equal_count_++; - } - - // compute min, max and - std::sort(gpu_times_.begin(), gpu_times_.end()); - double gpu_min = gpu_times_.front() / getTickFrequency() * 1000.0; - double gpu_max = gpu_times_.back() / getTickFrequency() * 1000.0; - double deviation = 0; - - if (gpu_times_.size() > 1) - { - double sum = 0; - - for (size_t i = 0; i < gpu_times_.size(); i++) - { - int64 diff = gpu_times_[i] - static_cast(gpu_elapsed_); - double diff_time = diff * 1000 / getTickFrequency(); - sum += diff_time * diff_time; - } - - deviation = std::sqrt(sum / gpu_times_.size()); - } - - printMetrics(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup); - writeMetrics(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup, gpu_min, gpu_max, deviation); - - num_subtests_called_++; - resetCurrentSubtest(); -} - - -double TestSystem::meanTime(const vector &samples) -{ - double sum = accumulate(samples.begin(), samples.end(), 0.); - return sum / samples.size(); -} - - -void TestSystem::printHeading() -{ - cout << endl; - cout << setiosflags(ios_base::left); -#ifdef USE_OPENCL - cout << TAB << setw(10) << "CPU, ms" << setw(10) << "GPU, ms" - << setw(14) << "SPEEDUP" << setw(14) << "GPUTOTAL, ms" << setw(14) << "TOTALSPEEDUP" - << "DESCRIPTION\n"; -#else - cout << TAB << setw(10) << "CPU, ms\n"; -#endif - cout << resetiosflags(ios_base::left); -} - -void TestSystem::writeHeading() -{ - if (!record_) - { -#ifdef USE_OPENCL - recordname_ += "_OCL.csv"; -#else - recordname_ += "_CPU.csv"; -#endif - record_ = fopen(recordname_.c_str(), "w"); - } - -#ifdef USE_OPENCL - fprintf(record_, "NAME,DESCRIPTION,CPU (ms),GPU (ms),SPEEDUP,GPUTOTAL (ms),TOTALSPEEDUP,GPU Min (ms),GPU Max (ms), Standard deviation (ms)\n"); -#else - fprintf(record_, "NAME,DESCRIPTION,CPU (ms)\n"); -#endif - fflush(record_); -} - -void TestSystem::printSummary() -{ - cout << setiosflags(ios_base::fixed); - cout << "\naverage GPU speedup: x" - << setprecision(3) << speedup_total_ / std::max(1, num_subtests_called_) - << endl; - cout << "\nGPU exceeded: " - << setprecision(3) << speedup_faster_count_ - << "\nGPU passed: " - << setprecision(3) << speedup_equal_count_ - << "\nGPU failed: " - << setprecision(3) << speedup_slower_count_ - << endl; - cout << "\nGPU exceeded rate: " - << setprecision(3) << (float)speedup_faster_count_ / std::max(1, num_subtests_called_) * 100 - << "%" - << "\nGPU passed rate: " - << setprecision(3) << (float)speedup_equal_count_ / std::max(1, num_subtests_called_) * 100 - << "%" - << "\nGPU failed rate: " - << setprecision(3) << (float)speedup_slower_count_ / std::max(1, num_subtests_called_) * 100 - << "%" - << endl; - cout << "\naverage GPUTOTAL speedup: x" - << setprecision(3) << speedup_full_total_ / std::max(1, num_subtests_called_) - << endl; - cout << "\nGPUTOTAL exceeded: " - << setprecision(3) << speedup_full_faster_count_ - << "\nGPUTOTAL passed: " - << setprecision(3) << speedup_full_equal_count_ - << "\nGPUTOTAL failed: " - << setprecision(3) << speedup_full_slower_count_ - << endl; - cout << "\nGPUTOTAL exceeded rate: " - << setprecision(3) << (float)speedup_full_faster_count_ / std::max(1, num_subtests_called_) * 100 - << "%" - << "\nGPUTOTAL passed rate: " - << setprecision(3) << (float)speedup_full_equal_count_ / std::max(1, num_subtests_called_) * 100 - << "%" - << "\nGPUTOTAL failed rate: " - << setprecision(3) << (float)speedup_full_slower_count_ / std::max(1, num_subtests_called_) * 100 - << "%" - << endl; - cout << resetiosflags(ios_base::fixed); -} - - -void TestSystem::printMetrics(double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup) -{ - cout << TAB << setiosflags(ios_base::left); - stringstream stream; - - stream << cpu_time; - cout << setw(10) << stream.str(); -#ifdef USE_OPENCL - stream.str(""); - stream << gpu_time; - cout << setw(10) << stream.str(); - - stream.str(""); - stream << "x" << setprecision(3) << speedup; - cout << setw(14) << stream.str(); - - stream.str(""); - stream << gpu_full_time; - cout << setw(14) << stream.str(); - - stream.str(""); - stream << "x" << setprecision(3) << fullspeedup; - cout << setw(14) << stream.str(); -#endif - cout << cur_subtest_description_.str(); - cout << resetiosflags(ios_base::left) << endl; -} - -void TestSystem::writeMetrics(double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup, double gpu_min, double gpu_max, double std_dev) -{ - if (!record_) - { - recordname_ += ".csv"; - record_ = fopen(recordname_.c_str(), "w"); - } - -#ifdef USE_OPENCL - fprintf(record_, "%s,%s,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f\n", itname_changed_ ? itname_.c_str() : "", - cur_subtest_description_.str().c_str(), - cpu_time, gpu_time, speedup, gpu_full_time, fullspeedup, - gpu_min, gpu_max, std_dev); -#else - fprintf(record_, "%s,%s,%.3f\n", - itname_changed_ ? itname_.c_str() : "", cur_subtest_description_.str().c_str(), cpu_time); -#endif - - if (itname_changed_) - { - itname_changed_ = false; - } - - fflush(record_); -} - -void TestSystem::writeSummary() -{ - if (!record_) - { - recordname_ += ".csv"; - record_ = fopen(recordname_.c_str(), "w"); - } - - fprintf(record_, "\nAverage GPU speedup: %.3f\n" - "exceeded: %d (%.3f%%)\n" - "passed: %d (%.3f%%)\n" - "failed: %d (%.3f%%)\n" - "\nAverage GPUTOTAL speedup: %.3f\n" - "exceeded: %d (%.3f%%)\n" - "passed: %d (%.3f%%)\n" - "failed: %d (%.3f%%)\n", - speedup_total_ / std::max(1, num_subtests_called_), - speedup_faster_count_, (float)speedup_faster_count_ / std::max(1, num_subtests_called_) * 100, - speedup_equal_count_, (float)speedup_equal_count_ / std::max(1, num_subtests_called_) * 100, - speedup_slower_count_, (float)speedup_slower_count_ / std::max(1, num_subtests_called_) * 100, - speedup_full_total_ / std::max(1, num_subtests_called_), - speedup_full_faster_count_, (float)speedup_full_faster_count_ / std::max(1, num_subtests_called_) * 100, - speedup_full_equal_count_, (float)speedup_full_equal_count_ / std::max(1, num_subtests_called_) * 100, - speedup_full_slower_count_, (float)speedup_full_slower_count_ / std::max(1, num_subtests_called_) * 100 - ); - fflush(record_); -} - -void TestSystem::printError(const std::string &msg) -{ - cout << TAB << "[error: " << msg << "] " << cur_subtest_description_.str() << endl; -} - -void gen(Mat &mat, int rows, int cols, int type, Scalar low, Scalar high) -{ - mat.create(rows, cols, type); - RNG rng(0); - rng.fill(mat, RNG::UNIFORM, low, high); -} - - -string abspath(const string &relpath) -{ - return TestSystem::instance().workingDir() + relpath; -} - - -int CV_CDECL cvErrorCallback(int /*status*/, const char * /*func_name*/, - const char *err_msg, const char * /*file_name*/, - int /*line*/, void * /*userdata*/) -{ - TestSystem::instance().printError(err_msg); - return 0; -} - -/////////// matchTemplate //////////////////////// -//void InitMatchTemplate() -//{ -// Mat src; gen(src, 500, 500, CV_32F, 0, 1); -// Mat templ; gen(templ, 500, 500, CV_32F, 0, 1); -//#ifdef USE_OPENCL -// ocl::oclMat d_src(src), d_templ(templ), d_dst; -// ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR); -//#endif -//} -TEST(matchTemplate) -{ - //InitMatchTemplate(); - - Mat src, templ, dst; - int templ_size = 5; - - - for (int size = 1000; size <= 4000; size *= 2) - { - int all_type[] = {CV_32FC1, CV_32FC4}; - std::string type_name[] = {"CV_32FC1", "CV_32FC4"}; - - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - for(templ_size = 5; templ_size <= 5; templ_size *= 5) - { - gen(src, size, size, all_type[j], 0, 1); - - SUBTEST << src.cols << 'x' << src.rows << "; " << type_name[j] << "; templ " << templ_size << 'x' << templ_size << "; CCORR"; - - gen(templ, templ_size, templ_size, all_type[j], 0, 1); - - matchTemplate(src, templ, dst, CV_TM_CCORR); - - CPU_ON; - matchTemplate(src, templ, dst, CV_TM_CCORR); - CPU_OFF; - -#ifdef USE_OPENCL - ocl::oclMat d_src(src), d_templ, d_dst; - - d_templ.upload(templ); - - WARMUP_ON; - ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR); - WARMUP_OFF; - - GPU_ON; - ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - d_templ.upload(templ); - ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - } - - int all_type_8U[] = {CV_8UC1}; - std::string type_name_8U[] = {"CV_8UC1"}; - - for (size_t j = 0; j < sizeof(all_type_8U) / sizeof(int); j++) - { - for(templ_size = 5; templ_size <= 5; templ_size *= 5) - { - SUBTEST << src.cols << 'x' << src.rows << "; " << type_name_8U[j] << "; templ " << templ_size << 'x' << templ_size << "; CCORR_NORMED"; - - gen(src, size, size, all_type_8U[j], 0, 255); - - gen(templ, templ_size, templ_size, all_type_8U[j], 0, 255); - - matchTemplate(src, templ, dst, CV_TM_CCORR_NORMED); - - CPU_ON; - matchTemplate(src, templ, dst, CV_TM_CCORR_NORMED); - CPU_OFF; - -#ifdef USE_OPENCL - ocl::oclMat d_src(src); - ocl::oclMat d_templ(templ), d_dst; - - WARMUP_ON; - ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR_NORMED); - WARMUP_OFF; - - GPU_ON; - ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR_NORMED); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - d_templ.upload(templ); - ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR_NORMED); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - } - } -} - -///////////// PyrLKOpticalFlow //////////////////////// -TEST(PyrLKOpticalFlow) -{ - std::string images1[] = {"rubberwhale1.png", "aloeL.jpg"}; - std::string images2[] = {"rubberwhale2.png", "aloeR.jpg"}; - - for (size_t i = 0; i < sizeof(images1) / sizeof(std::string); i++) - { - Mat frame0 = imread(abspath(images1[i]), i == 0 ? IMREAD_COLOR : IMREAD_GRAYSCALE); - - if (frame0.empty()) - { - std::string errstr = "can't open " + images1[i]; - throw runtime_error(errstr); - } - - Mat frame1 = imread(abspath(images2[i]), i == 0 ? IMREAD_COLOR : IMREAD_GRAYSCALE); - - if (frame1.empty()) - { - std::string errstr = "can't open " + images2[i]; - throw runtime_error(errstr); - } - - Mat gray_frame; - - if (i == 0) - { - cvtColor(frame0, gray_frame, COLOR_BGR2GRAY); - } - - for (int points = 1000; points <= 4000; points *= 2) - { - if (i == 0) - SUBTEST << frame0.cols << "x" << frame0.rows << "; color; " << points << " points"; - else - SUBTEST << frame0.cols << "x" << frame0.rows << "; gray; " << points << " points"; - Mat nextPts_cpu; - Mat status_cpu; - - vector pts; - goodFeaturesToTrack(i == 0 ? gray_frame : frame0, pts, points, 0.01, 0.0); - - vector nextPts; - vector status; - - vector err; - - calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, err); - - CPU_ON; - calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, err); - CPU_OFF; - -#ifdef USE_OPENCL - ocl::PyrLKOpticalFlow d_pyrLK; - - ocl::oclMat d_frame0(frame0); - ocl::oclMat d_frame1(frame1); - - ocl::oclMat d_pts; - Mat pts_mat(1, (int)pts.size(), CV_32FC2, (void *)&pts[0]); - d_pts.upload(pts_mat); - - ocl::oclMat d_nextPts; - ocl::oclMat d_status; - ocl::oclMat d_err; - - WARMUP_ON; - d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err); - WARMUP_OFF; - - GPU_ON; - d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err); - GPU_OFF; - - GPU_FULL_ON; - d_frame0.upload(frame0); - d_frame1.upload(frame1); - d_pts.upload(pts_mat); - d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err); - - if (!d_nextPts.empty()) - { - d_nextPts.download(nextPts_cpu); - } - - if (!d_status.empty()) - { - d_status.download(status_cpu); - } - - GPU_FULL_OFF; -#endif - } - - } -} - - -///////////// pyrDown ////////////////////// -TEST(pyrDown) -{ - Mat src, dst; - int all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src, size, size, all_type[j], 0, 256); - - pyrDown(src, dst); - - CPU_ON; - pyrDown(src, dst); - CPU_OFF; - -#ifdef USE_OPENCL - ocl::oclMat d_src(src); - ocl::oclMat d_dst; - - WARMUP_ON; - ocl::pyrDown(d_src, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::pyrDown(d_src, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::pyrDown(d_src, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - } -} - -///////////// pyrUp //////////////////////// -TEST(pyrUp) -{ - Mat src, dst; - int all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - - for (int size = 500; size <= 2000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src, size, size, all_type[j], 0, 256); - - pyrUp(src, dst); - - CPU_ON; - pyrUp(src, dst); - CPU_OFF; - -#ifdef USE_OPENCL - ocl::oclMat d_src(src); - ocl::oclMat d_dst; - - WARMUP_ON; - ocl::pyrUp(d_src, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::pyrUp(d_src, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::pyrUp(d_src, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - } -} - -///////////// Canny //////////////////////// -TEST(Canny) -{ - Mat img = imread(abspath("aloeL.jpg"), CV_LOAD_IMAGE_GRAYSCALE); - - if (img.empty()) - { - throw runtime_error("can't open aloeL.jpg"); - } - - SUBTEST << img.cols << 'x' << img.rows << "; aloeL.jpg" << "; edges" << "; CV_8UC1"; - - Mat edges(img.size(), CV_8UC1); - - CPU_ON; - Canny(img, edges, 50.0, 100.0); - CPU_OFF; - -#ifdef USE_OPENCL - ocl::oclMat d_img(img); - ocl::oclMat d_edges; - ocl::CannyBuf d_buf; - - WARMUP_ON; - ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0); - WARMUP_OFF; - - GPU_ON; - ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0); - GPU_OFF; - - GPU_FULL_ON; - d_img.upload(img); - ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0); - d_edges.download(edges); - GPU_FULL_OFF; -#endif -} - -///////////// Haar //////////////////////// -#ifdef USE_OPENCL -namespace cv -{ -namespace ocl -{ - -struct getRect -{ - Rect operator()(const CvAvgComp &e) const - { - return e.rect; - } -}; - -class CascadeClassifier_GPU : public OclCascadeClassifier -{ -public: - void detectMultiScale(oclMat &image, - CV_OUT std::vector& faces, - double scaleFactor = 1.1, - int minNeighbors = 3, int flags = 0, - Size minSize = Size(), - Size maxSize = Size()) - { - (void)maxSize; - MemStorage storage(cvCreateMemStorage(0)); - //CvMat img=image; - CvSeq *objs = oclHaarDetectObjects(image, storage, scaleFactor, minNeighbors, flags, minSize); - vector vecAvgComp; - Seq(objs).copyTo(vecAvgComp); - faces.resize(vecAvgComp.size()); - std::transform(vecAvgComp.begin(), vecAvgComp.end(), faces.begin(), getRect()); - } - -}; - -} -} -#endif -TEST(Haar) -{ - Mat img = imread(abspath("basketball1.png"), CV_LOAD_IMAGE_GRAYSCALE); - - if (img.empty()) - { - throw runtime_error("can't open basketball1.png"); - } - - CascadeClassifier faceCascadeCPU; - - if (!faceCascadeCPU.load(abspath("haarcascade_frontalface_alt.xml"))) - { - throw runtime_error("can't load haarcascade_frontalface_alt.xml"); - } - - vector faces; - - SUBTEST << img.cols << "x" << img.rows << "; scale image"; - CPU_ON; - faceCascadeCPU.detectMultiScale(img, faces, - 1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30)); - CPU_OFF; - -#ifdef USE_OPENCL - ocl::CascadeClassifier_GPU faceCascade; - - if (!faceCascade.load(abspath("haarcascade_frontalface_alt.xml"))) - { - throw runtime_error("can't load haarcascade_frontalface_alt.xml"); - } - - ocl::oclMat d_img(img); - - faces.clear(); - - WARMUP_ON; - faceCascade.detectMultiScale(d_img, faces, - 1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30)); - WARMUP_OFF; - - faces.clear(); - - GPU_ON; - faceCascade.detectMultiScale(d_img, faces, - 1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30)); - GPU_OFF; - - GPU_FULL_ON; - d_img.upload(img); - faceCascade.detectMultiScale(d_img, faces, - 1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30)); - GPU_FULL_OFF; -#endif -} - -///////////// blend //////////////////////// -template -void blendLinearGold(const cv::Mat &img1, const cv::Mat &img2, const cv::Mat &weights1, const cv::Mat &weights2, cv::Mat &result_gold) -{ - result_gold.create(img1.size(), img1.type()); - - int cn = img1.channels(); - - for (int y = 0; y < img1.rows; ++y) - { - const float *weights1_row = weights1.ptr(y); - const float *weights2_row = weights2.ptr(y); - const T *img1_row = img1.ptr(y); - const T *img2_row = img2.ptr(y); - T *result_gold_row = result_gold.ptr(y); - - for (int x = 0; x < img1.cols * cn; ++x) - { - float w1 = weights1_row[x / cn]; - float w2 = weights2_row[x / cn]; - result_gold_row[x] = static_cast((img1_row[x] * w1 + img2_row[x] * w2) / (w1 + w2 + 1e-5f)); - } - } -} -TEST(blend) -{ - Mat src1, src2, weights1, weights2, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src1, d_src2, d_weights1, d_weights2, d_dst; -#endif - int all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] << " and CV_32FC1"; - - gen(src1, size, size, all_type[j], 0, 256); - gen(src2, size, size, all_type[j], 0, 256); - gen(weights1, size, size, CV_32FC1, 0, 1); - gen(weights2, size, size, CV_32FC1, 0, 1); - - blendLinearGold(src1, src2, weights1, weights2, dst); - - CPU_ON; - blendLinearGold(src1, src2, weights1, weights2, dst); - CPU_OFF; - -#ifdef USE_OPENCL - d_src1.upload(src1); - d_src2.upload(src2); - d_weights1.upload(weights1); - d_weights2.upload(weights2); - - WARMUP_ON; - ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src1.upload(src1); - d_src2.upload(src2); - d_weights1.upload(weights1); - d_weights2.upload(weights2); - ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - } -} -///////////// columnSum//////////////////////// -TEST(columnSum) -{ - Mat src, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst; -#endif - - for (int size = 1000; size <= 4000; size *= 2) - { - SUBTEST << size << 'x' << size << "; CV_32FC1"; - - gen(src, size, size, CV_32FC1, 0, 256); - - CPU_ON; - dst.create(src.size(), src.type()); - - for (int i = 1; i < src.rows; ++i) - { - for (int j = 0; j < src.cols; ++j) - { - dst.at(i, j) = src.at(i, j) += src.at(i - 1, j); - } - } - - CPU_OFF; - -#ifdef USE_OPENCL - d_src.upload(src); - WARMUP_ON; - ocl::columnSum(d_src, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::columnSum(d_src, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::columnSum(d_src, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } -} - -///////////// HOG//////////////////////// -TEST(HOG) -{ - Mat src = imread(abspath("road.png"), cv::IMREAD_GRAYSCALE); - - if (src.empty()) - { - throw runtime_error("can't open road.png"); - } - - - cv::HOGDescriptor hog; - hog.setSVMDetector(hog.getDefaultPeopleDetector()); - std::vector found_locations; - - SUBTEST << 768 << 'x' << 576 << "; road.png"; - - hog.detectMultiScale(src, found_locations); - - CPU_ON; - hog.detectMultiScale(src, found_locations); - CPU_OFF; - -#ifdef USE_OPENCL - cv::ocl::HOGDescriptor ocl_hog; - ocl_hog.setSVMDetector(ocl_hog.getDefaultPeopleDetector()); - ocl::oclMat d_src; - d_src.upload(src); - - WARMUP_ON; - ocl_hog.detectMultiScale(d_src, found_locations); - WARMUP_OFF; - - GPU_ON; - ocl_hog.detectMultiScale(d_src, found_locations); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl_hog.detectMultiScale(d_src, found_locations); - GPU_FULL_OFF; -#endif -} - -///////////// SURF //////////////////////// - -TEST(SURF) -{ - Mat keypoints_cpu; - Mat descriptors_cpu; - - Mat src = imread(abspath("aloeL.jpg"), CV_LOAD_IMAGE_GRAYSCALE); - - if (src.empty()) - { - throw runtime_error("can't open aloeL.jpg"); - } - - SUBTEST << src.cols << "x" << src.rows << "; aloeL.jpg"; - SURF surf; - vector keypoints; - Mat descriptors; - - surf(src, Mat(), keypoints, descriptors); - - CPU_ON; - keypoints.clear(); - surf(src, Mat(), keypoints, descriptors); - CPU_OFF; - -#ifdef USE_OPENCL - ocl::SURF_OCL d_surf; - ocl::oclMat d_src(src); - ocl::oclMat d_keypoints; - ocl::oclMat d_descriptors; - - WARMUP_ON; - d_surf(d_src, ocl::oclMat(), d_keypoints, d_descriptors); - WARMUP_OFF; - - GPU_ON; - d_surf(d_src, ocl::oclMat(), d_keypoints, d_descriptors); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - d_surf(d_src, ocl::oclMat(), d_keypoints, d_descriptors); - - if (!d_keypoints.empty()) - { - d_keypoints.download(keypoints_cpu); - } - - if (!d_descriptors.empty()) - { - d_descriptors.download(descriptors_cpu); - } - - GPU_FULL_OFF; -#endif -} -//////////////////// BruteForceMatch ///////////////// -TEST(BruteForceMatcher) -{ - Mat trainIdx_cpu; - Mat distance_cpu; - Mat allDist_cpu; - Mat nMatches_cpu; - - for (int size = 1000; size <= 4000; size *= 2) - { - // Init CPU matcher - int desc_len = 64; - - BFMatcher matcher(NORM_L2); - - Mat query; - gen(query, size, desc_len, CV_32F, 0, 1); - - Mat train; - gen(train, size, desc_len, CV_32F, 0, 1); - // Output - vector< vector > matches(2); -#ifdef USE_OPENCL - // Init GPU matcher - ocl::BruteForceMatcher_OCL_base d_matcher(ocl::BruteForceMatcher_OCL_base::L2Dist); - - ocl::oclMat d_query(query); - ocl::oclMat d_train(train); - - ocl::oclMat d_trainIdx, d_distance, d_allDist, d_nMatches; -#endif - SUBTEST << size << "; match"; - - matcher.match(query, train, matches[0]); - - CPU_ON; - matcher.match(query, train, matches[0]); - CPU_OFF; - -#ifdef USE_OPENCL - WARMUP_ON; - d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance); - WARMUP_OFF; - - GPU_ON; - d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance); - GPU_OFF; - - GPU_FULL_ON; - d_query.upload(query); - d_train.upload(train); - d_matcher.match(d_query, d_train, matches[0]); - GPU_FULL_OFF; -#endif - - SUBTEST << size << "; knnMatch"; - - matcher.knnMatch(query, train, matches, 2); - - CPU_ON; - matcher.knnMatch(query, train, matches, 2); - CPU_OFF; - -#ifdef USE_OPENCL - WARMUP_ON; - d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, 2); - WARMUP_OFF; - - GPU_ON; - d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, 2); - GPU_OFF; - - GPU_FULL_ON; - d_query.upload(query); - d_train.upload(train); - d_matcher.knnMatch(d_query, d_train, matches, 2); - GPU_FULL_OFF; -#endif - SUBTEST << size << "; radiusMatch"; - - float max_distance = 2.0f; - - matcher.radiusMatch(query, train, matches, max_distance); - - CPU_ON; - matcher.radiusMatch(query, train, matches, max_distance); - CPU_OFF; - -#ifdef USE_OPENCL - d_trainIdx.release(); - - WARMUP_ON; - d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, max_distance); - WARMUP_OFF; - - GPU_ON; - d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, max_distance); - GPU_OFF; - - GPU_FULL_ON; - d_query.upload(query); - d_train.upload(train); - d_matcher.radiusMatch(d_query, d_train, matches, max_distance); - GPU_FULL_OFF; -#endif - } -} -///////////// Lut //////////////////////// -TEST(lut) -{ - Mat src, lut, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_lut, d_dst; -#endif - int all_type[] = {CV_8UC1, CV_8UC3}; - std::string type_name[] = {"CV_8UC1", "CV_8UC3"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j]; - - gen(src, size, size, all_type[j], 0, 256); - gen(lut, 1, 256, CV_8UC1, 0, 1); - gen(dst, size, size, all_type[j], 0, 256); - - LUT(src, lut, dst); - - CPU_ON; - LUT(src, lut, dst); - CPU_OFF; - -#ifdef USE_OPENCL - d_src.upload(src); - d_lut.upload(lut); - - WARMUP_ON; - ocl::LUT(d_src, d_lut, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::LUT(d_src, d_lut, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - d_lut.upload(lut); - ocl::LUT(d_src, d_lut, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} -///////////// Exp //////////////////////// -TEST(Exp) -{ - Mat src, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst; -#endif - - for (int size = 1000; size <= 4000; size *= 2) - { - SUBTEST << size << 'x' << size << "; CV_32FC1"; - - gen(src, size, size, CV_32FC1, 0, 256); - gen(dst, size, size, CV_32FC1, 0, 256); - - exp(src, dst); - - CPU_ON; - exp(src, dst); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - ocl::exp(d_src, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::exp(d_src, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::exp(d_src, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } -} - -///////////// LOG //////////////////////// -TEST(Log) -{ - Mat src, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst; -#endif - - for (int size = 1000; size <= 4000; size *= 2) - { - SUBTEST << size << 'x' << size << "; 32F"; - - gen(src, size, size, CV_32F, 1, 10); - - log(src, dst); - - CPU_ON; - log(src, dst); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - ocl::log(d_src, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::log(d_src, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::log(d_src, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } -} - -///////////// Add //////////////////////// - -TEST(Add) -{ - Mat src1, src2, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src1, d_src2, d_dst; -#endif - int all_type[] = {CV_8UC1, CV_32FC1}; - std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j]; - - gen(src1, size, size, all_type[j], 0, 1); - gen(src2, size, size, all_type[j], 0, 1); - - add(src1, src2, dst); - - CPU_ON; - add(src1, src2, dst); - CPU_OFF; -#ifdef USE_OPENCL - d_src1.upload(src1); - d_src2.upload(src2); - - WARMUP_ON; - ocl::add(d_src1, d_src2, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::add(d_src1, d_src2, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src1.upload(src1); - d_src2.upload(src2); - ocl::add(d_src1, d_src2, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} -///////////// Mul //////////////////////// -TEST(Mul) -{ - Mat src1, src2, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src1, d_src2, d_dst; -#endif - int all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src1, size, size, all_type[j], 0, 256); - gen(src2, size, size, all_type[j], 0, 256); - gen(dst, size, size, all_type[j], 0, 256); - - - multiply(src1, src2, dst); - - CPU_ON; - multiply(src1, src2, dst); - CPU_OFF; -#ifdef USE_OPENCL - d_src1.upload(src1); - d_src2.upload(src2); - - WARMUP_ON; - ocl::multiply(d_src1, d_src2, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::multiply(d_src1, d_src2, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src1.upload(src1); - d_src2.upload(src2); - ocl::multiply(d_src1, d_src2, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} - -///////////// Div //////////////////////// -TEST(Div) -{ - Mat src1, src2, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src1, d_src2, d_dst; -#endif - int all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j]; - - gen(src1, size, size, all_type[j], 0, 256); - gen(src2, size, size, all_type[j], 0, 256); - gen(dst, size, size, all_type[j], 0, 256); - - - divide(src1, src2, dst); - - CPU_ON; - divide(src1, src2, dst); - CPU_OFF; -#ifdef USE_OPENCL - d_src1.upload(src1); - d_src2.upload(src2); - - WARMUP_ON; - ocl::divide(d_src1, d_src2, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::divide(d_src1, d_src2, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src1.upload(src1); - d_src2.upload(src2); - ocl::divide(d_src1, d_src2, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} - -///////////// Absdiff //////////////////////// -TEST(Absdiff) -{ - Mat src1, src2, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src1, d_src2, d_dst; -#endif - int all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src1, size, size, all_type[j], 0, 256); - gen(src2, size, size, all_type[j], 0, 256); - gen(dst, size, size, all_type[j], 0, 256); - - - absdiff(src1, src2, dst); - - CPU_ON; - absdiff(src1, src2, dst); - CPU_OFF; -#ifdef USE_OPENCL - d_src1.upload(src1); - d_src2.upload(src2); - - WARMUP_ON; - ocl::absdiff(d_src1, d_src2, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::absdiff(d_src1, d_src2, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src1.upload(src1); - d_src2.upload(src2); - ocl::absdiff(d_src1, d_src2, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} - -///////////// CartToPolar //////////////////////// -TEST(CartToPolar) -{ - Mat src1, src2, dst, dst1; -#ifdef USE_OPENCL - ocl::oclMat d_src1, d_src2, d_dst, d_dst1; -#endif - int all_type[] = {CV_32FC1}; - std::string type_name[] = {"CV_32FC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j]; - - gen(src1, size, size, all_type[j], 0, 256); - gen(src2, size, size, all_type[j], 0, 256); - gen(dst, size, size, all_type[j], 0, 256); - gen(dst1, size, size, all_type[j], 0, 256); - - - cartToPolar(src1, src2, dst, dst1, 1); - - CPU_ON; - cartToPolar(src1, src2, dst, dst1, 1); - CPU_OFF; -#ifdef USE_OPENCL - d_src1.upload(src1); - d_src2.upload(src2); - - WARMUP_ON; - ocl::cartToPolar(d_src1, d_src2, d_dst, d_dst1, 1); - WARMUP_OFF; - - GPU_ON; - ocl::cartToPolar(d_src1, d_src2, d_dst, d_dst1, 1); - GPU_OFF; - - GPU_FULL_ON; - d_src1.upload(src1); - d_src2.upload(src2); - ocl::cartToPolar(d_src1, d_src2, d_dst, d_dst1, 1); - d_dst.download(dst); - d_dst1.download(dst1); - GPU_FULL_OFF; -#endif - } - - } -} - -///////////// PolarToCart //////////////////////// -TEST(PolarToCart) -{ - Mat src1, src2, dst, dst1; -#ifdef USE_OPENCL - ocl::oclMat d_src1, d_src2, d_dst, d_dst1; -#endif - int all_type[] = {CV_32FC1}; - std::string type_name[] = {"CV_32FC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src1, size, size, all_type[j], 0, 256); - gen(src2, size, size, all_type[j], 0, 256); - gen(dst, size, size, all_type[j], 0, 256); - gen(dst1, size, size, all_type[j], 0, 256); - - - polarToCart(src1, src2, dst, dst1, 1); - - CPU_ON; - polarToCart(src1, src2, dst, dst1, 1); - CPU_OFF; -#ifdef USE_OPENCL - d_src1.upload(src1); - d_src2.upload(src2); - - WARMUP_ON; - ocl::polarToCart(d_src1, d_src2, d_dst, d_dst1, 1); - WARMUP_OFF; - - GPU_ON; - ocl::polarToCart(d_src1, d_src2, d_dst, d_dst1, 1); - GPU_OFF; - - GPU_FULL_ON; - d_src1.upload(src1); - d_src2.upload(src2); - ocl::polarToCart(d_src1, d_src2, d_dst, d_dst1, 1); - d_dst.download(dst); - d_dst1.download(dst1); - GPU_FULL_OFF; -#endif - } - - } -} - -///////////// Magnitude //////////////////////// -TEST(magnitude) -{ - Mat x, y, mag; -#ifdef USE_OPENCL - ocl::oclMat d_x, d_y, d_mag; -#endif - int all_type[] = {CV_32FC1}; - std::string type_name[] = {"CV_32FC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j]; - - gen(x, size, size, all_type[j], 0, 1); - gen(y, size, size, all_type[j], 0, 1); - - magnitude(x, y, mag); - - CPU_ON; - magnitude(x, y, mag); - CPU_OFF; -#ifdef USE_OPENCL - d_x.upload(x); - d_y.upload(y); - - WARMUP_ON; - ocl::magnitude(d_x, d_y, d_mag); - WARMUP_OFF; - - GPU_ON; - ocl::magnitude(d_x, d_y, d_mag); - GPU_OFF; - - GPU_FULL_ON; - d_x.upload(x); - d_y.upload(y); - ocl::magnitude(d_x, d_y, d_mag); - d_mag.download(mag); - GPU_FULL_OFF; -#endif - } - - } -} - -///////////// Transpose //////////////////////// -TEST(Transpose) -{ - Mat src, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst; -#endif - int all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j]; - - gen(src, size, size, all_type[j], 0, 256); - gen(dst, size, size, all_type[j], 0, 256); - - transpose(src, dst); - - CPU_ON; - transpose(src, dst); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - ocl::transpose(d_src, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::transpose(d_src, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::transpose(d_src, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} - -///////////// Flip //////////////////////// -TEST(Flip) -{ - Mat src, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst; -#endif - int all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] << " ; FLIP_BOTH"; - - gen(src, size, size, all_type[j], 0, 256); - gen(dst, size, size, all_type[j], 0, 256); - - flip(src, dst, 0); - - CPU_ON; - flip(src, dst, 0); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - ocl::flip(d_src, d_dst, 0); - WARMUP_OFF; - - GPU_ON; - ocl::flip(d_src, d_dst, 0); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::flip(d_src, d_dst, 0); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} - -///////////// minMax //////////////////////// -TEST(minMax) -{ - Mat src; -#ifdef USE_OPENCL - ocl::oclMat d_src; -#endif - double min_val, max_val; - Point min_loc, max_loc; - int all_type[] = {CV_8UC1, CV_32FC1}; - std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j]; - - gen(src, size, size, all_type[j], 0, 256); - - CPU_ON; - minMaxLoc(src, &min_val, &max_val, &min_loc, &max_loc); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - ocl::minMax(d_src, &min_val, &max_val); - WARMUP_OFF; - - GPU_ON; - ocl::minMax(d_src, &min_val, &max_val); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::minMax(d_src, &min_val, &max_val); - GPU_FULL_OFF; -#endif - } - - } -} - -///////////// minMaxLoc //////////////////////// -TEST(minMaxLoc) -{ - Mat src; -#ifdef USE_OPENCL - ocl::oclMat d_src; -#endif - double min_val, max_val; - Point min_loc, max_loc; - int all_type[] = {CV_8UC1, CV_32FC1}; - std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src, size, size, all_type[j], 0, 1); - - CPU_ON; - minMaxLoc(src, &min_val, &max_val, &min_loc, &max_loc); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - ocl::minMaxLoc(d_src, &min_val, &max_val, &min_loc, &max_loc); - WARMUP_OFF; - - GPU_ON; - ocl::minMaxLoc(d_src, &min_val, &max_val, &min_loc, &max_loc); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::minMaxLoc(d_src, &min_val, &max_val, &min_loc, &max_loc); - GPU_FULL_OFF; -#endif - } - - } -} -///////////// Sum //////////////////////// -TEST(Sum) -{ - Mat src; - Scalar cpures, gpures; -#ifdef USE_OPENCL - ocl::oclMat d_src; -#endif - int all_type[] = {CV_8UC1, CV_32SC1}; - std::string type_name[] = {"CV_8UC1", "CV_32SC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src, size, size, all_type[j], 0, 256); - - cpures = sum(src); - - CPU_ON; - cpures = sum(src); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - gpures = ocl::sum(d_src); - WARMUP_OFF; - - GPU_ON; - gpures = ocl::sum(d_src); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - gpures = ocl::sum(d_src); - GPU_FULL_OFF; -#endif - } - - } -} -///////////// countNonZero //////////////////////// -TEST(countNonZero) -{ - Mat src; -#ifdef USE_OPENCL - ocl::oclMat d_src; -#endif - int all_type[] = {CV_8UC1, CV_32FC1}; - std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src, size, size, all_type[j], 0, 256); - - countNonZero(src); - - CPU_ON; - countNonZero(src); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - ocl::countNonZero(d_src); - WARMUP_OFF; - - GPU_ON; - ocl::countNonZero(d_src); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::countNonZero(d_src); - GPU_FULL_OFF; -#endif - } - - } -} -///////////// Phase //////////////////////// -TEST(Phase) -{ - Mat src1, src2, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src1, d_src2, d_dst; -#endif - int all_type[] = {CV_32FC1}; - std::string type_name[] = {"CV_32FC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src1, size, size, all_type[j], 0, 256); - gen(src2, size, size, all_type[j], 0, 256); - gen(dst, size, size, all_type[j], 0, 256); - - - phase(src1, src2, dst, 1); - - CPU_ON; - phase(src1, src2, dst, 1); - CPU_OFF; -#ifdef USE_OPENCL - d_src1.upload(src1); - d_src2.upload(src2); - - WARMUP_ON; - ocl::phase(d_src1, d_src2, d_dst, 1); - WARMUP_OFF; - - GPU_ON; - ocl::phase(d_src1, d_src2, d_dst, 1); - GPU_OFF; - - GPU_FULL_ON; - d_src1.upload(src1); - d_src2.upload(src2); - ocl::phase(d_src1, d_src2, d_dst, 1); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} - -///////////// bitwise_and//////////////////////// -TEST(bitwise_and) -{ - Mat src1, src2, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src1, d_src2, d_dst; -#endif - int all_type[] = {CV_8UC1, CV_32SC1}; - std::string type_name[] = {"CV_8UC1", "CV_32SC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src1, size, size, all_type[j], 0, 256); - gen(src2, size, size, all_type[j], 0, 256); - gen(dst, size, size, all_type[j], 0, 256); - - - bitwise_and(src1, src2, dst); - - CPU_ON; - bitwise_and(src1, src2, dst); - CPU_OFF; -#ifdef USE_OPENCL - d_src1.upload(src1); - d_src2.upload(src2); - - WARMUP_ON; - ocl::bitwise_and(d_src1, d_src2, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::bitwise_and(d_src1, d_src2, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src1.upload(src1); - d_src2.upload(src2); - ocl::bitwise_and(d_src1, d_src2, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} -///////////// bitwise_or//////////////////////// -TEST(bitwise_or) -{ - Mat src1, src2, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src1, d_src2, d_dst; -#endif - int all_type[] = {CV_8UC1, CV_32SC1}; - std::string type_name[] = {"CV_8UC1", "CV_32SC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j]; - - gen(src1, size, size, all_type[j], 0, 256); - gen(src2, size, size, all_type[j], 0, 256); - gen(dst, size, size, all_type[j], 0, 256); - - - bitwise_or(src1, src2, dst); - - CPU_ON; - bitwise_or(src1, src2, dst); - CPU_OFF; -#ifdef USE_OPENCL - d_src1.upload(src1); - d_src2.upload(src2); - - WARMUP_ON; - ocl::bitwise_or(d_src1, d_src2, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::bitwise_or(d_src1, d_src2, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src1.upload(src1); - d_src2.upload(src2); - ocl::bitwise_or(d_src1, d_src2, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} -///////////// bitwise_xor//////////////////////// -TEST(bitwise_xor) -{ - Mat src1, src2, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src1, d_src2, d_dst; -#endif - int all_type[] = {CV_8UC1, CV_32SC1}; - std::string type_name[] = {"CV_8UC1", "CV_32SC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j]; - - gen(src1, size, size, all_type[j], 0, 256); - gen(src2, size, size, all_type[j], 0, 256); - gen(dst, size, size, all_type[j], 0, 256); - - - bitwise_xor(src1, src2, dst); - - CPU_ON; - bitwise_xor(src1, src2, dst); - CPU_OFF; -#ifdef USE_OPENCL - d_src1.upload(src1); - d_src2.upload(src2); - - WARMUP_ON; - ocl::bitwise_xor(d_src1, d_src2, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::bitwise_xor(d_src1, d_src2, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src1.upload(src1); - d_src2.upload(src2); - ocl::bitwise_xor(d_src1, d_src2, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} -///////////// bitwise_not//////////////////////// -TEST(bitwise_not) -{ - Mat src1, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src1, d_dst; -#endif - int all_type[] = {CV_8UC1, CV_32SC1}; - std::string type_name[] = {"CV_8UC1", "CV_32SC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src1, size, size, all_type[j], 0, 256); - gen(dst, size, size, all_type[j], 0, 256); - - - bitwise_not(src1, dst); - - CPU_ON; - bitwise_not(src1, dst); - CPU_OFF; -#ifdef USE_OPENCL - d_src1.upload(src1); - - WARMUP_ON; - ocl::bitwise_not(d_src1, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::bitwise_not(d_src1, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src1.upload(src1); - ocl::bitwise_not(d_src1, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} - -///////////// compare//////////////////////// -TEST(compare) -{ - Mat src1, src2, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src1, d_src2, d_dst; -#endif - int CMP_EQ = 0; - int all_type[] = {CV_8UC1, CV_32FC1}; - std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src1, size, size, all_type[j], 0, 256); - gen(src2, size, size, all_type[j], 0, 256); - gen(dst, size, size, all_type[j], 0, 256); - - - compare(src1, src2, dst, CMP_EQ); - - CPU_ON; - compare(src1, src2, dst, CMP_EQ); - CPU_OFF; -#ifdef USE_OPENCL - d_src1.upload(src1); - d_src2.upload(src2); - - WARMUP_ON; - ocl::compare(d_src1, d_src2, d_dst, CMP_EQ); - WARMUP_OFF; - - GPU_ON; - ocl::compare(d_src1, d_src2, d_dst, CMP_EQ); - GPU_OFF; - - GPU_FULL_ON; - d_src1.upload(src1); - d_src2.upload(src2); - ocl::compare(d_src1, d_src2, d_dst, CMP_EQ); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} - -///////////// pow //////////////////////// -TEST(pow) -{ - Mat src, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst; -#endif - int all_type[] = {CV_32FC1}; - std::string type_name[] = {"CV_32FC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src, size, size, all_type[j], 0, 100); - gen(dst, size, size, all_type[j], 0, 100); - - pow(src, -2.0, dst); - - CPU_ON; - pow(src, -2.0, dst); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - d_dst.upload(dst); - - WARMUP_ON; - ocl::pow(d_src, -2.0, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::pow(d_src, -2.0, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::pow(d_src, -2.0, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} - -///////////// MagnitudeSqr//////////////////////// -TEST(MagnitudeSqr) -{ - Mat src1, src2, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src1, d_src2, d_dst; -#endif - int all_type[] = {CV_32FC1}; - std::string type_name[] = {"CV_32FC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t t = 0; t < sizeof(all_type) / sizeof(int); t++) - { - SUBTEST << size << 'x' << size << "; " << type_name[t]; - - gen(src1, size, size, all_type[t], 0, 256); - gen(src2, size, size, all_type[t], 0, 256); - gen(dst, size, size, all_type[t], 0, 256); - - - for (int i = 0; i < src1.rows; ++i) - - for (int j = 0; j < src1.cols; ++j) - { - float val1 = src1.at(i, j); - float val2 = src2.at(i, j); - - ((float *)(dst.data))[i * dst.step / 4 + j] = val1 * val1 + val2 * val2; - - } - - CPU_ON; - - for (int i = 0; i < src1.rows; ++i) - for (int j = 0; j < src1.cols; ++j) - { - float val1 = src1.at(i, j); - float val2 = src2.at(i, j); - - ((float *)(dst.data))[i * dst.step / 4 + j] = val1 * val1 + val2 * val2; - - } - - CPU_OFF; -#ifdef USE_OPENCL - d_src1.upload(src1); - d_src2.upload(src2); - - WARMUP_ON; - ocl::magnitudeSqr(d_src1, d_src2, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::magnitudeSqr(d_src1, d_src2, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src1.upload(src1); - d_src2.upload(src2); - ocl::magnitudeSqr(d_src1, d_src2, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} - -///////////// AddWeighted//////////////////////// -TEST(AddWeighted) -{ - Mat src1, src2, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src1, d_src2, d_dst; -#endif - double alpha = 2.0, beta = 1.0, gama = 3.0; - int all_type[] = {CV_8UC1, CV_32FC1}; - std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src1, size, size, all_type[j], 0, 256); - gen(src2, size, size, all_type[j], 0, 256); - gen(dst, size, size, all_type[j], 0, 256); - - - addWeighted(src1, alpha, src2, beta, gama, dst); - - CPU_ON; - addWeighted(src1, alpha, src2, beta, gama, dst); - CPU_OFF; -#ifdef USE_OPENCL - d_src1.upload(src1); - d_src2.upload(src2); - - WARMUP_ON; - ocl::addWeighted(d_src1, alpha, d_src2, beta, gama, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::addWeighted(d_src1, alpha, d_src2, beta, gama, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src1.upload(src1); - d_src2.upload(src2); - ocl::addWeighted(d_src1, alpha, d_src2, beta, gama, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} -///////////// Blur//////////////////////// -TEST(Blur) -{ - Mat src1, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src1, d_dst; -#endif - Size ksize = Size(3, 3); - int bordertype = BORDER_CONSTANT; - int all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src1, size, size, all_type[j], 0, 256); - gen(dst, size, size, all_type[j], 0, 256); - - - blur(src1, dst, ksize, Point(-1, -1), bordertype); - - CPU_ON; - blur(src1, dst, ksize, Point(-1, -1), bordertype); - CPU_OFF; -#ifdef USE_OPENCL - d_src1.upload(src1); - - WARMUP_ON; - ocl::blur(d_src1, d_dst, ksize, Point(-1, -1), bordertype); - WARMUP_OFF; - - GPU_ON; - ocl::blur(d_src1, d_dst, ksize, Point(-1, -1), bordertype); - GPU_OFF; - - GPU_FULL_ON; - d_src1.upload(src1); - ocl::blur(d_src1, d_dst, ksize, Point(-1, -1), bordertype); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} -///////////// Laplacian//////////////////////// -TEST(Laplacian) -{ - Mat src1, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src1, d_dst; -#endif - int ksize = 3; - int all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src1, size, size, all_type[j], 0, 256); - gen(dst, size, size, all_type[j], 0, 256); - - - Laplacian(src1, dst, -1, ksize, 1); - - CPU_ON; - Laplacian(src1, dst, -1, ksize, 1); - CPU_OFF; -#ifdef USE_OPENCL - d_src1.upload(src1); - - WARMUP_ON; - ocl::Laplacian(d_src1, d_dst, -1, ksize, 1); - WARMUP_OFF; - - GPU_ON; - ocl::Laplacian(d_src1, d_dst, -1, ksize, 1); - GPU_OFF; - - GPU_FULL_ON; - d_src1.upload(src1); - ocl::Laplacian(d_src1, d_dst, -1, ksize, 1); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} - -///////////// Erode //////////////////// -TEST(Erode) -{ - Mat src, dst, ker; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst; -#endif - int all_type[] = {CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4", "CV_32FC1", "CV_32FC4"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src, size, size, all_type[j], Scalar::all(0), Scalar::all(256)); - ker = getStructuringElement(MORPH_RECT, Size(3, 3)); - - erode(src, dst, ker); - - CPU_ON; - erode(src, dst, ker); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - ocl::erode(d_src, d_dst, ker); - WARMUP_OFF; - - GPU_ON; - ocl::erode(d_src, d_dst, ker); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::erode(d_src, d_dst, ker); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} - -///////////// Sobel //////////////////////// -TEST(Sobel) -{ - Mat src, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst; -#endif - int dx = 1; - int dy = 1; - int all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src, size, size, all_type[j], 0, 256); - - Sobel(src, dst, -1, dx, dy); - - CPU_ON; - Sobel(src, dst, -1, dx, dy); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - ocl::Sobel(d_src, d_dst, -1, dx, dy); - WARMUP_OFF; - - GPU_ON; - ocl::Sobel(d_src, d_dst, -1, dx, dy); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::Sobel(d_src, d_dst, -1, dx, dy); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} -///////////// Scharr //////////////////////// -TEST(Scharr) -{ - Mat src, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst; -#endif - int dx = 1; - int dy = 0; - int all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src, size, size, all_type[j], 0, 256); - - Scharr(src, dst, -1, dx, dy); - - CPU_ON; - Scharr(src, dst, -1, dx, dy); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - ocl::Scharr(d_src, d_dst, -1, dx, dy); - WARMUP_OFF; - - GPU_ON; - ocl::Scharr(d_src, d_dst, -1, dx, dy); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::Scharr(d_src, d_dst, -1, dx, dy); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} - -///////////// GaussianBlur //////////////////////// -TEST(GaussianBlur) -{ - Mat src, dst; - int all_type[] = {CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4", "CV_32FC1", "CV_32FC4"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src, size, size, all_type[j], 0, 256); - - GaussianBlur(src, dst, Size(9, 9), 0); - - CPU_ON; - GaussianBlur(src, dst, Size(9, 9), 0); - CPU_OFF; -#ifdef USE_OPENCL - ocl::oclMat d_src(src); - ocl::oclMat d_dst(src.size(), src.type()); - ocl::oclMat d_buf; - - WARMUP_ON; - ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0); - WARMUP_OFF; - - GPU_ON; - ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} -///////////// equalizeHist //////////////////////// -TEST(equalizeHist) -{ - Mat src, dst; - int all_type[] = {CV_8UC1}; - std::string type_name[] = {"CV_8UC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src, size, size, all_type[j], 0, 256); - - equalizeHist(src, dst); - - CPU_ON; - equalizeHist(src, dst); - CPU_OFF; -#ifdef USE_OPENCL - ocl::oclMat d_src(src); - ocl::oclMat d_dst; - ocl::oclMat d_hist; - ocl::oclMat d_buf; - - WARMUP_ON; - ocl::equalizeHist(d_src, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::equalizeHist(d_src, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::equalizeHist(d_src, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} -/////////// CopyMakeBorder ////////////////////// -TEST(CopyMakeBorder) -{ - Mat src, dst; -#ifdef USE_OPENCL - ocl::oclMat d_dst; -#endif - int bordertype = BORDER_CONSTANT; - int all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - - gen(src, size, size, all_type[j], 0, 256); - - copyMakeBorder(src, dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0)); - - CPU_ON; - copyMakeBorder(src, dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0)); - CPU_OFF; -#ifdef USE_OPENCL - ocl::oclMat d_src(src); - - WARMUP_ON; - ocl::copyMakeBorder(d_src, d_dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0)); - WARMUP_OFF; - - GPU_ON; - ocl::copyMakeBorder(d_src, d_dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0)); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::copyMakeBorder(d_src, d_dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0)); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} -///////////// cornerMinEigenVal //////////////////////// -TEST(cornerMinEigenVal) -{ - Mat src, dst; -#ifdef USE_OPENCL - ocl::oclMat d_dst; -#endif - int blockSize = 7, apertureSize = 1 + 2 * (rand() % 4); - int borderType = BORDER_REFLECT; - int all_type[] = {CV_8UC1, CV_32FC1}; - std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - - gen(src, size, size, all_type[j], 0, 256); - - cornerMinEigenVal(src, dst, blockSize, apertureSize, borderType); - - CPU_ON; - cornerMinEigenVal(src, dst, blockSize, apertureSize, borderType); - CPU_OFF; -#ifdef USE_OPENCL - ocl::oclMat d_src(src); - - WARMUP_ON; - ocl::cornerMinEigenVal(d_src, d_dst, blockSize, apertureSize, borderType); - WARMUP_OFF; - - GPU_ON; - ocl::cornerMinEigenVal(d_src, d_dst, blockSize, apertureSize, borderType); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::cornerMinEigenVal(d_src, d_dst, blockSize, apertureSize, borderType); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} -///////////// cornerHarris //////////////////////// -TEST(cornerHarris) -{ - Mat src, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst; -#endif - int all_type[] = {CV_8UC1, CV_32FC1}; - std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] << " ; BORDER_REFLECT"; - - gen(src, size, size, all_type[j], 0, 1); - - cornerHarris(src, dst, 5, 7, 0.1, BORDER_REFLECT); - - CPU_ON; - cornerHarris(src, dst, 5, 7, 0.1, BORDER_REFLECT); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - ocl::cornerHarris(d_src, d_dst, 5, 7, 0.1, BORDER_REFLECT); - WARMUP_OFF; - - GPU_ON; - ocl::cornerHarris(d_src, d_dst, 5, 7, 0.1, BORDER_REFLECT); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::cornerHarris(d_src, d_dst, 5, 7, 0.1, BORDER_REFLECT); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - - } -} -///////////// integral //////////////////////// -TEST(integral) -{ - Mat src, sum; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_sum, d_buf; -#endif - int all_type[] = {CV_8UC1}; - std::string type_name[] = {"CV_8UC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src, size, size, all_type[j], 0, 256); - - integral(src, sum); - - CPU_ON; - integral(src, sum); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - ocl::integral(d_src, d_sum); - WARMUP_OFF; - - GPU_ON; - ocl::integral(d_src, d_sum); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::integral(d_src, d_sum); - d_sum.download(sum); - GPU_FULL_OFF; -#endif - } - - } -} -///////////// WarpAffine //////////////////////// -TEST(WarpAffine) -{ - Mat src, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst; -#endif - static const double coeffs[2][3] = - { - {cos(3.14 / 6), -sin(3.14 / 6), 100.0}, - {sin(3.14 / 6), cos(3.14 / 6), -100.0} - }; - Mat M(2, 3, CV_64F, (void *)coeffs); - int interpolation = INTER_NEAREST; - - int all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src, size, size, all_type[j], 0, 256); - gen(dst, size, size, all_type[j], 0, 256); - Size size1 = Size(size, size); - - warpAffine(src, dst, M, size1, interpolation); - - CPU_ON; - warpAffine(src, dst, M, size1, interpolation); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - ocl::warpAffine(d_src, d_dst, M, size1, interpolation); - WARMUP_OFF; - - GPU_ON; - ocl::warpAffine(d_src, d_dst, M, size1, interpolation); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::warpAffine(d_src, d_dst, M, size1, interpolation); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} -///////////// WarpPerspective //////////////////////// -TEST(WarpPerspective) -{ - Mat src, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst; -#endif - static const double coeffs[3][3] = - { - {cos(3.14 / 6), -sin(3.14 / 6), 100.0}, - {sin(3.14 / 6), cos(3.14 / 6), -100.0}, - {0.0, 0.0, 1.0} - }; - Mat M(3, 3, CV_64F, (void *)coeffs); - int interpolation = INTER_NEAREST; - - int all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src, size, size, all_type[j], 0, 256); - gen(dst, size, size, all_type[j], 0, 256); - Size size1 = Size(size, size); - - warpPerspective(src, dst, M, size1, interpolation); - - CPU_ON; - warpPerspective(src, dst, M, size1, interpolation); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - ocl::warpPerspective(d_src, d_dst, M, size1, interpolation); - WARMUP_OFF; - - GPU_ON; - ocl::warpPerspective(d_src, d_dst, M, size1, interpolation); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::warpPerspective(d_src, d_dst, M, size1, interpolation); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} - -///////////// resize //////////////////////// -TEST(resize) -{ - Mat src, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst; -#endif - - int all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] << " ; up"; - - gen(src, size, size, all_type[j], 0, 256); - - resize(src, dst, Size(), 2.0, 2.0); - - CPU_ON; - resize(src, dst, Size(), 2.0, 2.0); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - ocl::resize(d_src, d_dst, Size(), 2.0, 2.0); - WARMUP_OFF; - - GPU_ON; - ocl::resize(d_src, d_dst, Size(), 2.0, 2.0); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::resize(d_src, d_dst, Size(), 2.0, 2.0); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] << " ; down"; - - gen(src, size, size, all_type[j], 0, 256); - - resize(src, dst, Size(), 0.5, 0.5); - - CPU_ON; - resize(src, dst, Size(), 0.5, 0.5); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - ocl::resize(d_src, d_dst, Size(), 0.5, 0.5); - WARMUP_OFF; - - GPU_ON; - ocl::resize(d_src, d_dst, Size(), 0.5, 0.5); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::resize(d_src, d_dst, Size(), 0.5, 0.5); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} -///////////// threshold//////////////////////// -TEST(threshold) -{ - Mat src, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst; -#endif - - for (int size = 1000; size <= 4000; size *= 2) - { - SUBTEST << size << 'x' << size << "; 8UC1; THRESH_BINARY"; - - gen(src, size, size, CV_8U, 0, 100); - - threshold(src, dst, 50.0, 0.0, THRESH_BINARY); - - CPU_ON; - threshold(src, dst, 50.0, 0.0, THRESH_BINARY); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY); - WARMUP_OFF; - - GPU_ON; - ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - for (int size = 1000; size <= 4000; size *= 2) - { - SUBTEST << size << 'x' << size << "; 32FC1; THRESH_TRUNC [NPP]"; - - gen(src, size, size, CV_32FC1, 0, 100); - - threshold(src, dst, 50.0, 0.0, THRESH_TRUNC); - - CPU_ON; - threshold(src, dst, 50.0, 0.0, THRESH_TRUNC); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC); - WARMUP_OFF; - - GPU_ON; - ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } -} -///////////// meanShiftFiltering//////////////////////// -TEST(meanShiftFiltering) -{ - int sp = 10, sr = 10; - - Mat src, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst; -#endif - - for (int size = 1000; size <= 4000; size *= 2) - { - SUBTEST << size << 'x' << size << "; 8UC3 vs 8UC4"; - - gen(src, size, size, CV_8UC3, Scalar::all(0), Scalar::all(256)); - - pyrMeanShiftFiltering(src, dst, sp, sr); - - CPU_ON; - pyrMeanShiftFiltering(src, dst, sp, sr); - CPU_OFF; -#ifdef USE_OPENCL - gen(src, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256)); - - d_src.upload(src); - - WARMUP_ON; - ocl::meanShiftFiltering(d_src, d_dst, sp, sr); - WARMUP_OFF; - - GPU_ON; - ocl::meanShiftFiltering(d_src, d_dst, sp, sr); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::meanShiftFiltering(d_src, d_dst, sp, sr); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } -} -///////////// meanShiftProc//////////////////////// -COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size size, int sp, int sr, int maxIter, float eps, int *tab) -{ - - int isr2 = sr * sr; - int c0, c1, c2, c3; - int iter; - uchar *ptr = NULL; - uchar *pstart = NULL; - int revx = 0, revy = 0; - c0 = sptr[0]; - c1 = sptr[1]; - c2 = sptr[2]; - c3 = sptr[3]; - - // iterate meanshift procedure - for (iter = 0; iter < maxIter; iter++) - { - int count = 0; - int s0 = 0, s1 = 0, s2 = 0, sx = 0, sy = 0; - - //mean shift: process pixels in window (p-sigmaSp)x(p+sigmaSp) - int minx = x0 - sp; - int miny = y0 - sp; - int maxx = x0 + sp; - int maxy = y0 + sp; - - //deal with the image boundary - if (minx < 0) - { - minx = 0; - } - - if (miny < 0) - { - miny = 0; - } - - if (maxx >= size.width) - { - maxx = size.width - 1; - } - - if (maxy >= size.height) - { - maxy = size.height - 1; - } - - if (iter == 0) - { - pstart = sptr; - } - else - { - pstart = pstart + revy * sstep + (revx << 2); //point to the new position - } - - ptr = pstart; - ptr = ptr + (miny - y0) * sstep + ((minx - x0) << 2); //point to the start in the row - - for (int y = miny; y <= maxy; y++, ptr += sstep - ((maxx - minx + 1) << 2)) - { - int rowCount = 0; - int x = minx; -#if CV_ENABLE_UNROLLED - - for (; x + 4 <= maxx; x += 4, ptr += 16) - { - int t0, t1, t2; - t0 = ptr[0], t1 = ptr[1], t2 = ptr[2]; - - if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) - { - s0 += t0; - s1 += t1; - s2 += t2; - sx += x; - rowCount++; - } - - t0 = ptr[4], t1 = ptr[5], t2 = ptr[6]; - - if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) - { - s0 += t0; - s1 += t1; - s2 += t2; - sx += x + 1; - rowCount++; - } - - t0 = ptr[8], t1 = ptr[9], t2 = ptr[10]; - - if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) - { - s0 += t0; - s1 += t1; - s2 += t2; - sx += x + 2; - rowCount++; - } - - t0 = ptr[12], t1 = ptr[13], t2 = ptr[14]; - - if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) - { - s0 += t0; - s1 += t1; - s2 += t2; - sx += x + 3; - rowCount++; - } - } - -#endif - - for (; x <= maxx; x++, ptr += 4) - { - int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2]; - - if (tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) - { - s0 += t0; - s1 += t1; - s2 += t2; - sx += x; - rowCount++; - } - } - - if (rowCount == 0) - { - continue; - } - - count += rowCount; - sy += y * rowCount; - } - - if (count == 0) - { - break; - } - - int x1 = sx / count; - int y1 = sy / count; - s0 = s0 / count; - s1 = s1 / count; - s2 = s2 / count; - - bool stopFlag = (x0 == x1 && y0 == y1) || (abs(x1 - x0) + abs(y1 - y0) + - tab[s0 - c0 + 255] + tab[s1 - c1 + 255] + tab[s2 - c2 + 255] <= eps); - - //revise the pointer corresponding to the new (y0,x0) - revx = x1 - x0; - revy = y1 - y0; - - x0 = x1; - y0 = y1; - c0 = s0; - c1 = s1; - c2 = s2; - - if (stopFlag) - { - break; - } - } //for iter - - dptr[0] = (uchar)c0; - dptr[1] = (uchar)c1; - dptr[2] = (uchar)c2; - dptr[3] = (uchar)c3; - - COOR coor; - coor.x = static_cast(x0); - coor.y = static_cast(y0); - return coor; -} - -void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi, int sp, int sr, cv::TermCriteria crit) -{ - - if (src_roi.empty()) - { - CV_Error(CV_StsBadArg, "The input image is empty"); - } - - if (src_roi.depth() != CV_8U || src_roi.channels() != 4) - { - CV_Error(CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported"); - } - - CV_Assert((src_roi.cols == dst_roi.cols) && (src_roi.rows == dst_roi.rows) && - (src_roi.cols == dstCoor_roi.cols) && (src_roi.rows == dstCoor_roi.rows)); - CV_Assert(!(dstCoor_roi.step & 0x3)); - - if (!(crit.type & cv::TermCriteria::MAX_ITER)) - { - crit.maxCount = 5; - } - - int maxIter = std::min(std::max(crit.maxCount, 1), 100); - float eps; - - if (!(crit.type & cv::TermCriteria::EPS)) - { - eps = 1.f; - } - - eps = (float)std::max(crit.epsilon, 0.0); - - int tab[512]; - - for (int i = 0; i < 512; i++) - { - tab[i] = (i - 255) * (i - 255); - } - - uchar *sptr = src_roi.data; - uchar *dptr = dst_roi.data; - short *dCoorptr = (short *)dstCoor_roi.data; - int sstep = (int)src_roi.step; - int dstep = (int)dst_roi.step; - int dCoorstep = (int)dstCoor_roi.step >> 1; - cv::Size size = src_roi.size(); - - for (int i = 0; i < size.height; i++, sptr += sstep - (size.width << 2), - dptr += dstep - (size.width << 2), dCoorptr += dCoorstep - (size.width << 1)) - { - for (int j = 0; j < size.width; j++, sptr += 4, dptr += 4, dCoorptr += 2) - { - *((COOR *)dCoorptr) = do_meanShift(j, i, sptr, dptr, sstep, size, sp, sr, maxIter, eps, tab); - } - } - -} -TEST(meanShiftProc) -{ - Mat src, dst, dstCoor_roi; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst, d_dstCoor_roi; -#endif - TermCriteria crit(TermCriteria::COUNT + TermCriteria::EPS, 5, 1); - - for (int size = 1000; size <= 4000; size *= 2) - { - SUBTEST << size << 'x' << size << "; 8UC4 and CV_16SC2 "; - - gen(src, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256)); - gen(dst, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256)); - gen(dstCoor_roi, size, size, CV_16SC2, Scalar::all(0), Scalar::all(256)); - - meanShiftProc_(src, dst, dstCoor_roi, 5, 6, crit); - - CPU_ON; - meanShiftProc_(src, dst, dstCoor_roi, 5, 6, crit); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - ocl::meanShiftProc(d_src, d_dst, d_dstCoor_roi, 5, 6, crit); - WARMUP_OFF; - - GPU_ON; - ocl::meanShiftProc(d_src, d_dst, d_dstCoor_roi, 5, 6, crit); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::meanShiftProc(d_src, d_dst, d_dstCoor_roi, 5, 6, crit); - d_dst.download(dst); - d_dstCoor_roi.download(dstCoor_roi); - GPU_FULL_OFF; -#endif - } -} -///////////// ConvertTo//////////////////////// -TEST(ConvertTo) -{ - Mat src, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst; -#endif - int all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] << " to 32FC1"; - - gen(src, size, size, all_type[j], 0, 256); - //gen(dst, size, size, all_type[j], 0, 256); - - //d_dst.upload(dst); - - src.convertTo(dst, CV_32FC1); - - CPU_ON; - src.convertTo(dst, CV_32FC1); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - d_src.convertTo(d_dst, CV_32FC1); - WARMUP_OFF; - - GPU_ON; - d_src.convertTo(d_dst, CV_32FC1); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - d_src.convertTo(d_dst, CV_32FC1); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} -///////////// copyTo//////////////////////// -TEST(copyTo) -{ - Mat src, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst; -#endif - int all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src, size, size, all_type[j], 0, 256); - //gen(dst, size, size, all_type[j], 0, 256); - - //d_dst.upload(dst); - - src.copyTo(dst); - - CPU_ON; - src.copyTo(dst); - CPU_OFF; - -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - d_src.copyTo(d_dst); - WARMUP_OFF; - - GPU_ON; - d_src.copyTo(d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - d_src.copyTo(d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} -///////////// setTo//////////////////////// -TEST(setTo) -{ - Mat src, dst; - Scalar val(1, 2, 3, 4); -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst; -#endif - int all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - - gen(src, size, size, all_type[j], 0, 256); - - src.setTo(val); - - CPU_ON; - src.setTo(val); - CPU_OFF; -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - d_src.setTo(val); - WARMUP_OFF; - - GPU_ON; - d_src.setTo(val); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - d_src.setTo(val); - GPU_FULL_OFF; -#endif - } - - } -} -///////////// Merge//////////////////////// -TEST(Merge) -{ - Mat dst; -#ifdef USE_OPENCL - ocl::oclMat d_dst; -#endif - int channels = 4; - int all_type[] = {CV_8UC1, CV_32FC1}; - std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] ; - Size size1 = Size(size, size); - std::vector src(channels); - - for (int i = 0; i < channels; ++i) - { - src[i] = Mat(size1, all_type[j], cv::Scalar::all(i)); - } - - merge(src, dst); - - CPU_ON; - merge(src, dst); - CPU_OFF; - -#ifdef USE_OPENCL - std::vector d_src(channels); - - for (int i = 0; i < channels; ++i) - { - d_src[i] = ocl::oclMat(size1, all_type[j], cv::Scalar::all(i)); - } - - WARMUP_ON; - ocl::merge(d_src, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::merge(d_src, d_dst); - GPU_OFF; - - GPU_FULL_ON; - - for (int i = 0; i < channels; ++i) - { - d_src[i] = ocl::oclMat(size1, CV_8U, cv::Scalar::all(i)); - } - - ocl::merge(d_src, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} - -///////////// Split//////////////////////// -TEST(Split) -{ - //int channels = 4; - int all_type[] = {CV_8UC1, CV_32FC1}; - std::string type_name[] = {"CV_8UC1", "CV_32FC1"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j]; - Size size1 = Size(size, size); - - Mat src(size1, CV_MAKE_TYPE(all_type[j], 4), cv::Scalar(1, 2, 3, 4)); - - std::vector dst; - - split(src, dst); - - CPU_ON; - split(src, dst); - CPU_OFF; - -#ifdef USE_OPENCL - ocl::oclMat d_src(size1, CV_MAKE_TYPE(all_type[j], 4), cv::Scalar(1, 2, 3, 4)); - std::vector d_dst; - - WARMUP_ON; - ocl::split(d_src, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::split(d_src, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::split(d_src, d_dst); - GPU_FULL_OFF; -#endif - } - - } -} - - -///////////// norm//////////////////////// -TEST(norm) -{ - Mat src, buf; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_buf; -#endif - - for (int size = 1000; size <= 4000; size *= 2) - { - SUBTEST << size << 'x' << size << "; CV_8UC1; NORM_INF"; - - gen(src, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1)); - gen(buf, size, size, CV_8UC1, Scalar::all(0), Scalar::all(1)); - - norm(src, NORM_INF); - - CPU_ON; - norm(src, NORM_INF); - CPU_OFF; - -#ifdef USE_OPENCL - d_src.upload(src); - d_buf.upload(buf); - - WARMUP_ON; - ocl::norm(d_src, d_buf, NORM_INF); - WARMUP_OFF; - - GPU_ON; - ocl::norm(d_src, d_buf, NORM_INF); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::norm(d_src, d_buf, NORM_INF); - GPU_FULL_OFF; -#endif - } -} -///////////// remap//////////////////////// -TEST(remap) -{ - Mat src, dst, xmap, ymap; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst, d_xmap, d_ymap; -#endif - int all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - - int interpolation = INTER_LINEAR; - int borderMode = BORDER_CONSTANT; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t t = 0; t < sizeof(all_type) / sizeof(int); t++) - { - SUBTEST << size << 'x' << size << "; src " << type_name[t] << "; map CV_32FC1"; - - gen(src, size, size, all_type[t], 0, 256); - - xmap.create(size, size, CV_32FC1); - dst.create(size, size, CV_32FC1); - ymap.create(size, size, CV_32FC1); - - for (int i = 0; i < size; ++i) - { - float *xmap_row = xmap.ptr(i); - float *ymap_row = ymap.ptr(i); - - for (int j = 0; j < size; ++j) - { - xmap_row[j] = (j - size * 0.5f) * 0.75f + size * 0.5f; - ymap_row[j] = (i - size * 0.5f) * 0.75f + size * 0.5f; - } - } - - - remap(src, dst, xmap, ymap, interpolation, borderMode); - - CPU_ON; - remap(src, dst, xmap, ymap, interpolation, borderMode); - CPU_OFF; - -#ifdef USE_OPENCL - d_src.upload(src); - d_dst.upload(dst); - d_xmap.upload(xmap); - d_ymap.upload(ymap); - - WARMUP_ON; - ocl::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode); - WARMUP_OFF; - - GPU_ON; - ocl::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} -///////////// cvtColor//////////////////////// -TEST(cvtColor) -{ - Mat src, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst; -#endif - int all_type[] = {CV_8UC4}; - std::string type_name[] = {"CV_8UC4"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - gen(src, size, size, all_type[j], 0, 256); - SUBTEST << size << "x" << size << "; " << type_name[j] << " ; CV_RGBA2GRAY"; - - cvtColor(src, dst, CV_RGBA2GRAY, 4); - - CPU_ON; - cvtColor(src, dst, CV_RGBA2GRAY, 4); - CPU_OFF; - -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - ocl::cvtColor(d_src, d_dst, CV_RGBA2GRAY, 4); - WARMUP_OFF; - - GPU_ON; - ocl::cvtColor(d_src, d_dst, CV_RGBA2GRAY, 4); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::cvtColor(d_src, d_dst, CV_RGBA2GRAY, 4); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - - } - - -} -///////////// filter2D//////////////////////// -TEST(filter2D) -{ - Mat src; - - for (int size = 1000; size <= 4000; size *= 2) - { - int all_type[] = {CV_8UC1, CV_8UC4}; - std::string type_name[] = {"CV_8UC1", "CV_8UC4"}; - - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - gen(src, size, size, all_type[j], 0, 256); - - for (int ksize = 3; ksize <= 15; ksize = 2*ksize+1) - { - SUBTEST << "ksize = " << ksize << "; " << size << 'x' << size << "; " << type_name[j] ; - - Mat kernel; - gen(kernel, ksize, ksize, CV_32FC1, 0.0, 1.0); - - Mat dst; - cv::filter2D(src, dst, -1, kernel); - - CPU_ON; - cv::filter2D(src, dst, -1, kernel); - CPU_OFF; -#ifdef USE_OPENCL - ocl::oclMat d_src(src); - ocl::oclMat d_dst; - - WARMUP_ON; - ocl::filter2D(d_src, d_dst, -1, kernel); - WARMUP_OFF; - - GPU_ON; - ocl::filter2D(d_src, d_dst, -1, kernel); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::filter2D(d_src, d_dst, -1, kernel); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } - - - } -} - - -///////////// dft //////////////////////// -TEST(dft) -{ - Mat src, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src, d_dst; -#endif - - int all_type[] = {CV_32FC1, CV_32FC2}; - std::string type_name[] = {"CV_32FC1", "CV_32FC2"}; - - for (int size = 1000; size <= 4000; size *= 2) - { - for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) - { - SUBTEST << size << 'x' << size << "; " << type_name[j] << " ; complex-to-complex"; - - gen(src, size, size, all_type[j], Scalar::all(0), Scalar::all(1)); - - dft(src, dst); - - CPU_ON; - dft(src, dst); - CPU_OFF; - -#ifdef USE_OPENCL - d_src.upload(src); - - WARMUP_ON; - ocl::dft(d_src, d_dst, Size(size, size)); - WARMUP_OFF; - - GPU_ON; - ocl::dft(d_src, d_dst, Size(size, size)); - GPU_OFF; - - GPU_FULL_ON; - d_src.upload(src); - ocl::dft(d_src, d_dst, Size(size, size)); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } - - } -} - -///////////// gemm //////////////////////// -TEST(gemm) -{ - Mat src1, src2, src3, dst; -#ifdef USE_OPENCL - ocl::oclMat d_src1, d_src2, d_src3, d_dst; -#endif - - for (int size = 1000; size <= 4000; size *= 2) - { - SUBTEST << size << 'x' << size; - - gen(src1, size, size, CV_32FC1, Scalar::all(-10), Scalar::all(10)); - gen(src2, size, size, CV_32FC1, Scalar::all(-10), Scalar::all(10)); - gen(src3, size, size, CV_32FC1, Scalar::all(-10), Scalar::all(10)); - - gemm(src1, src2, 1.0, src3, 1.0, dst); - - CPU_ON; - gemm(src1, src2, 1.0, src3, 1.0, dst); - CPU_OFF; - -#ifdef USE_OPENCL - d_src1.upload(src1); - d_src2.upload(src2); - d_src3.upload(src3); - - WARMUP_ON; - ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst); - WARMUP_OFF; - - GPU_ON; - ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst); - GPU_OFF; - - GPU_FULL_ON; - d_src1.upload(src1); - d_src2.upload(src2); - d_src3.upload(src3); - ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst); - d_dst.download(dst); - GPU_FULL_OFF; -#endif - } -} - -int main(int argc, const char *argv[]) -{ -#ifdef USE_OPENCL - vector oclinfo; - int num_devices = getDevice(oclinfo); - - if (num_devices < 1) - { - cerr << "no device found\n"; - return -1; - } - - int devidx = 0; - - for (size_t i = 0; i < oclinfo.size(); i++) - { - for (size_t j = 0; j < oclinfo[i].DeviceName.size(); j++) - { - printf("device %d: %s\n", devidx++, oclinfo[i].DeviceName[j].c_str()); - } - } - -#endif - redirectError(cvErrorCallback); - - const char *keys = - "{ h | help | false | print help message }" - "{ f | filter | | filter for test }" - "{ w | workdir | | set working directory }" - "{ l | list | false | show all tests }" - "{ d | device | 0 | device id }" - "{ i | iters | 10 | iteration count }" - "{ m | warmup | 1 | gpu warm up iteration count}" - "{ t | xtop | 1.1 | xfactor top boundary}" - "{ b | xbottom | 0.9 | xfactor bottom boundary}" - "{ v | verify | false | only run gpu once to verify if problems occur}"; - - CommandLineParser cmd(argc, argv, keys); - - if (cmd.get("help")) - { - cout << "Avaible options:" << endl; - cmd.printParams(); - return 0; - } - -#ifdef USE_OPENCL - int device = cmd.get("device"); - - if (device < 0 || device >= num_devices) - { - cerr << "Invalid device ID" << endl; - return -1; - } - - if (cmd.get("verify")) - { - TestSystem::instance().setNumIters(1); - TestSystem::instance().setGPUWarmupIters(0); - TestSystem::instance().setCPUIters(0); - } - - devidx = 0; - - for (size_t i = 0; i < oclinfo.size(); i++) - { - for (size_t j = 0; j < oclinfo[i].DeviceName.size(); j++, devidx++) - { - if (device == devidx) - { - ocl::setDevice(oclinfo[i], (int)j); - TestSystem::instance().setRecordName(oclinfo[i].DeviceName[j]); - printf("\nuse %d: %s\n", devidx, oclinfo[i].DeviceName[j].c_str()); - goto END_DEV; - } - } - } - -END_DEV: - -#endif - string filter = cmd.get("filter"); - string workdir = cmd.get("workdir"); - bool list = cmd.get("list"); - int iters = cmd.get("iters"); - int wu_iters = cmd.get("warmup"); - double x_top = cmd.get("xtop"); - double x_bottom = cmd.get("xbottom"); - - TestSystem::instance().setTopThreshold(x_top); - TestSystem::instance().setBottomThreshold(x_bottom); - - if (!filter.empty()) - { - TestSystem::instance().setTestFilter(filter); - } - - if (!workdir.empty()) - { - if (workdir[workdir.size() - 1] != '/' && workdir[workdir.size() - 1] != '\\') - { - workdir += '/'; - } - - TestSystem::instance().setWorkingDir(workdir); - } - - if (list) - { - TestSystem::instance().setListMode(true); - } - - TestSystem::instance().setNumIters(iters); - TestSystem::instance().setGPUWarmupIters(wu_iters); - - TestSystem::instance().run(); - - return 0; -} From de95a2b278657d06d85cd032fd3fa6fe2f55eda2 Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Wed, 27 Mar 2013 00:23:23 -0700 Subject: [PATCH 07/67] Video IO tests turned off for ARM WinRT. --- modules/highgui/test/test_ffmpeg.cpp | 2 +- modules/highgui/test/test_precomp.hpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/highgui/test/test_ffmpeg.cpp b/modules/highgui/test/test_ffmpeg.cpp index 53065462a..547060e7d 100644 --- a/modules/highgui/test/test_ffmpeg.cpp +++ b/modules/highgui/test/test_ffmpeg.cpp @@ -176,7 +176,7 @@ TEST(Highgui_Video, ffmpeg_image) { CV_FFmpegReadImageTest test; test.safe_run() #endif -#if defined(HAVE_FFMPEG) || defined(WIN32) || defined(_WIN32) +#if defined(HAVE_FFMPEG) || ((defined(WIN32) || defined(_WIN32)) && !defined(_M_ARM)) //////////////////////////////// Parallel VideoWriters and VideoCaptures //////////////////////////////////// diff --git a/modules/highgui/test/test_precomp.hpp b/modules/highgui/test/test_precomp.hpp index 3286c0f59..bdf3f38c8 100644 --- a/modules/highgui/test/test_precomp.hpp +++ b/modules/highgui/test/test_precomp.hpp @@ -48,7 +48,7 @@ defined(HAVE_AVFOUNDATION) || \ /*defined(HAVE_OPENNI) || too specialized */ \ defined(HAVE_FFMPEG) || \ - defined(WIN32) /* assume that we have ffmpeg */ + (defined(WIN32) && !defined(_M_ARM))/* assume that we have ffmpeg on x86 and no on ARM */ # define BUILD_WITH_VIDEO_INPUT_SUPPORT 1 #else @@ -60,7 +60,7 @@ defined(HAVE_QUICKTIME) || \ defined(HAVE_AVFOUNDATION) || \ defined(HAVE_FFMPEG) || \ - defined(WIN32) /* assume that we have ffmpeg */ + (defined(WIN32) && !defined(_M_ARM)) /* assume that we have ffmpeg */ # define BUILD_WITH_VIDEO_OUTPUT_SUPPORT 1 #else # define BUILD_WITH_VIDEO_OUTPUT_SUPPORT 0 From 09bc99a0c0450358c6beff77db2967ae7cca1b9b Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Wed, 27 Mar 2013 08:03:51 -0700 Subject: [PATCH 08/67] HAVE_WIN32UI and HAVE_VFW checks and defines added. --- CMakeLists.txt | 10 ++++++++-- cmake/OpenCVFindLibsGUI.cmake | 10 ++++++++++ cmake/OpenCVFindLibsVideo.cmake | 11 ++++++++++- cmake/checks/vfwtest.cpp | 10 ++++++++++ cmake/checks/win32uitest.cpp | 11 +++++++++++ cmake/templates/cvconfig.h.cmake | 6 ++++++ modules/core/include/opencv2/core/types_c.h | 1 - modules/highgui/CMakeLists.txt | 2 +- modules/highgui/perf/perf_precomp.hpp | 6 ++---- modules/highgui/src/cap.cpp | 1 - modules/highgui/src/cap_ffmpeg.cpp | 4 ++-- modules/highgui/src/precomp.hpp | 8 -------- modules/highgui/src/window.cpp | 12 ++++++------ modules/highgui/test/test_ffmpeg.cpp | 2 +- modules/highgui/test/test_gui.cpp | 2 +- modules/highgui/test/test_precomp.hpp | 7 ++----- 16 files changed, 70 insertions(+), 33 deletions(-) create mode 100644 cmake/checks/vfwtest.cpp create mode 100644 cmake/checks/win32uitest.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 9b7f8c2d7..3a50ac377 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -118,6 +118,7 @@ OCV_OPTION(WITH_CUFFT "Include NVidia Cuda Fast Fourier Transform (FFT) OCV_OPTION(WITH_CUBLAS "Include NVidia Cuda Basic Linear Algebra Subprograms (BLAS) library support" OFF IF (CMAKE_VERSION VERSION_GREATER "2.8" AND NOT ANDROID AND NOT IOS) ) OCV_OPTION(WITH_NVCUVID "Include NVidia Video Decoding library support" OFF IF (CMAKE_VERSION VERSION_GREATER "2.8" AND NOT ANDROID AND NOT IOS AND NOT APPLE) ) OCV_OPTION(WITH_EIGEN "Include Eigen2/Eigen3 support" ON) +OCV_OPTION(WITH_VFW "Include Video for Windows support" ON IF (WIN32)) OCV_OPTION(WITH_FFMPEG "Include FFMPEG support" ON IF (NOT ANDROID AND NOT IOS)) OCV_OPTION(WITH_GSTREAMER "Include Gstreamer support" ON IF (UNIX AND NOT APPLE AND NOT ANDROID) ) OCV_OPTION(WITH_GTK "Include GTK support" ON IF (UNIX AND NOT APPLE AND NOT ANDROID) ) @@ -132,6 +133,7 @@ OCV_OPTION(WITH_PNG "Include PNG support" ON OCV_OPTION(WITH_PVAPI "Include Prosilica GigE support" ON IF (NOT ANDROID AND NOT IOS) ) OCV_OPTION(WITH_GIGEAPI "Include Smartek GigE support" ON IF (NOT ANDROID AND NOT IOS) ) OCV_OPTION(WITH_QT "Build with Qt Backend support" OFF IF (NOT ANDROID AND NOT IOS) ) +OCV_OPTION(WITH_WIN32UI "Build with Win32 UI Backend support" ON IF (WIN32) ) OCV_OPTION(WITH_QUICKTIME "Use QuickTime for Video I/O insted of QTKit" OFF IF APPLE ) OCV_OPTION(WITH_TBB "Include Intel TBB support" OFF IF (NOT IOS) ) OCV_OPTION(WITH_CSTRIPES "Include C= support" OFF IF WIN32 ) @@ -607,8 +609,8 @@ else() if(DEFINED WITH_QT) status(" QT 4.x:" NO) endif() - if(WIN32) - status(" Win32 UI:" ARM THEN NO ELSE YES) + if(DEFINED WITH_WIN32UI) + status(" Win32 UI:" HAVE_WIN32UI THEN YES ELSE NO) else() if(APPLE) if(WITH_CARBON) @@ -665,6 +667,10 @@ endif() status("") status(" Video I/O:") +if (DEFINED WITH_VFW) + status(" Video for Windows:" HAVE_VFW THEN YES ELSE NO) +endif(DEFINED WITH_VFW) + if(DEFINED WITH_1394) status(" DC1394 1.x:" HAVE_DC1394 THEN "YES (ver ${ALIASOF_libdc1394_VERSION})" ELSE NO) status(" DC1394 2.x:" HAVE_DC1394_2 THEN "YES (ver ${ALIASOF_libdc1394-2_VERSION})" ELSE NO) diff --git a/cmake/OpenCVFindLibsGUI.cmake b/cmake/OpenCVFindLibsGUI.cmake index c883a80ce..14095442d 100644 --- a/cmake/OpenCVFindLibsGUI.cmake +++ b/cmake/OpenCVFindLibsGUI.cmake @@ -2,6 +2,16 @@ # Detect 3rd-party GUI libraries # ---------------------------------------------------------------------------- +#--- Win32 UI --- +ocv_clear_vars(HAVE_WIN32UI) +if(WITH_WIN32UI) + TRY_COMPILE(HAVE_WIN32UI + "${OPENCV_BINARY_DIR}/CMakeFiles/CMakeTmp" + "${OpenCV_SOURCE_DIR}/cmake/checks/win32uitest.cpp" + CMAKE_FLAGS "\"user32.lib\" \"gdi32.lib\"" + OUTPUT_VARIABLE OUTPUT) +endif(WITH_WIN32UI) + # --- QT4 --- ocv_clear_vars(HAVE_QT) if(WITH_QT) diff --git a/cmake/OpenCVFindLibsVideo.cmake b/cmake/OpenCVFindLibsVideo.cmake index 3556ba562..9cb7f7cf2 100644 --- a/cmake/OpenCVFindLibsVideo.cmake +++ b/cmake/OpenCVFindLibsVideo.cmake @@ -2,6 +2,15 @@ # Detect 3rd-party video IO libraries # ---------------------------------------------------------------------------- +ocv_clear_vars(HAVE_VFW) +if (WITH_VFW) + TRY_COMPILE(HAVE_VFW + "${OPENCV_BINARY_DIR}/CMakeFiles/CMakeTmp" + "${OpenCV_SOURCE_DIR}/cmake/checks/vfwtest.cpp" + CMAKE_FLAGS "-DLINK_LIBRARIES:STRING=vfw32" + OUTPUT_VARIABLE OUTPUT) + endif(WITH_VFW) + # --- GStreamer --- ocv_clear_vars(HAVE_GSTREAMER) if(WITH_GSTREAMER) @@ -37,7 +46,7 @@ if(WITH_PVAPI) set(PVAPI_SDK_SUBDIR x86) elseif(X86_64) set(PVAPI_SDK_SUBDIR x64) - elseif(CMAKE_SYSTEM_PROCESSOR MATCHES arm) + elseif(ARM) set(PVAPI_SDK_SUBDIR arm) endif() diff --git a/cmake/checks/vfwtest.cpp b/cmake/checks/vfwtest.cpp new file mode 100644 index 000000000..63d545788 --- /dev/null +++ b/cmake/checks/vfwtest.cpp @@ -0,0 +1,10 @@ + +#include +#include + +int main() +{ + AVIFileInit(); + AVIFileExit(); + return 0; +} \ No newline at end of file diff --git a/cmake/checks/win32uitest.cpp b/cmake/checks/win32uitest.cpp new file mode 100644 index 000000000..6f13a09cc --- /dev/null +++ b/cmake/checks/win32uitest.cpp @@ -0,0 +1,11 @@ +#include + +int main(int argc, char** argv) +{ + CreateWindow(NULL /*lpClassName*/, NULL /*lpWindowName*/, 0 /*dwStyle*/, 0 /*x*/, + 0 /*y*/, 0 /*nWidth*/, 0 /*nHeight*/, NULL /*hWndParent*/, NULL /*hMenu*/, + NULL /*hInstance*/, NULL /*lpParam*/); + DeleteDC(NULL); + + return 0; +} diff --git a/cmake/templates/cvconfig.h.cmake b/cmake/templates/cvconfig.h.cmake index 85522072e..ff6b5c89a 100644 --- a/cmake/templates/cvconfig.h.cmake +++ b/cmake/templates/cvconfig.h.cmake @@ -13,6 +13,9 @@ */ #cmakedefine HAVE_ALLOCA_H 1 +/* Video for Windows support */ +#cmakedefine HAVE_VFW + /* V4L capturing support */ #cmakedefine HAVE_CAMV4L @@ -55,6 +58,9 @@ /* GTK+ 2.0 Thread support */ #cmakedefine HAVE_GTHREAD +/* Win32 UI */ +#cmakedefine HAVE_WIN32UI + /* GTK+ 2.x toolkit */ #cmakedefine HAVE_GTK diff --git a/modules/core/include/opencv2/core/types_c.h b/modules/core/include/opencv2/core/types_c.h index be959a51c..3a0830463 100644 --- a/modules/core/include/opencv2/core/types_c.h +++ b/modules/core/include/opencv2/core/types_c.h @@ -322,7 +322,6 @@ CV_INLINE int cvRound( double value ) return (int)lrint(value); # endif #else - // while this is not IEEE754-compliant rounding, it's usually a good enough approximation double intpart, fractpart; fractpart = modf(value, &intpart); if ((abs(fractpart) != 0.5) || ((((int)intpart) % 2) != 0)) diff --git a/modules/highgui/CMakeLists.txt b/modules/highgui/CMakeLists.txt index 59ec616d3..2888c9dcd 100644 --- a/modules/highgui/CMakeLists.txt +++ b/modules/highgui/CMakeLists.txt @@ -89,7 +89,7 @@ if(HAVE_QT) if(${_have_flag}) set_source_files_properties(${_RCC_OUTFILES} PROPERTIES COMPILE_FLAGS -Wno-missing-declarations) endif() -elseif(WIN32 AND NOT ARM) +elseif(HAVE_WIN32UI) list(APPEND highgui_srcs src/window_w32.cpp) elseif(HAVE_GTK) list(APPEND highgui_srcs src/window_gtk.cpp) diff --git a/modules/highgui/perf/perf_precomp.hpp b/modules/highgui/perf/perf_precomp.hpp index ec8a447fa..b43e038f6 100644 --- a/modules/highgui/perf/perf_precomp.hpp +++ b/modules/highgui/perf/perf_precomp.hpp @@ -21,8 +21,7 @@ defined(HAVE_QUICKTIME) || \ defined(HAVE_AVFOUNDATION) || \ /*defined(HAVE_OPENNI) || too specialized */ \ - defined(HAVE_FFMPEG) || \ - defined(WIN32) /* assume that we have ffmpeg */ + defined(HAVE_FFMPEG) # define BUILD_WITH_VIDEO_INPUT_SUPPORT 1 #else @@ -33,8 +32,7 @@ defined(HAVE_GSTREAMER) || \ defined(HAVE_QUICKTIME) || \ defined(HAVE_AVFOUNDATION) || \ - defined(HAVE_FFMPEG) || \ - defined(WIN32) /* assume that we have ffmpeg */ + defined(HAVE_FFMPEG) # define BUILD_WITH_VIDEO_OUTPUT_SUPPORT 1 #else # define BUILD_WITH_VIDEO_OUTPUT_SUPPORT 0 diff --git a/modules/highgui/src/cap.cpp b/modules/highgui/src/cap.cpp index 2c754cadc..9befa7b91 100644 --- a/modules/highgui/src/cap.cpp +++ b/modules/highgui/src/cap.cpp @@ -200,7 +200,6 @@ CV_IMPL CvCapture * cvCreateCameraCapture (int index) capture = cvCreateCameraCapture_DShow (index); if (capture) return capture; - return NULL; break; #endif diff --git a/modules/highgui/src/cap_ffmpeg.cpp b/modules/highgui/src/cap_ffmpeg.cpp index 22caab854..669ebda12 100644 --- a/modules/highgui/src/cap_ffmpeg.cpp +++ b/modules/highgui/src/cap_ffmpeg.cpp @@ -209,7 +209,7 @@ CvCapture* cvCreateFileCapture_FFMPEG_proxy(const char * filename) if( result->open( filename )) return result; delete result; -#if (defined WIN32 || defined _WIN32) && !defined _M_ARM +#ifdef HAVE_VFW return cvCreateFileCapture_VFW(filename); #else return 0; @@ -263,7 +263,7 @@ CvVideoWriter* cvCreateVideoWriter_FFMPEG_proxy( const char* filename, int fourc if( result->open( filename, fourcc, fps, frameSize, isColor != 0 )) return result; delete result; -#if (defined WIN32 || defined _WIN32) && !defined _M_ARM +#ifdef HAVE_VFW return cvCreateVideoWriter_VFW(filename, fourcc, fps, frameSize, isColor); #else return 0; diff --git a/modules/highgui/src/precomp.hpp b/modules/highgui/src/precomp.hpp index 64efbb90d..afa0735ee 100644 --- a/modules/highgui/src/precomp.hpp +++ b/modules/highgui/src/precomp.hpp @@ -103,14 +103,6 @@ struct CvVideoWriter virtual bool writeFrame(const IplImage*) { return false; } }; -#if (defined WIN32 || defined _WIN32) && !defined _M_ARM -#define HAVE_VFW 1 - -/* uncomment to enable CMUCamera1394 fireware camera module */ -//#define HAVE_CMU1394 1 -#endif - - CvCapture * cvCreateCameraCapture_V4L( int index ); CvCapture * cvCreateCameraCapture_DC1394( int index ); CvCapture * cvCreateCameraCapture_DC1394_2( int index ); diff --git a/modules/highgui/src/window.cpp b/modules/highgui/src/window.cpp index 12816f3cb..6d2953464 100644 --- a/modules/highgui/src/window.cpp +++ b/modules/highgui/src/window.cpp @@ -57,7 +57,7 @@ CV_IMPL void cvSetWindowProperty(const char* name, int prop_id, double prop_valu #if defined (HAVE_QT) cvSetModeWindow_QT(name,prop_value); - #elif (defined WIN32 || defined _WIN32) && !defined _M_ARM + #elif defined(HAVE_WIN32UI) cvSetModeWindow_W32(name,prop_value); #elif defined (HAVE_GTK) cvSetModeWindow_GTK(name,prop_value); @@ -96,7 +96,7 @@ CV_IMPL double cvGetWindowProperty(const char* name, int prop_id) #if defined (HAVE_QT) return cvGetModeWindow_QT(name); - #elif (defined WIN32 || defined _WIN32) && !defined _M_ARM + #elif defined(HAVE_WIN32UI) return cvGetModeWindow_W32(name); #elif defined (HAVE_GTK) return cvGetModeWindow_GTK(name); @@ -113,7 +113,7 @@ CV_IMPL double cvGetWindowProperty(const char* name, int prop_id) #if defined (HAVE_QT) return cvGetPropWindow_QT(name); - #elif (defined WIN32 || defined _WIN32) && !defined _M_ARM + #elif defined(HAVE_WIN32UI) return cvGetPropWindowAutoSize_W32(name); #elif defined (HAVE_GTK) return cvGetPropWindowAutoSize_GTK(name); @@ -126,7 +126,7 @@ CV_IMPL double cvGetWindowProperty(const char* name, int prop_id) #if defined (HAVE_QT) return cvGetRatioWindow_QT(name); - #elif (defined WIN32 || defined _WIN32) && !defined _M_ARM + #elif defined(HAVE_WIN32UI) return cvGetRatioWindow_W32(name); #elif defined (HAVE_GTK) return cvGetRatioWindow_GTK(name); @@ -139,7 +139,7 @@ CV_IMPL double cvGetWindowProperty(const char* name, int prop_id) #if defined (HAVE_QT) return cvGetOpenGlProp_QT(name); - #elif (defined WIN32 || defined _WIN32) && !defined _M_ARM + #elif defined(HAVE_WIN32UI) return cvGetOpenGlProp_W32(name); #elif defined (HAVE_GTK) return cvGetOpenGlProp_GTK(name); @@ -450,7 +450,7 @@ int cv::createButton(const string&, ButtonCallback, void*, int , bool ) #endif -#if (defined WIN32 || defined _WIN32) && !defined _M_ARM // see window_w32.cpp +#if defined(HAVE_WIN32UI) // see window_w32.cpp #elif defined (HAVE_GTK) // see window_gtk.cpp #elif defined (HAVE_COCOA) // see window_carbon.cpp #elif defined (HAVE_CARBON) diff --git a/modules/highgui/test/test_ffmpeg.cpp b/modules/highgui/test/test_ffmpeg.cpp index 547060e7d..2bfd52723 100644 --- a/modules/highgui/test/test_ffmpeg.cpp +++ b/modules/highgui/test/test_ffmpeg.cpp @@ -176,7 +176,7 @@ TEST(Highgui_Video, ffmpeg_image) { CV_FFmpegReadImageTest test; test.safe_run() #endif -#if defined(HAVE_FFMPEG) || ((defined(WIN32) || defined(_WIN32)) && !defined(_M_ARM)) +#if defined(HAVE_FFMPEG) //////////////////////////////// Parallel VideoWriters and VideoCaptures //////////////////////////////////// diff --git a/modules/highgui/test/test_gui.cpp b/modules/highgui/test/test_gui.cpp index e802f296f..c2726a43c 100644 --- a/modules/highgui/test/test_gui.cpp +++ b/modules/highgui/test/test_gui.cpp @@ -43,7 +43,7 @@ #include "test_precomp.hpp" #include "opencv2/highgui/highgui.hpp" -#if defined HAVE_GTK || defined HAVE_QT || ((defined WIN32 || defined _WIN32) && !_M_ARM) || defined HAVE_CARBON || defined HAVE_COCOA +#if defined HAVE_GTK || defined HAVE_QT || defined HAVE_WIN32UI || defined HAVE_CARBON || defined HAVE_COCOA using namespace cv; using namespace std; diff --git a/modules/highgui/test/test_precomp.hpp b/modules/highgui/test/test_precomp.hpp index bdf3f38c8..bbc6b41c7 100644 --- a/modules/highgui/test/test_precomp.hpp +++ b/modules/highgui/test/test_precomp.hpp @@ -47,9 +47,7 @@ defined(HAVE_QUICKTIME) || \ defined(HAVE_AVFOUNDATION) || \ /*defined(HAVE_OPENNI) || too specialized */ \ - defined(HAVE_FFMPEG) || \ - (defined(WIN32) && !defined(_M_ARM))/* assume that we have ffmpeg on x86 and no on ARM */ - + defined(HAVE_FFMPEG) # define BUILD_WITH_VIDEO_INPUT_SUPPORT 1 #else # define BUILD_WITH_VIDEO_INPUT_SUPPORT 0 @@ -59,8 +57,7 @@ defined(HAVE_GSTREAMER) || \ defined(HAVE_QUICKTIME) || \ defined(HAVE_AVFOUNDATION) || \ - defined(HAVE_FFMPEG) || \ - (defined(WIN32) && !defined(_M_ARM)) /* assume that we have ffmpeg */ + defined(HAVE_FFMPEG) # define BUILD_WITH_VIDEO_OUTPUT_SUPPORT 1 #else # define BUILD_WITH_VIDEO_OUTPUT_SUPPORT 0 From c6cab50c5cb2c789e654acad3f4a0f6d58dc069a Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Thu, 28 Mar 2013 00:01:52 -0700 Subject: [PATCH 09/67] Perf tests for Video IO on WInRT fixed. --- modules/highgui/perf/perf_precomp.hpp | 8 +++++--- platforms/winrt/arm.winrt.toolchain.cmake | 4 ++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/modules/highgui/perf/perf_precomp.hpp b/modules/highgui/perf/perf_precomp.hpp index b43e038f6..529187d3b 100644 --- a/modules/highgui/perf/perf_precomp.hpp +++ b/modules/highgui/perf/perf_precomp.hpp @@ -20,8 +20,9 @@ defined(HAVE_GSTREAMER) || \ defined(HAVE_QUICKTIME) || \ defined(HAVE_AVFOUNDATION) || \ - /*defined(HAVE_OPENNI) || too specialized */ \ - defined(HAVE_FFMPEG) + defined(HAVE_FFMPEG) || \ + defined(HAVE_VFW) + /*defined(HAVE_OPENNI) too specialized */ \ # define BUILD_WITH_VIDEO_INPUT_SUPPORT 1 #else @@ -32,7 +33,8 @@ defined(HAVE_GSTREAMER) || \ defined(HAVE_QUICKTIME) || \ defined(HAVE_AVFOUNDATION) || \ - defined(HAVE_FFMPEG) + defined(HAVE_FFMPEG) || \ + defined(HAVE_VFW) # define BUILD_WITH_VIDEO_OUTPUT_SUPPORT 1 #else # define BUILD_WITH_VIDEO_OUTPUT_SUPPORT 0 diff --git a/platforms/winrt/arm.winrt.toolchain.cmake b/platforms/winrt/arm.winrt.toolchain.cmake index 01854b598..e8767f297 100644 --- a/platforms/winrt/arm.winrt.toolchain.cmake +++ b/platforms/winrt/arm.winrt.toolchain.cmake @@ -1,5 +1,5 @@ -set(CMAKE_SYSTEM_NAME Windows) # WindowsRT breaks cmake 2.8.10.2 and earler -set(CMAKE_SYSTEM_PROCESSOR "arm") +set(CMAKE_SYSTEM_NAME Windows) +set(CMAKE_SYSTEM_PROCESSOR "arm-v7a") set(CMAKE_REQUIRED_DEFINITIONS -D_ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE) add_definitions(-D_ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE) \ No newline at end of file From 924b0ef7861c7e7fdc69befb3f25ff44258f6c20 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Fri, 29 Mar 2013 10:40:28 +0400 Subject: [PATCH 10/67] fixed bug in TVL1 optical flow: initial optical flow was not cleaned during the second call --- modules/gpu/src/tvl1flow.cpp | 22 ++++++++++++---------- modules/video/src/tvl1flow.cpp | 29 +++++++++++++++-------------- 2 files changed, 27 insertions(+), 24 deletions(-) diff --git a/modules/gpu/src/tvl1flow.cpp b/modules/gpu/src/tvl1flow.cpp index a16345ae0..b8322e2c4 100644 --- a/modules/gpu/src/tvl1flow.cpp +++ b/modules/gpu/src/tvl1flow.cpp @@ -130,6 +130,17 @@ void cv::gpu::OpticalFlowDual_TVL1_GPU::operator ()(const GpuMat& I0, const GpuM gpu::multiply(u1s[s], Scalar::all(0.5), u1s[s]); gpu::multiply(u2s[s], Scalar::all(0.5), u2s[s]); } + else + { + u1s[s].create(I0s[s].size(), CV_32FC1); + u2s[s].create(I0s[s].size(), CV_32FC1); + } + } + + if (!useInitialFlow) + { + u1s[nscales-1].setTo(Scalar::all(0)); + u2s[nscales-1].setTo(Scalar::all(0)); } // pyramidal structure for computing the optical flow @@ -174,18 +185,9 @@ void cv::gpu::OpticalFlowDual_TVL1_GPU::procOneScale(const GpuMat& I0, const Gpu CV_DbgAssert( I1.size() == I0.size() ); CV_DbgAssert( I1.type() == I0.type() ); - CV_DbgAssert( u1.empty() || u1.size() == I0.size() ); + CV_DbgAssert( u1.size() == I0.size() ); CV_DbgAssert( u2.size() == u1.size() ); - if (u1.empty()) - { - u1.create(I0.size(), CV_32FC1); - u1.setTo(Scalar::all(0)); - - u2.create(I0.size(), CV_32FC1); - u2.setTo(Scalar::all(0)); - } - GpuMat I1x = I1x_buf(Rect(0, 0, I0.cols, I0.rows)); GpuMat I1y = I1y_buf(Rect(0, 0, I0.cols, I0.rows)); centeredGradient(I1, I1x, I1y); diff --git a/modules/video/src/tvl1flow.cpp b/modules/video/src/tvl1flow.cpp index bff1d7ec0..ddcdabdd3 100644 --- a/modules/video/src/tvl1flow.cpp +++ b/modules/video/src/tvl1flow.cpp @@ -169,13 +169,12 @@ void OpticalFlowDual_TVL1::calc(InputArray _I0, InputArray _I1, InputOutputArray I0.convertTo(I0s[0], I0s[0].depth(), I0.depth() == CV_8U ? 1.0 : 255.0); I1.convertTo(I1s[0], I1s[0].depth(), I1.depth() == CV_8U ? 1.0 : 255.0); + u1s[0].create(I0.size()); + u2s[0].create(I0.size()); + if (useInitialFlow) { - u1s[0].create(I0.size()); - u2s[0].create(I0.size()); - Mat_ mv[] = {u1s[0], u2s[0]}; - split(_flow.getMat(), mv); } @@ -228,6 +227,17 @@ void OpticalFlowDual_TVL1::calc(InputArray _I0, InputArray _I1, InputOutputArray multiply(u1s[s], Scalar::all(0.5), u1s[s]); multiply(u2s[s], Scalar::all(0.5), u2s[s]); } + else + { + u1s[s].create(I0s[s].size()); + u2s[s].create(I0s[s].size()); + } + } + + if (!useInitialFlow) + { + u1s[nscales-1].setTo(Scalar::all(0)); + u2s[nscales-1].setTo(Scalar::all(0)); } // pyramidal structure for computing the optical flow @@ -793,18 +803,9 @@ void OpticalFlowDual_TVL1::procOneScale(const Mat_& I0, const Mat_ CV_DbgAssert( I1.size() == I0.size() ); CV_DbgAssert( I1.type() == I0.type() ); - CV_DbgAssert( u1.empty() || u1.size() == I0.size() ); + CV_DbgAssert( u1.size() == I0.size() ); CV_DbgAssert( u2.size() == u1.size() ); - if (u1.empty()) - { - u1.create(I0.size()); - u1.setTo(Scalar::all(0)); - - u2.create(I0.size()); - u2.setTo(Scalar::all(0)); - } - Mat_ I1x = I1x_buf(Rect(0, 0, I0.cols, I0.rows)); Mat_ I1y = I1y_buf(Rect(0, 0, I0.cols, I0.rows)); centeredGradient(I1, I1x, I1y); From b08432cfe772d64567d4c1c137ba8bf15519951f Mon Sep 17 00:00:00 2001 From: Dustin Spicuzza Date: Fri, 29 Mar 2013 17:37:00 -0400 Subject: [PATCH 11/67] Port dft.py sample from cv to cv2 --- samples/python2/dft.py | 100 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 100 insertions(+) create mode 100644 samples/python2/dft.py diff --git a/samples/python2/dft.py b/samples/python2/dft.py new file mode 100644 index 000000000..32a91e3b5 --- /dev/null +++ b/samples/python2/dft.py @@ -0,0 +1,100 @@ +#/usr/bin/env python + +import cv2 +import numpy as np +import sys + + +def shift_dft(src, dst=None): + ''' + Rearrange the quadrants of Fourier image so that the origin is at + the image center. Swaps quadrant 1 with 3, and 2 with 4. + + src and dst arrays must be equal size & type + ''' + + if dst is None: + dst = np.empty(src.shape, src.dtype) + elif src.shape != dst.shape: + raise ValueError("src and dst must have equal sizes") + elif src.dtype != dst.dtype: + raise TypeError("src and dst must have equal types") + + if src is dst: + ret = np.empty(src.shape, src.dtype) + else: + ret = dst + + h, w = src.shape[:2] + + cx1 = cx2 = w/2 + cy1 = cy2 = h/2 + + # if the size is odd, then adjust the bottom/right quadrants + if w % 2 != 0: + cx2 += 1 + if h % 2 != 0: + cy2 += 1 + + # swap quadrants + + # swap q1 and q3 + ret[h-cy1:, w-cx1:] = src[0:cy1 , 0:cx1 ] # q1 -> q3 + ret[0:cy2 , 0:cx2 ] = src[h-cy2:, w-cx2:] # q3 -> q1 + + # swap q2 and q4 + ret[0:cy2 , w-cx2:] = src[h-cy2:, 0:cx2 ] # q2 -> q4 + ret[h-cy1:, 0:cx1 ] = src[0:cy1 , w-cx1:] # q4 -> q2 + + if src is dst: + dst[:,:] = ret + + return dst + +if __name__ == "__main__": + + if len(sys.argv)>1: + im = cv2.imread(sys.argv[1]) + else : + im = cv2.imread('../c/baboon.jpg') + print "usage : python dft.py " + + # convert to grayscale + im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY) + h, w = im.shape[:2] + + realInput = im.astype(np.float64) + + # perform an optimally sized dft + dft_M = cv2.getOptimalDFTSize(w) + dft_N = cv2.getOptimalDFTSize(h) + + # copy A to dft_A and pad dft_A with zeros + dft_A = np.zeros((dft_N, dft_M, 2), dtype=np.float64) + dft_A[:h, :w, 0] = realInput + + # no need to pad bottom part of dft_A with zeros because of + # use of nonzeroRows parameter in cv2.dft() + cv2.dft(dft_A, dst=dft_A, nonzeroRows=h) + + cv2.imshow("win", im) + + # Split fourier into real and imaginary parts + image_Re, image_Im = cv2.split(dft_A) + + # Compute the magnitude of the spectrum Mag = sqrt(Re^2 + Im^2) + magnitude = cv2.sqrt(image_Re**2.0 + image_Im**2.0) + + # Compute log(1 + Mag) + log_spectrum = cv2.log(1.0 + magnitude) + + # Rearrange the quadrants of Fourier image so that the origin is at + # the image center + shift_dft(log_spectrum, log_spectrum) + + # normalize and display the results as rgb + cv2.normalize(log_spectrum, log_spectrum, 0.0, 1.0, cv2.cv.CV_MINMAX) + cv2.imshow("magnitude", log_spectrum) + + cv2.waitKey(0) + cv2.destroyAllWindows() From ea5225ef3ed1a41a327c5567c27db5cfd90fe3c7 Mon Sep 17 00:00:00 2001 From: Andrey Kamaev Date: Sun, 31 Mar 2013 13:40:09 +0400 Subject: [PATCH 12/67] Fix typo leading to heap corruption in OutputArray::create --- modules/core/src/matrix.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/core/src/matrix.cpp b/modules/core/src/matrix.cpp index 11a4b0266..6abc6df91 100644 --- a/modules/core/src/matrix.cpp +++ b/modules/core/src/matrix.cpp @@ -1546,10 +1546,10 @@ void _OutputArray::create(int dims, const int* sizes, int mtype, int i, bool all int _type = CV_MAT_TYPE(flags); for( size_t j = len0; j < len; j++ ) { - if( v[i].type() == _type ) + if( v[j].type() == _type ) continue; - CV_Assert( v[i].empty() ); - v[i].flags = (v[i].flags & ~CV_MAT_TYPE_MASK) | _type; + CV_Assert( v[j].empty() ); + v[j].flags = (v[j].flags & ~CV_MAT_TYPE_MASK) | _type; } } return; From b28677bd90cbb7679ae97dc0d05e89675bbf67aa Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Mon, 1 Apr 2013 11:16:47 +0400 Subject: [PATCH 13/67] fixed misprint in MatOp::augAssignXor --- modules/core/src/matop.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/core/src/matop.cpp b/modules/core/src/matop.cpp index 736984e85..5c518146c 100644 --- a/modules/core/src/matop.cpp +++ b/modules/core/src/matop.cpp @@ -319,7 +319,7 @@ void MatOp::augAssignXor(const MatExpr& expr, Mat& m) const { Mat temp; expr.op->assign(expr, temp); - m /= temp; + m ^= temp; } From 10774ff068e701eb1ee378d9a1171c2b0e8e4d6a Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Mon, 1 Apr 2013 11:18:30 +0400 Subject: [PATCH 14/67] removed unnecessary tmp variable (convertTo can work in-place) --- modules/calib3d/test/test_stereomatching.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/calib3d/test/test_stereomatching.cpp b/modules/calib3d/test/test_stereomatching.cpp index 4b35dad99..2be50a017 100644 --- a/modules/calib3d/test/test_stereomatching.cpp +++ b/modules/calib3d/test/test_stereomatching.cpp @@ -459,14 +459,14 @@ void CV_StereoMatchingTest::run(int) continue; } int dispScaleFactor = datasetsParams[datasetName].dispScaleFactor; - Mat tmp; trueLeftDisp.convertTo( tmp, CV_32FC1, 1.f/dispScaleFactor ); trueLeftDisp = tmp; tmp.release(); + trueLeftDisp.convertTo( trueLeftDisp, CV_32FC1, 1.f/dispScaleFactor ); if( !trueRightDisp.empty() ) - trueRightDisp.convertTo( tmp, CV_32FC1, 1.f/dispScaleFactor ); trueRightDisp = tmp; tmp.release(); + trueRightDisp.convertTo( trueRightDisp, CV_32FC1, 1.f/dispScaleFactor ); Mat leftDisp, rightDisp; int ignBorder = max(runStereoMatchingAlgorithm(leftImg, rightImg, leftDisp, rightDisp, ci), EVAL_IGNORE_BORDER); - leftDisp.convertTo( tmp, CV_32FC1 ); leftDisp = tmp; tmp.release(); - rightDisp.convertTo( tmp, CV_32FC1 ); rightDisp = tmp; tmp.release(); + leftDisp.convertTo( leftDisp, CV_32FC1 ); + rightDisp.convertTo( rightDisp, CV_32FC1 ); int tempCode = processStereoMatchingResults( resFS, ci, isWrite, leftImg, rightImg, trueLeftDisp, trueRightDisp, leftDisp, rightDisp, QualityEvalParams(ignBorder)); From ae47b8f06c9d9a7fe607b4c7ba34d15d52582c5c Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Mon, 1 Apr 2013 11:23:27 +0400 Subject: [PATCH 15/67] fixed misprint in icvCreateIsometricImage --- modules/legacy/src/epilines.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/legacy/src/epilines.cpp b/modules/legacy/src/epilines.cpp index 309ca0b1d..7d952111d 100644 --- a/modules/legacy/src/epilines.cpp +++ b/modules/legacy/src/epilines.cpp @@ -2115,7 +2115,7 @@ CV_IMPL IplImage* icvCreateIsometricImage( IplImage* src, IplImage* dst, if( !dst || dst->depth != desired_depth || dst->nChannels != desired_num_channels || dst_size.width != src_size.width || - dst_size.height != dst_size.height ) + dst_size.height != src_size.height ) { cvReleaseImage( &dst ); dst = cvCreateImage( src_size, desired_depth, desired_num_channels ); From 5a4fa4607b417bd152f168eaa425da60600f4d3c Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Mon, 1 Apr 2013 11:26:49 +0400 Subject: [PATCH 16/67] fixed misprint in imgwarp.cpp --- modules/imgproc/src/imgwarp.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/imgproc/src/imgwarp.cpp b/modules/imgproc/src/imgwarp.cpp index c2506590d..2fe80616a 100644 --- a/modules/imgproc/src/imgwarp.cpp +++ b/modules/imgproc/src/imgwarp.cpp @@ -3669,7 +3669,7 @@ cv2DRotationMatrix( CvPoint2D32f center, double angle, double scale, CvMat* matrix ) { cv::Mat M0 = cv::cvarrToMat(matrix), M = cv::getRotationMatrix2D(center, angle, scale); - CV_Assert( M.size() == M.size() ); + CV_Assert( M.size() == M0.size() ); M.convertTo(M0, M0.type()); return matrix; } @@ -3682,7 +3682,7 @@ cvGetPerspectiveTransform( const CvPoint2D32f* src, { cv::Mat M0 = cv::cvarrToMat(matrix), M = cv::getPerspectiveTransform((const cv::Point2f*)src, (const cv::Point2f*)dst); - CV_Assert( M.size() == M.size() ); + CV_Assert( M.size() == M0.size() ); M.convertTo(M0, M0.type()); return matrix; } From ce2284e2e6e624c1fea54c2470deab90fc35511d Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Mon, 1 Apr 2013 11:29:13 +0400 Subject: [PATCH 17/67] removed duplication --- modules/core/include/opencv2/core/mat.hpp | 2 +- modules/core/src/matrix.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/core/include/opencv2/core/mat.hpp b/modules/core/include/opencv2/core/mat.hpp index 92301cf3b..c19caf902 100644 --- a/modules/core/include/opencv2/core/mat.hpp +++ b/modules/core/include/opencv2/core/mat.hpp @@ -431,7 +431,7 @@ template inline _Tp* Mat::ptr(int y) template inline const _Tp* Mat::ptr(int y) const { - CV_DbgAssert( y == 0 || (data && dims >= 1 && data && (unsigned)y < (unsigned)size.p[0]) ); + CV_DbgAssert( y == 0 || (data && dims >= 1 && (unsigned)y < (unsigned)size.p[0]) ); return (const _Tp*)(data + step.p[0]*y); } diff --git a/modules/core/src/matrix.cpp b/modules/core/src/matrix.cpp index 11a4b0266..7560c2e59 100644 --- a/modules/core/src/matrix.cpp +++ b/modules/core/src/matrix.cpp @@ -184,7 +184,7 @@ static void finalizeHdr(Mat& m) void Mat::create(int d, const int* _sizes, int _type) { int i; - CV_Assert(0 <= d && _sizes && d <= CV_MAX_DIM && _sizes); + CV_Assert(0 <= d && d <= CV_MAX_DIM && _sizes); _type = CV_MAT_TYPE(_type); if( data && (d == dims || (d == 1 && dims <= 2)) && _type == type() ) From d2b093d809bb170020ac7f1f88f95999ce9c2948 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Mon, 1 Apr 2013 11:31:56 +0400 Subject: [PATCH 18/67] fixed potential dereference of null pointer --- modules/legacy/src/blobtrackingauto.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/legacy/src/blobtrackingauto.cpp b/modules/legacy/src/blobtrackingauto.cpp index fab0503bd..59e0ee60f 100644 --- a/modules/legacy/src/blobtrackingauto.cpp +++ b/modules/legacy/src/blobtrackingauto.cpp @@ -429,10 +429,11 @@ void CvBlobTrackerAuto1::Process(IplImage* pImg, IplImage* pMask) for(i=0; iID = m_NextBlobID; if(pBN && pBN->w >= CV_BLOB_MINW && pBN->h >= CV_BLOB_MINH) { + pBN->ID = m_NextBlobID; + CvBlob* pB = m_pBT->AddBlob(pBN, pImg, pmask ); if(pB) { From 3c86788b1f424acd30bf0ea491b23408d9d07bb7 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Mon, 1 Apr 2013 11:35:33 +0400 Subject: [PATCH 19/67] fixed incorrect sizeof() expression in CvCaptureCAM_VFW::open --- modules/highgui/src/cap_vfw.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/highgui/src/cap_vfw.cpp b/modules/highgui/src/cap_vfw.cpp index 4e6ff5e13..d419a4891 100644 --- a/modules/highgui/src/cap_vfw.cpp +++ b/modules/highgui/src/cap_vfw.cpp @@ -406,7 +406,7 @@ bool CvCaptureCAM_VFW::open( int wIndex ) fourcc = (DWORD)-1; memset( &caps, 0, sizeof(caps)); - capDriverGetCaps( hWndC, &caps, sizeof(&caps)); + capDriverGetCaps( hWndC, &caps, sizeof(caps)); ::MoveWindow( hWndC, 0, 0, 320, 240, TRUE ); capSetUserData( hWndC, (size_t)this ); capSetCallbackOnFrame( hWndC, frameCallback ); From 43f38df1d26ad91c912145d6d2169f7108120e88 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Mon, 1 Apr 2013 11:37:48 +0400 Subject: [PATCH 20/67] fixed incorrect sizeof() expression in CvCalibFilter::SetCameraCount --- modules/legacy/src/calibfilter.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/legacy/src/calibfilter.cpp b/modules/legacy/src/calibfilter.cpp index f1a4d7cd2..99cc04ce8 100644 --- a/modules/legacy/src/calibfilter.cpp +++ b/modules/legacy/src/calibfilter.cpp @@ -235,7 +235,7 @@ void CvCalibFilter::SetCameraCount( int count ) cvReleaseMat( &rectMap[i][1] ); } - memset( latestCounts, 0, sizeof(latestPoints) ); + memset( latestCounts, 0, sizeof(latestCounts) ); maxPoints = 0; cameraCount = count; } From d27d091e59c1a9143dd01caf3f7c644807610c0b Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Mon, 1 Apr 2013 11:53:05 +0400 Subject: [PATCH 21/67] fixed mind/maxd search in CvFuzzyMeanShiftTracker::SearchWindow::initDepthValues --- modules/contrib/src/fuzzymeanshifttracker.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/contrib/src/fuzzymeanshifttracker.cpp b/modules/contrib/src/fuzzymeanshifttracker.cpp index 443b961ed..c83f915b0 100644 --- a/modules/contrib/src/fuzzymeanshifttracker.cpp +++ b/modules/contrib/src/fuzzymeanshifttracker.cpp @@ -380,6 +380,7 @@ void CvFuzzyMeanShiftTracker::SearchWindow::initDepthValues(IplImage *maskImage, { if (*depthData) { + d = *depthData; m1 += d; if (d < mind) mind = d; From 3d095ccc825b305d08e77284c2d4eae6e3dc202a Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Mon, 1 Apr 2013 11:53:43 +0400 Subject: [PATCH 22/67] fixed condition in KeyPointsFilter::retainBest --- modules/features2d/src/keypoint.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/features2d/src/keypoint.cpp b/modules/features2d/src/keypoint.cpp index 9b85fd39c..b19cfbfd3 100644 --- a/modules/features2d/src/keypoint.cpp +++ b/modules/features2d/src/keypoint.cpp @@ -192,7 +192,7 @@ struct KeypointResponseGreater void KeyPointsFilter::retainBest(vector& keypoints, int n_points) { //this is only necessary if the keypoints size is greater than the number of desired points. - if( n_points > 0 && keypoints.size() > (size_t)n_points ) + if( n_points >= 0 && keypoints.size() > (size_t)n_points ) { if (n_points==0) { From b62cf65b917e43db154dd6b0377b1a5deced2890 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Mon, 1 Apr 2013 11:55:02 +0400 Subject: [PATCH 23/67] fixed inconsistent new/delete operators --- modules/objdetect/src/hog.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/objdetect/src/hog.cpp b/modules/objdetect/src/hog.cpp index f1a32c434..1eab434f6 100644 --- a/modules/objdetect/src/hog.cpp +++ b/modules/objdetect/src/hog.cpp @@ -2627,7 +2627,7 @@ void HOGDescriptor::readALTModel(std::string modelfile) detector.push_back((float)-linearbias); setSVMDetector(detector); - delete linearwt; + delete [] linearwt; } else { throw Exception(); } From f2fe89c6d85da182d08f629922b3b71726753ff9 Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Mon, 1 Apr 2013 12:38:26 +0400 Subject: [PATCH 24/67] CMakeLists.txt for Android native activity added. --- samples/android/native-activity/CMakeLists.txt | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 samples/android/native-activity/CMakeLists.txt diff --git a/samples/android/native-activity/CMakeLists.txt b/samples/android/native-activity/CMakeLists.txt new file mode 100644 index 000000000..1f6d97439 --- /dev/null +++ b/samples/android/native-activity/CMakeLists.txt @@ -0,0 +1,12 @@ +set(sample example-native-activity) + +if(BUILD_FAT_JAVA_LIB) + set(native_deps opencv_java) +else() + set(native_deps opencv_highgui opencv_imgproc) +endif() + +add_android_project(${sample} "${CMAKE_CURRENT_SOURCE_DIR}" LIBRARY_DEPS ${OpenCV_BINARY_DIR} SDK_TARGET 9 ${ANDROID_SDK_TARGET} NATIVE_DEPS ${native_deps}) +if(TARGET ${sample}) + add_dependencies(opencv_android_examples ${sample}) +endif() From 8e10cd8946ea6832e63ae58e2f03de250d758251 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Mon, 1 Apr 2013 14:15:06 +0400 Subject: [PATCH 25/67] fixed gpu module compilation under windows --- modules/gpu/src/element_operations.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/gpu/src/element_operations.cpp b/modules/gpu/src/element_operations.cpp index afce5bbc7..a9b003937 100644 --- a/modules/gpu/src/element_operations.cpp +++ b/modules/gpu/src/element_operations.cpp @@ -1793,10 +1793,10 @@ void cv::gpu::exp(const GpuMat& src, GpuMat& dst, Stream& stream) namespace arithm { - void cmpMatEq_v4(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, cudaStream_t stream); - void cmpMatNe_v4(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, cudaStream_t stream); - void cmpMatLt_v4(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, cudaStream_t stream); - void cmpMatLe_v4(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, cudaStream_t stream); + void cmpMatEq_v4(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, cudaStream_t stream); + void cmpMatNe_v4(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, cudaStream_t stream); + void cmpMatLt_v4(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, cudaStream_t stream); + void cmpMatLe_v4(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, cudaStream_t stream); template void cmpMatEq(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); template void cmpMatNe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); @@ -1820,7 +1820,7 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c {cmpMatEq , cmpMatNe , cmpMatLt , cmpMatLe } }; - typedef void (*func_v4_t)(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, cudaStream_t stream); + typedef void (*func_v4_t)(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, cudaStream_t stream); static const func_v4_t funcs_v4[] = { cmpMatEq_v4, cmpMatNe_v4, cmpMatLt_v4, cmpMatLe_v4 From e640985e22f4a1082ab321a1c5610c06860f90d8 Mon Sep 17 00:00:00 2001 From: Andrey Kamaev Date: Mon, 1 Apr 2013 13:30:44 +0400 Subject: [PATCH 26/67] Update Android toolchain to the latest version supporting NDK r8e --- android/android.toolchain.cmake | 157 ++++++++++++++++++++++---------- 1 file changed, 111 insertions(+), 46 deletions(-) diff --git a/android/android.toolchain.cmake b/android/android.toolchain.cmake index f5daf307f..0f7e34067 100644 --- a/android/android.toolchain.cmake +++ b/android/android.toolchain.cmake @@ -1,5 +1,5 @@ # Copyright (c) 2010-2011, Ethan Rublee -# Copyright (c) 2011-2012, Andrey Kamaev +# Copyright (c) 2011-2013, Andrey Kamaev # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -281,8 +281,14 @@ # [+] updated for NDK r8c # [+] added support for clang compiler # - December 2012 +# [+] suppress warning about unused CMAKE_TOOLCHAIN_FILE variable +# [+] adjust API level to closest compatible as NDK does # [~] fixed ccache full path search # [+] updated for NDK r8d +# [~] compiler options are aligned with NDK r8d +# - March 2013 +# [+] updated for NDK r8e (x86 version) +# [+] support x86_64 version of NDK # ------------------------------------------------------------------------------ cmake_minimum_required( VERSION 2.6.3 ) @@ -292,6 +298,10 @@ if( DEFINED CMAKE_CROSSCOMPILING ) return() endif() +if( CMAKE_TOOLCHAIN_FILE ) + # touch toolchain variable only to suppress "unused variable" warning +endif() + get_property( _CMAKE_IN_TRY_COMPILE GLOBAL PROPERTY IN_TRY_COMPILE ) if( _CMAKE_IN_TRY_COMPILE ) include( "${CMAKE_CURRENT_SOURCE_DIR}/../android.toolchain.config.cmake" OPTIONAL ) @@ -305,7 +315,7 @@ set( CMAKE_SYSTEM_VERSION 1 ) # rpath makes low sence for Android set( CMAKE_SKIP_RPATH TRUE CACHE BOOL "If set, runtime paths are not added when using shared libraries." ) -set( ANDROID_SUPPORTED_NDK_VERSIONS ${ANDROID_EXTRA_NDK_VERSIONS} -r8d -r8c -r8b -r8 -r7c -r7b -r7 -r6b -r6 -r5c -r5b -r5 "" ) +set( ANDROID_SUPPORTED_NDK_VERSIONS ${ANDROID_EXTRA_NDK_VERSIONS} -r8e -r8d -r8c -r8b -r8 -r7c -r7b -r7 -r6b -r6 -r5c -r5b -r5 "" ) if(NOT DEFINED ANDROID_NDK_SEARCH_PATHS) if( CMAKE_HOST_WIN32 ) file( TO_CMAKE_PATH "$ENV{PROGRAMFILES}" ANDROID_NDK_SEARCH_PATHS ) @@ -449,19 +459,32 @@ if( ANDROID_FORBID_SYGWIN ) endif() endif() + # detect current host platform +if( NOT DEFINED ANDROID_NDK_HOST_X64 AND CMAKE_HOST_SYSTEM_PROCESSOR MATCHES "amd64|x86_64|AMD64") + set( ANDROID_NDK_HOST_X64 1 CACHE BOOL "Try to use 64-bit compiler toolchain" ) + mark_as_advanced( ANDROID_NDK_HOST_X64 ) +endif() + set( TOOL_OS_SUFFIX "" ) if( CMAKE_HOST_APPLE ) - set( ANDROID_NDK_HOST_SYSTEM_NAME "darwin-x86" ) + set( ANDROID_NDK_HOST_SYSTEM_NAME "darwin-x86_64" ) + set( ANDROID_NDK_HOST_SYSTEM_NAME2 "darwin-x86" ) elseif( CMAKE_HOST_WIN32 ) - set( ANDROID_NDK_HOST_SYSTEM_NAME "windows" ) + set( ANDROID_NDK_HOST_SYSTEM_NAME "windows-x86_64" ) + set( ANDROID_NDK_HOST_SYSTEM_NAME2 "windows" ) set( TOOL_OS_SUFFIX ".exe" ) elseif( CMAKE_HOST_UNIX ) - set( ANDROID_NDK_HOST_SYSTEM_NAME "linux-x86" ) + set( ANDROID_NDK_HOST_SYSTEM_NAME "linux-x86_64" ) + set( ANDROID_NDK_HOST_SYSTEM_NAME2 "linux-x86" ) else() message( FATAL_ERROR "Cross-compilation on your platform is not supported by this cmake toolchain" ) endif() +if( NOT ANDROID_NDK_HOST_X64 ) + set( ANDROID_NDK_HOST_SYSTEM_NAME ${ANDROID_NDK_HOST_SYSTEM_NAME2} ) +endif() + # see if we have path to Android NDK __INIT_VARIABLE( ANDROID_NDK PATH ENV_ANDROID_NDK ) if( NOT ANDROID_NDK ) @@ -509,7 +532,8 @@ if( ANDROID_NDK ) endif() set( ANDROID_NDK "${ANDROID_NDK}" CACHE INTERNAL "Path of the Android NDK" FORCE ) set( BUILD_WITH_ANDROID_NDK True ) - file( STRINGS "${ANDROID_NDK}/RELEASE.TXT" ANDROID_NDK_RELEASE LIMIT_COUNT 1 REGEX r[0-9]+[a-z]? ) + file( STRINGS "${ANDROID_NDK}/RELEASE.TXT" ANDROID_NDK_RELEASE_FULL LIMIT_COUNT 1 REGEX r[0-9]+[a-z]? ) + string( REGEX MATCH r[0-9]+[a-z]? ANDROID_NDK_RELEASE "${ANDROID_NDK_RELEASE_FULL}" ) elseif( ANDROID_STANDALONE_TOOLCHAIN ) get_filename_component( ANDROID_STANDALONE_TOOLCHAIN "${ANDROID_STANDALONE_TOOLCHAIN}" ABSOLUTE ) # try to detect change @@ -563,22 +587,21 @@ if( BUILD_WITH_STANDALONE_TOOLCHAIN ) endif() endif() -macro( __GLOB_NDK_TOOLCHAINS __availableToolchainsVar ) - foreach( __toolchain ${${__availableToolchainsVar}} ) +macro( __GLOB_NDK_TOOLCHAINS __availableToolchainsVar __availableToolchainsLst __host_system_name ) + foreach( __toolchain ${${__availableToolchainsLst}} ) if( "${__toolchain}" MATCHES "-clang3[.][0-9]$" AND NOT EXISTS "${ANDROID_NDK}/toolchains/${__toolchain}/prebuilt/" ) string( REGEX REPLACE "-clang3[.][0-9]$" "-4.6" __gcc_toolchain "${__toolchain}" ) else() set( __gcc_toolchain "${__toolchain}" ) endif() - __DETECT_TOOLCHAIN_MACHINE_NAME( __machine "${ANDROID_NDK}/toolchains/${__gcc_toolchain}/prebuilt/${ANDROID_NDK_HOST_SYSTEM_NAME}" ) + __DETECT_TOOLCHAIN_MACHINE_NAME( __machine "${ANDROID_NDK}/toolchains/${__gcc_toolchain}/prebuilt/${__host_system_name}" ) if( __machine ) string( REGEX MATCH "[0-9]+[.][0-9]+([.][0-9]+)?$" __version "${__gcc_toolchain}" ) string( REGEX MATCH "^[^-]+" __arch "${__gcc_toolchain}" ) list( APPEND __availableToolchainMachines "${__machine}" ) list( APPEND __availableToolchainArchs "${__arch}" ) list( APPEND __availableToolchainCompilerVersions "${__version}" ) - else() - list( REMOVE_ITEM ${__availableToolchainsVar} "${__toolchain}" ) + list( APPEND ${__availableToolchainsVar} "${__toolchain}" ) endif() unset( __gcc_toolchain ) endforeach() @@ -594,17 +617,29 @@ if( BUILD_WITH_ANDROID_NDK ) set( __availableToolchainCompilerVersions "" ) if( ANDROID_TOOLCHAIN_NAME AND EXISTS "${ANDROID_NDK}/toolchains/${ANDROID_TOOLCHAIN_NAME}/" ) # do not go through all toolchains if we know the name - set( __availableToolchains "${ANDROID_TOOLCHAIN_NAME}" ) - __GLOB_NDK_TOOLCHAINS( __availableToolchains ) + set( __availableToolchainsLst "${ANDROID_TOOLCHAIN_NAME}" ) + __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst ${ANDROID_NDK_HOST_SYSTEM_NAME} ) + if( NOT __availableToolchains AND NOT ANDROID_NDK_HOST_SYSTEM_NAME STREQUAL ANDROID_NDK_HOST_SYSTEM_NAME2 ) + __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst ${ANDROID_NDK_HOST_SYSTEM_NAME2} ) + if( __availableToolchains ) + set( ANDROID_NDK_HOST_SYSTEM_NAME ${ANDROID_NDK_HOST_SYSTEM_NAME2} ) + endif() + endif() endif() if( NOT __availableToolchains ) - file( GLOB __availableToolchains RELATIVE "${ANDROID_NDK}/toolchains" "${ANDROID_NDK}/toolchains/*" ) + file( GLOB __availableToolchainsLst RELATIVE "${ANDROID_NDK}/toolchains" "${ANDROID_NDK}/toolchains/*" ) if( __availableToolchains ) - list(SORT __availableToolchains) # we need clang to go after gcc + list(SORT __availableToolchainsLst) # we need clang to go after gcc + endif() + __LIST_FILTER( __availableToolchainsLst "^[.]" ) + __LIST_FILTER( __availableToolchainsLst "llvm" ) + __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst ${ANDROID_NDK_HOST_SYSTEM_NAME} ) + if( NOT __availableToolchains AND NOT ANDROID_NDK_HOST_SYSTEM_NAME STREQUAL ANDROID_NDK_HOST_SYSTEM_NAME2 ) + __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst ${ANDROID_NDK_HOST_SYSTEM_NAME2} ) + if( __availableToolchains ) + set( ANDROID_NDK_HOST_SYSTEM_NAME ${ANDROID_NDK_HOST_SYSTEM_NAME2} ) + endif() endif() - __LIST_FILTER( __availableToolchains "^[.]" ) - __LIST_FILTER( __availableToolchains "llvm" ) - __GLOB_NDK_TOOLCHAINS( __availableToolchains ) endif() if( NOT __availableToolchains ) message( FATAL_ERROR "Could not find any working toolchain in the NDK. Probably your Android NDK is broken." ) @@ -617,11 +652,11 @@ set( __uniqToolchainArchNames ${__availableToolchainArchs} ) list( REMOVE_DUPLICATES __uniqToolchainArchNames ) list( SORT __uniqToolchainArchNames ) foreach( __arch ${__uniqToolchainArchNames} ) -list( APPEND ANDROID_SUPPORTED_ABIS ${ANDROID_SUPPORTED_ABIS_${__arch}} ) + list( APPEND ANDROID_SUPPORTED_ABIS ${ANDROID_SUPPORTED_ABIS_${__arch}} ) endforeach() unset( __uniqToolchainArchNames ) if( NOT ANDROID_SUPPORTED_ABIS ) -message( FATAL_ERROR "No one of known Android ABIs is supported by this cmake toolchain." ) + message( FATAL_ERROR "No one of known Android ABIs is supported by this cmake toolchain." ) endif() # choose target ABI @@ -760,11 +795,22 @@ unset( __availableToolchainCompilerVersions ) # choose native API level __INIT_VARIABLE( ANDROID_NATIVE_API_LEVEL ENV_ANDROID_NATIVE_API_LEVEL ANDROID_API_LEVEL ENV_ANDROID_API_LEVEL ANDROID_STANDALONE_TOOLCHAIN_API_LEVEL ANDROID_DEFAULT_NDK_API_LEVEL_${ANDROID_ARCH_NAME} ANDROID_DEFAULT_NDK_API_LEVEL ) string( REGEX MATCH "[0-9]+" ANDROID_NATIVE_API_LEVEL "${ANDROID_NATIVE_API_LEVEL}" ) -# TODO: filter out unsupported levels +# adjust API level +set( __real_api_level ${ANDROID_DEFAULT_NDK_API_LEVEL_${ANDROID_ARCH_NAME}} ) +foreach( __level ${ANDROID_SUPPORTED_NATIVE_API_LEVELS} ) + if( NOT __level GREATER ANDROID_NATIVE_API_LEVEL AND NOT __level LESS __real_api_level ) + set( __real_api_level ${__level} ) + endif() +endforeach() +if( __real_api_level AND NOT ANDROID_NATIVE_API_LEVEL EQUAL __real_api_level ) + message( STATUS "Adjusting Android API level 'android-${ANDROID_NATIVE_API_LEVEL}' to 'android-${__real_api_level}'") + set( ANDROID_NATIVE_API_LEVEL ${__real_api_level} ) +endif() +unset(__real_api_level) # validate list( FIND ANDROID_SUPPORTED_NATIVE_API_LEVELS "${ANDROID_NATIVE_API_LEVEL}" __levelIdx ) if( __levelIdx EQUAL -1 ) - message( SEND_ERROR "Specified Android native API level (${ANDROID_NATIVE_API_LEVEL}) is not supported by your NDK/toolchain." ) + message( SEND_ERROR "Specified Android native API level 'android-${ANDROID_NATIVE_API_LEVEL}' is not supported by your NDK/toolchain." ) else() if( BUILD_WITH_ANDROID_NDK ) __DETECT_NATIVE_API_LEVEL( __realApiLevel "${ANDROID_NDK}/platforms/android-${ANDROID_NATIVE_API_LEVEL}/arch-${ANDROID_ARCH_NAME}/usr/include/android/api-level.h" ) @@ -926,7 +972,7 @@ elseif( "${ANDROID_TOOLCHAIN_NAME}" MATCHES "-clang3[.][0-9]?$" ) string( REGEX MATCH "3[.][0-9]$" ANDROID_CLANG_VERSION "${ANDROID_TOOLCHAIN_NAME}") string( REGEX REPLACE "-clang${ANDROID_CLANG_VERSION}$" "-4.6" ANDROID_GCC_TOOLCHAIN_NAME "${ANDROID_TOOLCHAIN_NAME}" ) if( NOT EXISTS "${ANDROID_NDK}/toolchains/llvm-${ANDROID_CLANG_VERSION}/prebuilt/${ANDROID_NDK_HOST_SYSTEM_NAME}/bin/clang${TOOL_OS_SUFFIX}" ) - message( FATAL_ERROR "Could not find the " ) + message( FATAL_ERROR "Could not find the Clang compiler driver" ) endif() set( ANDROID_COMPILER_IS_CLANG 1 ) set( ANDROID_CLANG_TOOLCHAIN_ROOT "${ANDROID_NDK}/toolchains/llvm-${ANDROID_CLANG_VERSION}/prebuilt/${ANDROID_NDK_HOST_SYSTEM_NAME}" ) @@ -1140,38 +1186,52 @@ endif() # NDK flags if( ARMEABI OR ARMEABI_V7A ) - set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -fpic -D__ARM_ARCH_5__ -D__ARM_ARCH_5T__ -D__ARM_ARCH_5E__ -D__ARM_ARCH_5TE__" ) + set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -fpic -funwind-tables" ) if( NOT ANDROID_FORCE_ARM_BUILD AND NOT ARMEABI_V6 ) - # It is recommended to use the -mthumb compiler flag to force the generation - # of 16-bit Thumb-1 instructions (the default being 32-bit ARM ones). - set( ANDROID_CXX_FLAGS_RELEASE "-mthumb" ) - set( ANDROID_CXX_FLAGS_DEBUG "-marm -finline-limit=64" ) + set( ANDROID_CXX_FLAGS_RELEASE "-mthumb -fomit-frame-pointer -fno-strict-aliasing" ) + set( ANDROID_CXX_FLAGS_DEBUG "-marm -fno-omit-frame-pointer -fno-strict-aliasing" ) + if( NOT ANDROID_COMPILER_IS_CLANG ) + set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -finline-limit=64" ) + endif() else() # always compile ARMEABI_V6 in arm mode; otherwise there is no difference from ARMEABI - # O3 instead of O2/Os in release mode - like cmake sets for desktop gcc - set( ANDROID_CXX_FLAGS_RELEASE "-marm" ) - set( ANDROID_CXX_FLAGS_DEBUG "-marm -finline-limit=300" ) + set( ANDROID_CXX_FLAGS_RELEASE "-marm -fomit-frame-pointer -fstrict-aliasing" ) + set( ANDROID_CXX_FLAGS_DEBUG "-marm -fno-omit-frame-pointer -fno-strict-aliasing" ) + if( NOT ANDROID_COMPILER_IS_CLANG ) + set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -funswitch-loops -finline-limit=300" ) + endif() endif() elseif( X86 ) - set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -funwind-tables" ) - set( ANDROID_CXX_FLAGS_RELEASE "" ) - set( ANDROID_CXX_FLAGS_DEBUG "-finline-limit=300" ) + set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -funwind-tables" ) + if( NOT ANDROID_COMPILER_IS_CLANG ) + set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -funswitch-loops -finline-limit=300" ) + else() + set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -fPIC" ) + endif() + set( ANDROID_CXX_FLAGS_RELEASE "-fomit-frame-pointer -fstrict-aliasing" ) + set( ANDROID_CXX_FLAGS_DEBUG "-fno-omit-frame-pointer -fno-strict-aliasing" ) elseif( MIPS ) - set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -fpic -funwind-tables -fmessage-length=0 -fno-inline-functions-called-once -frename-registers" ) - set( ANDROID_CXX_FLAGS_RELEASE "-finline-limit=300 -fno-strict-aliasing" ) - set( ANDROID_CXX_FLAGS_DEBUG "-finline-functions -fgcse-after-reload -frerun-cse-after-loop" ) + set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -fpic -fno-strict-aliasing -finline-functions -ffunction-sections -funwind-tables -fmessage-length=0" ) + set( ANDROID_CXX_FLAGS_RELEASE "-fomit-frame-pointer" ) + set( ANDROID_CXX_FLAGS_DEBUG "-fno-omit-frame-pointer" ) + if( NOT ANDROID_COMPILER_IS_CLANG ) + set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -fno-inline-functions-called-once -fgcse-after-reload -frerun-cse-after-loop -frename-registers" ) + set( ANDROID_CXX_FLAGS_RELEASE "${ANDROID_CXX_FLAGS_RELEASE} -funswitch-loops -finline-limit=300" ) + endif() elseif() set( ANDROID_CXX_FLAGS_RELEASE "" ) set( ANDROID_CXX_FLAGS_DEBUG "" ) endif() -if( NOT X86 ) - set( ANDROID_CXX_FLAGS "-Wno-psabi ${ANDROID_CXX_FLAGS}" ) +set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -fsigned-char" ) # good/necessary when porting desktop libraries + +if( NOT X86 AND NOT ANDROID_COMPILER_IS_CLANG ) + set( ANDROID_CXX_FLAGS "-Wno-psabi ${ANDROID_CXX_FLAGS}" ) endif() -set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -fsigned-char" ) # good/necessary when porting desktop libraries -set( ANDROID_CXX_FLAGS_RELEASE "${ANDROID_CXX_FLAGS_RELEASE} -fomit-frame-pointer" ) -set( ANDROID_CXX_FLAGS_DEBUG "${ANDROID_CXX_FLAGS_DEBUG} -fno-strict-aliasing -fno-omit-frame-pointer" ) +if( NOT ANDROID_COMPILER_VERSION VERSION_LESS "4.6" ) + set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -no-canonical-prefixes" ) # see https://android-review.googlesource.com/#/c/47564/ +endif() # ABI-specific flags if( ARMEABI_V7A ) @@ -1308,9 +1368,6 @@ if( ANDROID_COMPILER_IS_CLANG ) set( ANDROID_CXX_FLAGS "-target ${ANDROID_LLVM_TRIPLE} ${ANDROID_CXX_FLAGS}" ) endif() if( BUILD_WITH_ANDROID_NDK ) - if(ANDROID_ARCH_NAME STREQUAL "arm" ) - set( ANDROID_CXX_FLAGS "-isystem ${ANDROID_CLANG_TOOLCHAIN_ROOT}/lib/clang/${ANDROID_CLANG_VERSION}/include ${ANDROID_CXX_FLAGS}" ) - endif() set( ANDROID_CXX_FLAGS "-gcc-toolchain ${ANDROID_TOOLCHAIN_ROOT} ${ANDROID_CXX_FLAGS}" ) endif() endif() @@ -1326,6 +1383,12 @@ set( CMAKE_SHARED_LINKER_FLAGS "" CACHE STRING "shared li set( CMAKE_MODULE_LINKER_FLAGS "" CACHE STRING "module linker flags" ) set( CMAKE_EXE_LINKER_FLAGS "-Wl,-z,nocopyreloc" CACHE STRING "executable linker flags" ) +# put flags to cache (for debug purpose only) +set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS}" CACHE INTERNAL "Android specific c/c++ flags" ) +set( ANDROID_CXX_FLAGS_RELEASE "${ANDROID_CXX_FLAGS_RELEASE}" CACHE INTERNAL "Android specific c/c++ Release flags" ) +set( ANDROID_CXX_FLAGS_DEBUG "${ANDROID_CXX_FLAGS_DEBUG}" CACHE INTERNAL "Android specific c/c++ Debug flags" ) +set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS}" CACHE INTERNAL "Android specific c/c++ linker flags" ) + # finish flags set( CMAKE_CXX_FLAGS "${ANDROID_CXX_FLAGS} ${CMAKE_CXX_FLAGS}" ) set( CMAKE_C_FLAGS "${ANDROID_CXX_FLAGS} ${CMAKE_C_FLAGS}" ) @@ -1456,6 +1519,7 @@ endmacro() if( NOT PROJECT_NAME STREQUAL "CMAKE_TRY_COMPILE" ) set( __toolchain_config "") foreach( __var NDK_CCACHE LIBRARY_OUTPUT_PATH_ROOT ANDROID_FORBID_SYGWIN ANDROID_SET_OBSOLETE_VARIABLES + ANDROID_NDK_HOST_X64 ANDROID_NDK ANDROID_STANDALONE_TOOLCHAIN ANDROID_TOOLCHAIN_NAME @@ -1512,6 +1576,7 @@ endif() # ANDROID_NDK # ANDROID_STANDALONE_TOOLCHAIN # ANDROID_TOOLCHAIN_NAME : the NDK name of compiler toolchain +# ANDROID_NDK_HOST_X64 : try to use x86_64 toolchain (default for x64 host systems) # LIBRARY_OUTPUT_PATH_ROOT : # NDK_CCACHE : # Obsolete: @@ -1536,7 +1601,7 @@ endif() # BUILD_WITH_STANDALONE_TOOLCHAIN : TRUE if standalone toolchain is used # ANDROID_NDK_HOST_SYSTEM_NAME : "windows", "linux-x86" or "darwin-x86" depending on host platform # ANDROID_NDK_ABI_NAME : "armeabi", "armeabi-v7a", "x86" or "mips" depending on ANDROID_ABI -# ANDROID_NDK_RELEASE : one of r5, r5b, r5c, r6, r6b, r7, r7b, r7c, r8, r8b, r8c, r8d; set only for NDK +# ANDROID_NDK_RELEASE : one of r5, r5b, r5c, r6, r6b, r7, r7b, r7c, r8, r8b, r8c, r8d, r8e; set only for NDK # ANDROID_ARCH_NAME : "arm" or "x86" or "mips" depending on ANDROID_ABI # ANDROID_SYSROOT : path to the compiler sysroot # TOOL_OS_SUFFIX : "" or ".exe" depending on host platform From 10702c6d755909f8f90a106f728e34acd37cd188 Mon Sep 17 00:00:00 2001 From: Vadim Pisarevsky Date: Mon, 1 Apr 2013 15:20:35 +0400 Subject: [PATCH 27/67] fixes in bundle adjustment code by Nils Hasler --- modules/contrib/include/opencv2/contrib/contrib.hpp | 2 +- modules/contrib/src/ba.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/contrib/include/opencv2/contrib/contrib.hpp b/modules/contrib/include/opencv2/contrib/contrib.hpp index 0085b0dfe..f18a5f93c 100644 --- a/modules/contrib/include/opencv2/contrib/contrib.hpp +++ b/modules/contrib/include/opencv2/contrib/contrib.hpp @@ -435,7 +435,7 @@ namespace cv typedef bool (*BundleAdjustCallback)(int iteration, double norm_error, void* user_data); - class LevMarqSparse { + class CV_EXPORTS LevMarqSparse { public: LevMarqSparse(); LevMarqSparse(int npoints, // number of points diff --git a/modules/contrib/src/ba.cpp b/modules/contrib/src/ba.cpp index a0f904665..80047877b 100644 --- a/modules/contrib/src/ba.cpp +++ b/modules/contrib/src/ba.cpp @@ -1105,7 +1105,7 @@ void LevMarqSparse::bundleAdjust( vector& points, //positions of points Mat rot_vec = Mat(levmar.P).rowRange(i*num_cam_param, i*num_cam_param+3); Rodrigues( rot_vec, R[i] ); //translation - T[i] = Mat(levmar.P).rowRange(i*num_cam_param + 3, i*num_cam_param+6); + Mat(levmar.P).rowRange(i*num_cam_param + 3, i*num_cam_param+6).copyTo(T[i]); //intrinsic camera matrix double* intr_data = (double*)cameraMatrix[i].data; From 10f6ebfdf757bef5483327982d735f2999a01bc5 Mon Sep 17 00:00:00 2001 From: yao Date: Tue, 2 Apr 2013 11:35:40 +0800 Subject: [PATCH 28/67] fix the crash when calling dft --- modules/ocl/src/fft.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/modules/ocl/src/fft.cpp b/modules/ocl/src/fft.cpp index 36c635860..7aa40e8b7 100644 --- a/modules/ocl/src/fft.cpp +++ b/modules/ocl/src/fft.cpp @@ -206,7 +206,7 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla clStridesIn[2] = is_row_dft ? clStridesIn[1] : dft_size.width * clStridesIn[1]; clStridesOut[2] = is_row_dft ? clStridesOut[1] : dft_size.width * clStridesOut[1]; - openCLSafeCall( clAmdFftCreateDefaultPlan( &plHandle, (cl_context)getoclContext(), dim, clLengthsIn ) ); + openCLSafeCall( clAmdFftCreateDefaultPlan( &plHandle, *(cl_context*)getoclContext(), dim, clLengthsIn ) ); openCLSafeCall( clAmdFftSetResultLocation( plHandle, CLFFT_OUTOFPLACE ) ); openCLSafeCall( clAmdFftSetLayout( plHandle, inLayout, outLayout ) ); @@ -220,8 +220,7 @@ cv::ocl::FftPlan::FftPlan(Size _dft_size, int _src_step, int _dst_step, int _fla openCLSafeCall( clAmdFftSetPlanScale ( plHandle, is_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, scale_ ) ); //ready to bake - cl_command_queue clq = (cl_command_queue)getoclCommandQueue(); - openCLSafeCall( clAmdFftBakePlan( plHandle, 1, &clq, NULL, NULL ) ); + openCLSafeCall( clAmdFftBakePlan( plHandle, 1, (cl_command_queue*)getoclCommandQueue(), NULL, NULL ) ); } cv::ocl::FftPlan::~FftPlan() { From f3254b28f2be208bfaa0dca155eb1c8b26a62d17 Mon Sep 17 00:00:00 2001 From: yao Date: Tue, 2 Apr 2013 14:41:02 +0800 Subject: [PATCH 29/67] use clflush replaces clfinish --- modules/ocl/include/opencv2/ocl/ocl.hpp | 3 +++ modules/ocl/perf/precomp.hpp | 2 ++ modules/ocl/src/initialization.cpp | 7 ++++++- modules/ocl/src/mcwutil.cpp | 12 +++++++----- 4 files changed, 18 insertions(+), 6 deletions(-) diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp index c321633b1..da7ca27ae 100644 --- a/modules/ocl/include/opencv2/ocl/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl/ocl.hpp @@ -126,6 +126,9 @@ namespace cv CV_EXPORTS void* getoclCommandQueue(); + //explicit call clFinish. The global command queue will be used. + CV_EXPORTS void finish(); + //this function enable ocl module to use customized cl_context and cl_command_queue //getDevice also need to be called before this function CV_EXPORTS void setDeviceEx(Info &oclinfo, void *ctx, void *qu, int devnum = 0); diff --git a/modules/ocl/perf/precomp.hpp b/modules/ocl/perf/precomp.hpp index 819ac5925..c2cf1238e 100644 --- a/modules/ocl/perf/precomp.hpp +++ b/modules/ocl/perf/precomp.hpp @@ -375,6 +375,7 @@ struct name##_test: Runnable { \ while (!TestSystem::instance().stop()) { \ TestSystem::instance().gpuOn() #define GPU_OFF \ + ocl::finish(); \ TestSystem::instance().gpuOff(); \ } TestSystem::instance().gpuComplete() @@ -388,4 +389,5 @@ struct name##_test: Runnable { \ #define WARMUP_ON \ while (!TestSystem::instance().warmupStop()) { #define WARMUP_OFF \ + ocl::finish(); \ } TestSystem::instance().warmupComplete() diff --git a/modules/ocl/src/initialization.cpp b/modules/ocl/src/initialization.cpp index 9a790f4ee..b582f1ce3 100644 --- a/modules/ocl/src/initialization.cpp +++ b/modules/ocl/src/initialization.cpp @@ -354,6 +354,11 @@ namespace cv return &(Context::getContext()->impl->clCmdQueue); } + void finish() + { + clFinish(Context::getContext()->impl->clCmdQueue); + } + void queryDeviceInfo(DEVICE_INFO info_type, void* info) { static Info::Impl* impl = Context::getContext()->impl; @@ -712,7 +717,7 @@ namespace cv clReleaseEvent(event); #endif - clFinish(clCxt->impl->clCmdQueue); + clFlush(clCxt->impl->clCmdQueue); openCLSafeCall(clReleaseKernel(kernel)); } diff --git a/modules/ocl/src/mcwutil.cpp b/modules/ocl/src/mcwutil.cpp index 8b7e18764..15df8e044 100644 --- a/modules/ocl/src/mcwutil.cpp +++ b/modules/ocl/src/mcwutil.cpp @@ -144,7 +144,7 @@ namespace cv format.image_channel_data_type = CL_FLOAT; break; default: - throw std::exception(); + CV_Error(-1, "Image forma is not supported"); break; } switch(channels) @@ -159,7 +159,7 @@ namespace cv format.image_channel_order = CL_RGBA; break; default: - throw std::exception(); + CV_Error(-1, "Image forma is not supported"); break; } #if CL_VERSION_1_2 @@ -197,7 +197,8 @@ namespace cv const size_t regin[3] = {mat.cols * mat.elemSize(), mat.rows, 1}; clEnqueueCopyBufferRect((cl_command_queue)mat.clCxt->oclCommandQueue(), (cl_mem)mat.data, devData, origin, origin, regin, mat.step, 0, mat.cols * mat.elemSize(), 0, 0, NULL, NULL); - } + clFlush((cl_command_queue)mat.clCxt->oclCommandQueue()); + } else { devData = (cl_mem)mat.data; @@ -206,7 +207,7 @@ namespace cv clEnqueueCopyBufferToImage((cl_command_queue)mat.clCxt->oclCommandQueue(), devData, texture, 0, origin, region, 0, NULL, 0); if ((mat.cols * mat.elemSize() != mat.step)) { - clFinish((cl_command_queue)mat.clCxt->oclCommandQueue()); + clFlush((cl_command_queue)mat.clCxt->oclCommandQueue()); clReleaseMemObject(devData); } @@ -231,7 +232,8 @@ namespace cv try { cv::ocl::openCLGetKernelFromSource(clCxt, &_kernel_string, "test_func"); - //_support = true; + finish(); + _support = true; } catch (const cv::Exception& e) { From 9aa29373b47712d9b65ca88a0c52aaba60707f06 Mon Sep 17 00:00:00 2001 From: Andrey Pavlenko Date: Tue, 2 Apr 2013 11:39:26 +0400 Subject: [PATCH 30/67] this test is unstable (fails from time to time), let's disable it until fixed --- modules/calib3d/test/test_solvepnp_ransac.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/calib3d/test/test_solvepnp_ransac.cpp b/modules/calib3d/test/test_solvepnp_ransac.cpp index dc66c1dc7..3377a57c6 100644 --- a/modules/calib3d/test/test_solvepnp_ransac.cpp +++ b/modules/calib3d/test/test_solvepnp_ransac.cpp @@ -236,7 +236,7 @@ protected: } }; -TEST(Calib3d_SolvePnPRansac, accuracy) { CV_solvePnPRansac_Test test; test.safe_run(); } +TEST(DISABLED_Calib3d_SolvePnPRansac, accuracy) { CV_solvePnPRansac_Test test; test.safe_run(); } TEST(Calib3d_SolvePnP, accuracy) { CV_solvePnP_Test test; test.safe_run(); } From 75ea10e6ff8381890ca57d112b4d1985e8eddf01 Mon Sep 17 00:00:00 2001 From: Andrey Pavlenko Date: Tue, 2 Apr 2013 12:14:43 +0400 Subject: [PATCH 31/67] fix for #2806 (missing 'nu03' field of moments) --- modules/python/src2/cv2.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/python/src2/cv2.cpp b/modules/python/src2/cv2.cpp index a650f23f0..308eb4263 100644 --- a/modules/python/src2/cv2.cpp +++ b/modules/python/src2/cv2.cpp @@ -957,7 +957,7 @@ static inline PyObject* pyopencv_from(const Moments& m) "mu20", m.mu20, "mu11", m.mu11, "mu02", m.mu02, "mu30", m.mu30, "mu21", m.mu21, "mu12", m.mu12, "mu03", m.mu03, "nu20", m.nu20, "nu11", m.nu11, "nu02", m.nu02, - "nu30", m.nu30, "nu21", m.nu21, "nu12", m.nu12, "mu03", m.nu03); + "nu30", m.nu30, "nu21", m.nu21, "nu12", m.nu12, "nu03", m.nu03); } static inline PyObject* pyopencv_from(const CvDTreeNode* node) From b47a2012d290737d87ad9e4326572e36be6e6ed4 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Tue, 2 Apr 2013 13:23:03 +0400 Subject: [PATCH 32/67] fixed assert condition (trueRightDisp can be empty) --- modules/calib3d/test/test_stereomatching.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/calib3d/test/test_stereomatching.cpp b/modules/calib3d/test/test_stereomatching.cpp index 2be50a017..b715d48f5 100644 --- a/modules/calib3d/test/test_stereomatching.cpp +++ b/modules/calib3d/test/test_stereomatching.cpp @@ -530,7 +530,8 @@ int CV_StereoMatchingTest::processStereoMatchingResults( FileStorage& fs, int ca // rightDisp is not used in current test virsion int code = cvtest::TS::OK; assert( fs.isOpened() ); - assert( trueLeftDisp.type() == CV_32FC1 && trueRightDisp.type() == CV_32FC1 ); + assert( trueLeftDisp.type() == CV_32FC1 ); + assert( trueRightDisp.empty() || trueRightDisp.type() == CV_32FC1 ); assert( leftDisp.type() == CV_32FC1 && rightDisp.type() == CV_32FC1 ); // get masks for unknown ground truth disparity values From 9086efa8e92732595a3d0a1fa73eeaf461d915a4 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Tue, 2 Apr 2013 13:24:00 +0400 Subject: [PATCH 33/67] fixed warnings --- modules/nonfree/src/surf_gpu.cpp | 3 +++ modules/nonfree/test/test_precomp.hpp | 8 +++++--- modules/nonfree/test/test_surf.ocl.cpp | 11 ----------- samples/gpu/super_resolution.cpp | 2 -- 4 files changed, 8 insertions(+), 16 deletions(-) diff --git a/modules/nonfree/src/surf_gpu.cpp b/modules/nonfree/src/surf_gpu.cpp index eabbd78b4..bfc7e700f 100644 --- a/modules/nonfree/src/surf_gpu.cpp +++ b/modules/nonfree/src/surf_gpu.cpp @@ -223,6 +223,9 @@ namespace } private: + SURF_GPU_Invoker(const SURF_GPU_Invoker&); + SURF_GPU_Invoker& operator =(const SURF_GPU_Invoker&); + SURF_GPU& surf_; int img_cols, img_rows; diff --git a/modules/nonfree/test/test_precomp.hpp b/modules/nonfree/test/test_precomp.hpp index 15f2b9573..5be95a317 100644 --- a/modules/nonfree/test/test_precomp.hpp +++ b/modules/nonfree/test/test_precomp.hpp @@ -14,14 +14,16 @@ #include "opencv2/highgui/highgui.hpp" #include "opencv2/nonfree/nonfree.hpp" +#include "opencv2/ts/gpu_test.hpp" + #include "opencv2/opencv_modules.hpp" + #ifdef HAVE_OPENCV_OCL # include "opencv2/nonfree/ocl.hpp" #endif -#if defined(HAVE_OPENCV_GPU) && defined(HAVE_CUDA) - #include "opencv2/ts/gpu_test.hpp" - #include "opencv2/nonfree/gpu.hpp" +#ifdef HAVE_OPENCV_GPU +# include "opencv2/nonfree/gpu.hpp" #endif #endif diff --git a/modules/nonfree/test/test_surf.ocl.cpp b/modules/nonfree/test/test_surf.ocl.cpp index 76ed37de4..d6a877bc8 100644 --- a/modules/nonfree/test/test_surf.ocl.cpp +++ b/modules/nonfree/test/test_surf.ocl.cpp @@ -109,17 +109,6 @@ static int getMatchedPointsCount(const std::vector& keypoints1, co return validCount; } -#define PARAM_TEST_CASE(name, ...) struct name : testing::TestWithParam< std::tr1::tuple< __VA_ARGS__ > > -#define IMPLEMENT_PARAM_CLASS(name, type) \ - namespace { class name { \ - public: \ - name ( type arg = type ()) : val_(arg) {} \ - operator type () const {return val_;} \ - private: \ - type val_; \ - }; \ - inline void PrintTo( name param, std::ostream* os) {*os << #name << "=" << testing::PrintToString(static_cast< type >(param));}} - IMPLEMENT_PARAM_CLASS(HessianThreshold, double) IMPLEMENT_PARAM_CLASS(Octaves, int) IMPLEMENT_PARAM_CLASS(OctaveLayers, int) diff --git a/samples/gpu/super_resolution.cpp b/samples/gpu/super_resolution.cpp index 2dd3656b0..80aa48029 100644 --- a/samples/gpu/super_resolution.cpp +++ b/samples/gpu/super_resolution.cpp @@ -48,8 +48,6 @@ static Ptr createOptFlow(const string& name, bool useGpu) cerr << "Incorrect Optical Flow algorithm - " << name << endl; exit(-1); } - - return Ptr(); } int main(int argc, const char* argv[]) From 642d7d68266fb8f12d07eefe069e87fa3490480b Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Tue, 2 Apr 2013 13:24:39 +0400 Subject: [PATCH 34/67] fixed nonfree test (run it only on one gpu device) --- modules/nonfree/test/test_gpu.cpp | 154 ++++++++++------------------- modules/nonfree/test/test_main.cpp | 70 ------------- 2 files changed, 50 insertions(+), 174 deletions(-) diff --git a/modules/nonfree/test/test_gpu.cpp b/modules/nonfree/test/test_gpu.cpp index ece82f316..30aec352c 100644 --- a/modules/nonfree/test/test_gpu.cpp +++ b/modules/nonfree/test/test_gpu.cpp @@ -58,9 +58,8 @@ namespace IMPLEMENT_PARAM_CLASS(SURF_Upright, bool) } -PARAM_TEST_CASE(SURF, cv::gpu::DeviceInfo, SURF_HessianThreshold, SURF_Octaves, SURF_OctaveLayers, SURF_Extended, SURF_Upright) +PARAM_TEST_CASE(SURF, SURF_HessianThreshold, SURF_Octaves, SURF_OctaveLayers, SURF_Extended, SURF_Upright) { - cv::gpu::DeviceInfo devInfo; double hessianThreshold; int nOctaves; int nOctaveLayers; @@ -69,14 +68,11 @@ PARAM_TEST_CASE(SURF, cv::gpu::DeviceInfo, SURF_HessianThreshold, SURF_Octaves, virtual void SetUp() { - devInfo = GET_PARAM(0); - hessianThreshold = GET_PARAM(1); - nOctaves = GET_PARAM(2); - nOctaveLayers = GET_PARAM(3); - extended = GET_PARAM(4); - upright = GET_PARAM(5); - - cv::gpu::setDevice(devInfo.deviceID()); + hessianThreshold = GET_PARAM(0); + nOctaves = GET_PARAM(1); + nOctaveLayers = GET_PARAM(2); + extended = GET_PARAM(3); + upright = GET_PARAM(4); } }; @@ -93,39 +89,24 @@ GPU_TEST_P(SURF, Detector) surf.upright = upright; surf.keypointsRatio = 0.05f; - if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS)) - { - try - { - std::vector keypoints; - surf(loadMat(image), cv::gpu::GpuMat(), keypoints); - } - catch (const cv::Exception& e) - { - ASSERT_EQ(CV_StsNotImplemented, e.code); - } - } - else - { - std::vector keypoints; - surf(loadMat(image), cv::gpu::GpuMat(), keypoints); + std::vector keypoints; + surf(loadMat(image), cv::gpu::GpuMat(), keypoints); - cv::SURF surf_gold; - surf_gold.hessianThreshold = hessianThreshold; - surf_gold.nOctaves = nOctaves; - surf_gold.nOctaveLayers = nOctaveLayers; - surf_gold.extended = extended; - surf_gold.upright = upright; + cv::SURF surf_gold; + surf_gold.hessianThreshold = hessianThreshold; + surf_gold.nOctaves = nOctaves; + surf_gold.nOctaveLayers = nOctaveLayers; + surf_gold.extended = extended; + surf_gold.upright = upright; - std::vector keypoints_gold; - surf_gold(image, cv::noArray(), keypoints_gold); + std::vector keypoints_gold; + surf_gold(image, cv::noArray(), keypoints_gold); - ASSERT_EQ(keypoints_gold.size(), keypoints.size()); - int matchedCount = getMatchedPointsCount(keypoints_gold, keypoints); - double matchedRatio = static_cast(matchedCount) / keypoints_gold.size(); + ASSERT_EQ(keypoints_gold.size(), keypoints.size()); + int matchedCount = getMatchedPointsCount(keypoints_gold, keypoints); + double matchedRatio = static_cast(matchedCount) / keypoints_gold.size(); - EXPECT_GT(matchedRatio, 0.95); - } + EXPECT_GT(matchedRatio, 0.95); } GPU_TEST_P(SURF, Detector_Masked) @@ -144,39 +125,24 @@ GPU_TEST_P(SURF, Detector_Masked) surf.upright = upright; surf.keypointsRatio = 0.05f; - if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS)) - { - try - { - std::vector keypoints; - surf(loadMat(image), loadMat(mask), keypoints); - } - catch (const cv::Exception& e) - { - ASSERT_EQ(CV_StsNotImplemented, e.code); - } - } - else - { - std::vector keypoints; - surf(loadMat(image), loadMat(mask), keypoints); + std::vector keypoints; + surf(loadMat(image), loadMat(mask), keypoints); - cv::SURF surf_gold; - surf_gold.hessianThreshold = hessianThreshold; - surf_gold.nOctaves = nOctaves; - surf_gold.nOctaveLayers = nOctaveLayers; - surf_gold.extended = extended; - surf_gold.upright = upright; + cv::SURF surf_gold; + surf_gold.hessianThreshold = hessianThreshold; + surf_gold.nOctaves = nOctaves; + surf_gold.nOctaveLayers = nOctaveLayers; + surf_gold.extended = extended; + surf_gold.upright = upright; - std::vector keypoints_gold; - surf_gold(image, mask, keypoints_gold); + std::vector keypoints_gold; + surf_gold(image, mask, keypoints_gold); - ASSERT_EQ(keypoints_gold.size(), keypoints.size()); - int matchedCount = getMatchedPointsCount(keypoints_gold, keypoints); - double matchedRatio = static_cast(matchedCount) / keypoints_gold.size(); + ASSERT_EQ(keypoints_gold.size(), keypoints.size()); + int matchedCount = getMatchedPointsCount(keypoints_gold, keypoints); + double matchedRatio = static_cast(matchedCount) / keypoints_gold.size(); - EXPECT_GT(matchedRatio, 0.95); - } + EXPECT_GT(matchedRatio, 0.95); } GPU_TEST_P(SURF, Descriptor) @@ -199,43 +165,26 @@ GPU_TEST_P(SURF, Descriptor) surf_gold.extended = extended; surf_gold.upright = upright; - if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS)) - { - try - { - std::vector keypoints; - cv::gpu::GpuMat descriptors; - surf(loadMat(image), cv::gpu::GpuMat(), keypoints, descriptors); - } - catch (const cv::Exception& e) - { - ASSERT_EQ(CV_StsNotImplemented, e.code); - } - } - else - { - std::vector keypoints; - surf_gold(image, cv::noArray(), keypoints); + std::vector keypoints; + surf_gold(image, cv::noArray(), keypoints); - cv::gpu::GpuMat descriptors; - surf(loadMat(image), cv::gpu::GpuMat(), keypoints, descriptors, true); + cv::gpu::GpuMat descriptors; + surf(loadMat(image), cv::gpu::GpuMat(), keypoints, descriptors, true); - cv::Mat descriptors_gold; - surf_gold(image, cv::noArray(), keypoints, descriptors_gold, true); + cv::Mat descriptors_gold; + surf_gold(image, cv::noArray(), keypoints, descriptors_gold, true); - cv::BFMatcher matcher(cv::NORM_L2); - std::vector matches; - matcher.match(descriptors_gold, cv::Mat(descriptors), matches); + cv::BFMatcher matcher(cv::NORM_L2); + std::vector matches; + matcher.match(descriptors_gold, cv::Mat(descriptors), matches); - int matchedCount = getMatchedPointsCount(keypoints, keypoints, matches); - double matchedRatio = static_cast(matchedCount) / keypoints.size(); + int matchedCount = getMatchedPointsCount(keypoints, keypoints, matches); + double matchedRatio = static_cast(matchedCount) / keypoints.size(); - EXPECT_GT(matchedRatio, 0.6); - } + EXPECT_GT(matchedRatio, 0.6); } INSTANTIATE_TEST_CASE_P(GPU_Features2D, SURF, testing::Combine( - ALL_DEVICES, testing::Values(SURF_HessianThreshold(100.0), SURF_HessianThreshold(500.0), SURF_HessianThreshold(1000.0)), testing::Values(SURF_Octaves(3), SURF_Octaves(4)), testing::Values(SURF_OctaveLayers(2), SURF_OctaveLayers(3)), @@ -245,17 +194,15 @@ INSTANTIATE_TEST_CASE_P(GPU_Features2D, SURF, testing::Combine( ////////////////////////////////////////////////////// // VIBE -PARAM_TEST_CASE(VIBE, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi) +PARAM_TEST_CASE(VIBE, cv::Size, MatType, UseRoi) { }; GPU_TEST_P(VIBE, Accuracy) { - const cv::gpu::DeviceInfo devInfo = GET_PARAM(0); - cv::gpu::setDevice(devInfo.deviceID()); - const cv::Size size = GET_PARAM(1); - const int type = GET_PARAM(2); - const bool useRoi = GET_PARAM(3); + const cv::Size size = GET_PARAM(0); + const int type = GET_PARAM(1); + const bool useRoi = GET_PARAM(2); const cv::Mat fullfg(size, CV_8UC1, cv::Scalar::all(255)); @@ -278,7 +225,6 @@ GPU_TEST_P(VIBE, Accuracy) } INSTANTIATE_TEST_CASE_P(GPU_Video, VIBE, testing::Combine( - ALL_DEVICES, DIFFERENT_SIZES, testing::Values(MatType(CV_8UC1), MatType(CV_8UC3), MatType(CV_8UC4)), WHOLE_SUBMAT)); diff --git a/modules/nonfree/test/test_main.cpp b/modules/nonfree/test/test_main.cpp index c9e33a943..6b2499344 100644 --- a/modules/nonfree/test/test_main.cpp +++ b/modules/nonfree/test/test_main.cpp @@ -1,73 +1,3 @@ #include "test_precomp.hpp" -#if defined(HAVE_OPENCV_GPU) && defined(HAVE_CUDA) - -using namespace cv; -using namespace cv::gpu; -using namespace cvtest; -using namespace testing; - -int main(int argc, char **argv) -{ - try - { - const char* keys = - "{ h | help ? | false | Print help}" - "{ i | info | false | Print information about system and exit }" - "{ d | device | -1 | Device on which tests will be executed (-1 means all devices) }" - ; - - CommandLineParser cmd(argc, (const char**)argv, keys); - - if (cmd.get("help")) - { - cmd.printParams(); - return 0; - } - - printCudaInfo(); - - if (cmd.get("info")) - { - return 0; - } - - int device = cmd.get("device"); - if (device < 0) - { - DeviceManager::instance().loadAll(); - - std::cout << "Run tests on all supported devices \n" << std::endl; - } - else - { - DeviceManager::instance().load(device); - - DeviceInfo info(device); - std::cout << "Run tests on device " << device << " [" << info.name() << "] \n" << std::endl; -} - - TS::ptr()->init("cv"); - InitGoogleTest(&argc, argv); - - return RUN_ALL_TESTS(); -} - catch (const std::exception& e) - { - std::cerr << e.what() << std::endl; - return -1; - } - catch (...) -{ - std::cerr << "Unknown error" << std::endl; - return -1; - } - - return 0; -} - -#else // HAVE_CUDA - CV_TEST_MAIN("cv") - -#endif // HAVE_CUDA From 8d521d4704fc868dfca8c411ba5ff5ceb5c70207 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Tue, 2 Apr 2013 16:49:09 +0400 Subject: [PATCH 35/67] disabled perf tests for gpu VideoWriter and VideoReader --- modules/gpu/perf/perf_video.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/gpu/perf/perf_video.cpp b/modules/gpu/perf/perf_video.cpp index a3df2193e..1ab01a75b 100644 --- a/modules/gpu/perf/perf_video.cpp +++ b/modules/gpu/perf/perf_video.cpp @@ -1007,7 +1007,7 @@ PERF_TEST_P(Video_Cn_MaxFeatures, Video_GMG, #if defined(HAVE_NVCUVID) && BUILD_WITH_VIDEO_INPUT_SUPPORT -PERF_TEST_P(Video, Video_VideoReader, Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi")) +PERF_TEST_P(Video, DISABLED_Video_VideoReader, Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi")) { declare.time(20); @@ -1044,7 +1044,7 @@ PERF_TEST_P(Video, Video_VideoReader, Values("gpu/video/768x576.avi", "gpu/video #if defined(HAVE_NVCUVID) && defined(WIN32) -PERF_TEST_P(Video, Video_VideoWriter, Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi")) +PERF_TEST_P(Video, DISABLED_Video_VideoWriter, Values("gpu/video/768x576.avi", "gpu/video/1920x1080.avi")) { declare.time(30); From 4703f4552ab700a54737e1fac1b151c7963b1f66 Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Tue, 2 Apr 2013 18:01:20 -0700 Subject: [PATCH 36/67] Experimental MS Media Foundation API support added --- CMakeLists.txt | 13 +- cmake/OpenCVFindLibsVideo.cmake | 13 +- cmake/templates/cvconfig.h.cmake | 7 +- modules/highgui/CMakeLists.txt | 14 +- .../include/opencv2/highgui/highgui_c.h | 1 + modules/highgui/src/cap.cpp | 14 +- modules/highgui/src/cap_dshow.cpp | 3 +- modules/highgui/src/cap_msmf.cpp | 3722 +++++++++++++++++ modules/highgui/src/precomp.hpp | 1 + modules/highgui/test/test_precomp.hpp | 4 +- 10 files changed, 3775 insertions(+), 17 deletions(-) create mode 100644 modules/highgui/src/cap_msmf.cpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 3a50ac377..9bf394f93 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -140,7 +140,8 @@ OCV_OPTION(WITH_CSTRIPES "Include C= support" OFF OCV_OPTION(WITH_TIFF "Include TIFF support" ON IF (NOT IOS) ) OCV_OPTION(WITH_UNICAP "Include Unicap support (GPL)" OFF IF (UNIX AND NOT APPLE AND NOT ANDROID) ) OCV_OPTION(WITH_V4L "Include Video 4 Linux support" ON IF (UNIX AND NOT ANDROID) ) -OCV_OPTION(WITH_VIDEOINPUT "Build HighGUI with DirectShow support" ON IF WIN32 AND NOT ARM ) +OCV_OPTION(WITH_DSHOW "Build HighGUI with DirectShow support" ON IF (WIN32 AND NOT ARM) ) +OCV_OPTION(WITH_MSMF "Build HighGUI with Media Foundation support" OFF ) OCV_OPTION(WITH_XIMEA "Include XIMEA cameras support" OFF IF (NOT ANDROID AND NOT APPLE) ) OCV_OPTION(WITH_XINE "Include Xine support (GPL)" OFF IF (UNIX AND NOT APPLE AND NOT ANDROID) ) OCV_OPTION(WITH_OPENCL "Include OpenCL Runtime support" OFF IF (NOT ANDROID AND NOT IOS) ) @@ -753,9 +754,13 @@ if(DEFINED WITH_V4L) ELSE "${HAVE_CAMV4L_STR}/${HAVE_CAMV4L2_STR}") endif(DEFINED WITH_V4L) -if(DEFINED WITH_VIDEOINPUT) - status(" DirectShow:" HAVE_VIDEOINPUT THEN YES ELSE NO) -endif(DEFINED WITH_VIDEOINPUT) +if(DEFINED WITH_DSHOW) + status(" DirectShow:" HAVE_DSHOW THEN YES ELSE NO) +endif(DEFINED WITH_DSHOW) + +if(DEFINED WITH_MSMF) + status(" Media Foundation:" HAVE_MSMF THEN YES ELSE NO) +endif(DEFINED WITH_MSMF) if(DEFINED WITH_XIMEA) status(" XIMEA:" HAVE_XIMEA THEN YES ELSE NO) diff --git a/cmake/OpenCVFindLibsVideo.cmake b/cmake/OpenCVFindLibsVideo.cmake index 9cb7f7cf2..fbb47d486 100644 --- a/cmake/OpenCVFindLibsVideo.cmake +++ b/cmake/OpenCVFindLibsVideo.cmake @@ -184,11 +184,16 @@ if(WITH_FFMPEG) endif(APPLE) endif(WITH_FFMPEG) -# --- VideoInput --- -if(WITH_VIDEOINPUT) +# --- VideoInput/DirectShow --- +if(WITH_DSHOW) # always have VideoInput on Windows - set(HAVE_VIDEOINPUT 1) -endif(WITH_VIDEOINPUT) + set(HAVE_DSHOW 1) +endif(WITH_DSHOW) + +# --- VideoInput/Microsoft Media Foundation --- +if(WITH_MSMF) + check_include_file(Mfapi.h HAVE_MSMF) +endif(WITH_MSMF) # --- Extra HighGUI libs on Windows --- if(WIN32) diff --git a/cmake/templates/cvconfig.h.cmake b/cmake/templates/cvconfig.h.cmake index ff6b5c89a..db46af4b6 100644 --- a/cmake/templates/cvconfig.h.cmake +++ b/cmake/templates/cvconfig.h.cmake @@ -214,8 +214,11 @@ /* AMD's Basic Linear Algebra Subprograms Library*/ #cmakedefine HAVE_CLAMDBLAS -/* VideoInput library */ -#cmakedefine HAVE_VIDEOINPUT +/* DirectShow Video Capture library */ +#cmakedefine HAVE_DSHOW + +/* Microsoft Media Foundation Capture library */ +#cmakedefine HAVE_MSMF /* XIMEA camera support */ #cmakedefine HAVE_XIMEA diff --git a/modules/highgui/CMakeLists.txt b/modules/highgui/CMakeLists.txt index 2888c9dcd..3eec81d11 100644 --- a/modules/highgui/CMakeLists.txt +++ b/modules/highgui/CMakeLists.txt @@ -106,7 +106,19 @@ elseif(APPLE) endif() if(WIN32 AND NOT ARM) - list(APPEND highgui_srcs src/cap_dshow.cpp src/cap_vfw.cpp src/cap_cmu.cpp) + list(APPEND highgui_srcs src/cap_cmu.cpp) +endif() + +if (WIN32 AND HAVE_DSHOW) + list(APPEND highgui_srcs src/cap_dshow.cpp) +endif() + +if (WIN32 AND HAVE_MSMF) + list(APPEND highgui_srcs src/cap_msmf.cpp) +endif() + +if (WIN32 AND HAVE_VFW) + list(APPEND highgui_srcs src/cap_vfw.cpp) endif() if(HAVE_XINE) diff --git a/modules/highgui/include/opencv2/highgui/highgui_c.h b/modules/highgui/include/opencv2/highgui/highgui_c.h index 9c7166fc9..58840cbd3 100644 --- a/modules/highgui/include/opencv2/highgui/highgui_c.h +++ b/modules/highgui/include/opencv2/highgui/highgui_c.h @@ -297,6 +297,7 @@ enum CV_CAP_UNICAP =600, // Unicap drivers CV_CAP_DSHOW =700, // DirectShow (via videoInput) + CV_CAP_MSMF =1400, // Microsoft Media Foundation (via videoInput) CV_CAP_PVAPI =800, // PvAPI, Prosilica GigE SDK diff --git a/modules/highgui/src/cap.cpp b/modules/highgui/src/cap.cpp index 9befa7b91..2c3b3a94c 100644 --- a/modules/highgui/src/cap.cpp +++ b/modules/highgui/src/cap.cpp @@ -114,7 +114,7 @@ CV_IMPL CvCapture * cvCreateCameraCapture (int index) { int domains[] = { -#ifdef HAVE_VIDEOINPUT +#ifdef HAVE_DSHOW CV_CAP_DSHOW, #endif #if 1 @@ -168,7 +168,8 @@ CV_IMPL CvCapture * cvCreateCameraCapture (int index) // try every possibly installed camera API for (int i = 0; domains[i] >= 0; i++) { -#if defined(HAVE_VIDEOINPUT) || \ +#if defined(HAVE_DSHOW) || \ + defined(HAVE_MSMF) || \ defined(HAVE_TYZX) || \ defined(HAVE_VFW) || \ defined(HAVE_LIBV4L) || \ @@ -195,7 +196,14 @@ CV_IMPL CvCapture * cvCreateCameraCapture (int index) switch (domains[i]) { -#ifdef HAVE_VIDEOINPUT +#ifdef HAVE_MSMF + case CV_CAP_MSMF: + capture = cvCreateCameraCapture_MSMF (index); + if (capture) + return capture; + break; +#endif +#ifdef HAVE_DSHOW case CV_CAP_DSHOW: capture = cvCreateCameraCapture_DShow (index); if (capture) diff --git a/modules/highgui/src/cap_dshow.cpp b/modules/highgui/src/cap_dshow.cpp index c2513d788..21fb947b1 100644 --- a/modules/highgui/src/cap_dshow.cpp +++ b/modules/highgui/src/cap_dshow.cpp @@ -41,7 +41,7 @@ #include "precomp.hpp" -#if (defined WIN32 || defined _WIN32) && defined HAVE_VIDEOINPUT +#if (defined WIN32 || defined _WIN32) && defined HAVE_DSHOW /* DirectShow-based Video Capturing module is based on @@ -3100,6 +3100,7 @@ HRESULT videoInput::routeCrossbar(ICaptureGraphBuilder2 **ppBuild, IBaseFilter * return hr; } + /********************* Capturing video from camera via DirectShow *********************/ class CvCaptureCAM_DShow : public CvCapture diff --git a/modules/highgui/src/cap_msmf.cpp b/modules/highgui/src/cap_msmf.cpp new file mode 100644 index 000000000..dbbad6f85 --- /dev/null +++ b/modules/highgui/src/cap_msmf.cpp @@ -0,0 +1,3722 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// Intel License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000, Intel Corporation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of Intel Corporation may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" + +#if (defined WIN32 || defined _WIN32) && defined HAVE_MSMF + +/* + Media Foundation-based Video Capturing module is based on + videoInput library by Evgeny Pereguda: + http://www.codeproject.com/Articles/559437/Capturing-of-video-from-web-camera-on-Windows-7-an + + Originaly licensed under The Code Project Open License (CPOL) 1.02: + http://www.codeproject.com/info/cpol10.aspx +*/ + +#include +#include +#include +#include +#include +#include +#include "Strsafe.h" + +#include +#include +#include +#include + +#include +#include +#include + +#pragma warning(disable:4503) + +#pragma comment(lib, "mfplat") +#pragma comment(lib, "mf") +#pragma comment(lib, "mfuuid") +#pragma comment(lib, "Strmiids") +#pragma comment(lib, "MinCore_Downlevel") + +struct IMFMediaType; +struct IMFActivate; +struct IMFMediaSource; +struct IMFAttributes; + +namespace +{ + +template void SafeRelease(T **ppT) +{ + if (*ppT) + { + (*ppT)->Release(); + *ppT = NULL; + } +} + + /// Class for printing info into consol +class DebugPrintOut +{ +public: + ~DebugPrintOut(void); + static DebugPrintOut& getInstance(); + void printOut(const wchar_t *format, ...); + void setVerbose(bool state); + bool verbose; + +private: + DebugPrintOut(void); +}; + +// Structure for collecting info about types of video, which are supported by current video device +struct MediaType +{ + unsigned int MF_MT_FRAME_SIZE; + + unsigned int height; + + unsigned int width; + + unsigned int MF_MT_YUV_MATRIX; + + unsigned int MF_MT_VIDEO_LIGHTING; + + unsigned int MF_MT_DEFAULT_STRIDE; + + unsigned int MF_MT_VIDEO_CHROMA_SITING; + + GUID MF_MT_AM_FORMAT_TYPE; + + wchar_t *pMF_MT_AM_FORMAT_TYPEName; + + unsigned int MF_MT_FIXED_SIZE_SAMPLES; + + unsigned int MF_MT_VIDEO_NOMINAL_RANGE; + + unsigned int MF_MT_FRAME_RATE; + + unsigned int MF_MT_FRAME_RATE_low; + + unsigned int MF_MT_PIXEL_ASPECT_RATIO; + + unsigned int MF_MT_PIXEL_ASPECT_RATIO_low; + + unsigned int MF_MT_ALL_SAMPLES_INDEPENDENT; + + unsigned int MF_MT_FRAME_RATE_RANGE_MIN; + + unsigned int MF_MT_FRAME_RATE_RANGE_MIN_low; + + unsigned int MF_MT_SAMPLE_SIZE; + + unsigned int MF_MT_VIDEO_PRIMARIES; + + unsigned int MF_MT_INTERLACE_MODE; + + unsigned int MF_MT_FRAME_RATE_RANGE_MAX; + + unsigned int MF_MT_FRAME_RATE_RANGE_MAX_low; + + GUID MF_MT_MAJOR_TYPE; + + GUID MF_MT_SUBTYPE; + + wchar_t *pMF_MT_MAJOR_TYPEName; + wchar_t *pMF_MT_SUBTYPEName; + + MediaType(); + ~MediaType(); + void Clear(); +}; + +/// Class for parsing info from IMFMediaType into the local MediaType +class FormatReader +{ +public: + static MediaType Read(IMFMediaType *pType); + ~FormatReader(void); +private: + FormatReader(void); +}; + +DWORD WINAPI MainThreadFunction( LPVOID lpParam ); + +typedef void(*emergensyStopEventCallback)(int, void *); + +typedef unsigned char BYTE; + +class RawImage +{ +public: + ~RawImage(void); + // Function of creation of the instance of the class + static long CreateInstance(RawImage **ppRImage,unsigned int size); + + void setCopy(const BYTE * pSampleBuffer); + void fastCopy(const BYTE * pSampleBuffer); + unsigned char * getpPixels(); + bool isNew(); + unsigned int getSize(); + +private: + + bool ri_new; + unsigned int ri_size; + unsigned char *ri_pixels; + RawImage(unsigned int size); +}; + +// Class for grabbing image from video stream +class ImageGrabber : public IMFSampleGrabberSinkCallback +{ +public: + ~ImageGrabber(void); + + HRESULT initImageGrabber(IMFMediaSource *pSource, GUID VideoFormat); + + HRESULT startGrabbing(void); + + void stopGrabbing(); + + RawImage *getRawImage(); + + // Function of creation of the instance of the class + static HRESULT CreateInstance(ImageGrabber **ppIG,unsigned int deviceID); + +private: + + bool ig_RIE; + + bool ig_Close; + + long m_cRef; + + unsigned int ig_DeviceID; + + IMFMediaSource *ig_pSource; + + IMFMediaSession *ig_pSession; + + IMFTopology *ig_pTopology; + + RawImage *ig_RIFirst; + + RawImage *ig_RISecond; + + RawImage *ig_RIOut; + + ImageGrabber(unsigned int deviceID); + + HRESULT CreateTopology(IMFMediaSource *pSource, IMFActivate *pSinkActivate, IMFTopology **ppTopo); + + HRESULT AddSourceNode( + IMFTopology *pTopology, + IMFMediaSource *pSource, + IMFPresentationDescriptor *pPD, + IMFStreamDescriptor *pSD, + IMFTopologyNode **ppNode); + + HRESULT AddOutputNode( + IMFTopology *pTopology, + IMFActivate *pActivate, + DWORD dwId, + IMFTopologyNode **ppNode); + + // IUnknown methods + STDMETHODIMP QueryInterface(REFIID iid, void** ppv); + STDMETHODIMP_(ULONG) AddRef(); + STDMETHODIMP_(ULONG) Release(); + + // IMFClockStateSink methods + STDMETHODIMP OnClockStart(MFTIME hnsSystemTime, LONGLONG llClockStartOffset); + STDMETHODIMP OnClockStop(MFTIME hnsSystemTime); + STDMETHODIMP OnClockPause(MFTIME hnsSystemTime); + STDMETHODIMP OnClockRestart(MFTIME hnsSystemTime); + STDMETHODIMP OnClockSetRate(MFTIME hnsSystemTime, float flRate); + + // IMFSampleGrabberSinkCallback methods + STDMETHODIMP OnSetPresentationClock(IMFPresentationClock* pClock); + STDMETHODIMP OnProcessSample(REFGUID guidMajorMediaType, DWORD dwSampleFlags, + LONGLONG llSampleTime, LONGLONG llSampleDuration, const BYTE * pSampleBuffer, + DWORD dwSampleSize); + STDMETHODIMP OnShutdown(); +}; + +/// Class for controlling of thread of the grabbing raw data from video device +class ImageGrabberThread +{ + friend DWORD WINAPI MainThreadFunction( LPVOID lpParam ); + +public: + ~ImageGrabberThread(void); + + static HRESULT CreateInstance(ImageGrabberThread **ppIGT, IMFMediaSource *pSource, unsigned int deviceID); + + void start(); + + void stop(); + + void setEmergencyStopEvent(void *userData, void(*func)(int, void *)); + + ImageGrabber *getImageGrabber(); + +protected: + + virtual void run(); + +private: + + ImageGrabberThread(IMFMediaSource *pSource, unsigned int deviceID); + + HANDLE igt_Handle; + + DWORD igt_ThreadIdArray; + + ImageGrabber *igt_pImageGrabber; + + emergensyStopEventCallback igt_func; + + void *igt_userData; + + bool igt_stop; + + unsigned int igt_DeviceID; +}; + +// Structure for collecting info about one parametr of current video device +struct Parametr +{ + long CurrentValue; + + long Min; + + long Max; + + long Step; + + long Default; + + long Flag; + + Parametr(); +}; + +// Structure for collecting info about 17 parametrs of current video device +struct CamParametrs +{ + Parametr Brightness; + Parametr Contrast; + Parametr Hue; + Parametr Saturation; + Parametr Sharpness; + Parametr Gamma; + Parametr ColorEnable; + Parametr WhiteBalance; + Parametr BacklightCompensation; + Parametr Gain; + + Parametr Pan; + Parametr Tilt; + Parametr Roll; + Parametr Zoom; + Parametr Exposure; + Parametr Iris; + Parametr Focus; +}; + +typedef std::wstring String; + +typedef std::vector vectorNum; + +typedef std::map SUBTYPEMap; + +typedef std::map FrameRateMap; + +typedef void(*emergensyStopEventCallback)(int, void *); + +/// Class for controlling of video device +class videoDevice +{ + +public: + videoDevice(void); + ~videoDevice(void); + + void closeDevice(); + + CamParametrs getParametrs(); + + void setParametrs(CamParametrs parametrs); + + void setEmergencyStopEvent(void *userData, void(*func)(int, void *)); + + long readInfoOfDevice(IMFActivate *pActivate, unsigned int Num); + + wchar_t *getName(); + + int getCountFormats(); + + unsigned int getWidth(); + + unsigned int getHeight(); + + MediaType getFormat(unsigned int id); + + bool setupDevice(unsigned int w, unsigned int h, unsigned int idealFramerate = 0); + + bool setupDevice(unsigned int id); + + bool isDeviceSetup(); + + bool isDeviceMediaSource(); + + bool isDeviceRawDataSource(); + + bool isFrameNew(); + + IMFMediaSource *getMediaSource(); + + RawImage *getRawImageOut(); + +private: + + enum typeLock + { + MediaSourceLock, + + RawDataLock, + + OpenLock + + } vd_LockOut; + + wchar_t *vd_pFriendlyName; + + ImageGrabberThread *vd_pImGrTh; + + CamParametrs vd_PrevParametrs; + + unsigned int vd_Width; + + unsigned int vd_Height; + + unsigned int vd_CurrentNumber; + + bool vd_IsSetuped; + + std::map vd_CaptureFormats; + + std::vector vd_CurrentFormats; + + IMFMediaSource *vd_pSource; + + emergensyStopEventCallback vd_func; + + void *vd_userData; + + long enumerateCaptureFormats(IMFMediaSource *pSource); + + long setDeviceFormat(IMFMediaSource *pSource, unsigned long dwFormatIndex); + + void buildLibraryofTypes(); + + int findType(unsigned int size, unsigned int frameRate = 0); + + long resetDevice(IMFActivate *pActivate); + + long initDevice(); + + long checkDevice(IMFAttributes *pAttributes, IMFActivate **pDevice); +}; + + +/// Class for managing of list of video devices +class videoDevices +{ +public: + ~videoDevices(void); + + long initDevices(IMFAttributes *pAttributes); + + static videoDevices& getInstance(); + + videoDevice *getDevice(unsigned int i); + + unsigned int getCount(); + + void clearDevices(); + +private: + + UINT32 count; + + std::vector vds_Devices; + + videoDevices(void); +}; + +// Class for creating of Media Foundation context +class Media_Foundation +{ +public: + virtual ~Media_Foundation(void); + static Media_Foundation& getInstance(); + bool buildListOfDevices(); + +private: + Media_Foundation(void); + +}; + +/// The only visiable class for controlling of video devices in format singelton +class videoInput +{ +public: + virtual ~videoInput(void); + + // Getting of static instance of videoInput class + static videoInput& getInstance(); + + // Closing video device with deviceID + void closeDevice(int deviceID); + + // Setting callback function for emergency events(for example: removing video device with deviceID) with userData + void setEmergencyStopEvent(int deviceID, void *userData, void(*func)(int, void *)); + + // Closing all devices + void closeAllDevices(); + + // Getting of parametrs of video device with deviceID + CamParametrs getParametrs(int deviceID); + + // Setting of parametrs of video device with deviceID + void setParametrs(int deviceID, CamParametrs parametrs); + + // Getting numbers of existence videodevices with listing in consol + unsigned int listDevices(bool silent = false); + + // Getting numbers of formats, which are supported by videodevice with deviceID + unsigned int getCountFormats(int deviceID); + + // Getting width of image, which is getting from videodevice with deviceID + unsigned int getWidth(int deviceID); + + // Getting height of image, which is getting from videodevice with deviceID + unsigned int getHeight(int deviceID); + + // Getting name of videodevice with deviceID + wchar_t *getNameVideoDevice(int deviceID); + + // Getting interface MediaSource for Media Foundation from videodevice with deviceID + IMFMediaSource *getMediaSource(int deviceID); + + // Getting format with id, which is supported by videodevice with deviceID + MediaType getFormat(int deviceID, int unsigned id); + + // Checking of existence of the suitable video devices + bool isDevicesAcceable(); + + // Checking of using the videodevice with deviceID + bool isDeviceSetup(int deviceID); + + // Checking of using MediaSource from videodevice with deviceID + bool isDeviceMediaSource(int deviceID); + + // Checking of using Raw Data of pixels from videodevice with deviceID + bool isDeviceRawDataSource(int deviceID); + + // Setting of the state of outprinting info in console + static void setVerbose(bool state); + + // Initialization of video device with deviceID by media type with id + bool setupDevice(int deviceID, unsigned int id = 0); + + // Initialization of video device with deviceID by wisth w, height h and fps idealFramerate + bool setupDevice(int deviceID, unsigned int w, unsigned int h, unsigned int idealFramerate = 30); + + // Checking of recivig of new frame from video device with deviceID + bool isFrameNew(int deviceID); + + // Writing of Raw Data pixels from video device with deviceID with correction of RedAndBlue flipping flipRedAndBlue and vertical flipping flipImage + bool getPixels(int deviceID, unsigned char * pixels, bool flipRedAndBlue = false, bool flipImage = false); + +private: + + bool accessToDevices; + + videoInput(void); + + void processPixels(unsigned char * src, unsigned char * dst, unsigned int width, unsigned int height, unsigned int bpp, bool bRGB, bool bFlip); + + void updateListOfDevices(); +}; + +DebugPrintOut::DebugPrintOut(void):verbose(true) +{ +} + +DebugPrintOut::~DebugPrintOut(void) +{ +} + +DebugPrintOut& DebugPrintOut::getInstance() +{ + static DebugPrintOut instance; + + return instance; +} + +void DebugPrintOut::printOut(const wchar_t *format, ...) +{ + if(verbose) + { + int i = 0; + + wchar_t *p = NULL; + + va_list args; + + va_start(args, format); + + if(wcscmp(format, L"%i")) + { + i = va_arg (args, int); + } + + if(wcscmp(format, L"%s")) + { + p = va_arg (args, wchar_t *); + } + + wprintf(format, i,p); + + va_end (args); + } +} + +void DebugPrintOut::setVerbose(bool state) +{ + verbose = state; +} + +LPCWSTR GetGUIDNameConstNew(const GUID& guid); +HRESULT GetGUIDNameNew(const GUID& guid, WCHAR **ppwsz); + +HRESULT LogAttributeValueByIndexNew(IMFAttributes *pAttr, DWORD index); +HRESULT SpecialCaseAttributeValueNew(GUID guid, const PROPVARIANT& var, MediaType &out); + +unsigned int *GetParametr(GUID guid, MediaType &out) +{ + if(guid == MF_MT_YUV_MATRIX) + return &(out.MF_MT_YUV_MATRIX); + + if(guid == MF_MT_VIDEO_LIGHTING) + return &(out.MF_MT_VIDEO_LIGHTING); + + if(guid == MF_MT_DEFAULT_STRIDE) + return &(out.MF_MT_DEFAULT_STRIDE); + + if(guid == MF_MT_VIDEO_CHROMA_SITING) + return &(out.MF_MT_VIDEO_CHROMA_SITING); + + if(guid == MF_MT_VIDEO_NOMINAL_RANGE) + return &(out.MF_MT_VIDEO_NOMINAL_RANGE); + + if(guid == MF_MT_ALL_SAMPLES_INDEPENDENT) + return &(out.MF_MT_ALL_SAMPLES_INDEPENDENT); + + if(guid == MF_MT_FIXED_SIZE_SAMPLES) + return &(out.MF_MT_FIXED_SIZE_SAMPLES); + + if(guid == MF_MT_SAMPLE_SIZE) + return &(out.MF_MT_SAMPLE_SIZE); + + if(guid == MF_MT_VIDEO_PRIMARIES) + return &(out.MF_MT_VIDEO_PRIMARIES); + + if(guid == MF_MT_INTERLACE_MODE) + return &(out.MF_MT_INTERLACE_MODE); + + return NULL; +} + +HRESULT LogAttributeValueByIndexNew(IMFAttributes *pAttr, DWORD index, MediaType &out) +{ + WCHAR *pGuidName = NULL; + WCHAR *pGuidValName = NULL; + + GUID guid = { 0 }; + + PROPVARIANT var; + PropVariantInit(&var); + + HRESULT hr = pAttr->GetItemByIndex(index, &guid, &var); + + if (FAILED(hr)) + { + goto done; + } + + hr = GetGUIDNameNew(guid, &pGuidName); + + if (FAILED(hr)) + { + goto done; + } + + hr = SpecialCaseAttributeValueNew(guid, var, out); + + unsigned int *p; + + if (FAILED(hr)) + { + goto done; + } + if (hr == S_FALSE) + { + switch (var.vt) + { + case VT_UI4: + + p = GetParametr(guid, out); + + if(p) + { + *p = var.ulVal; + } + + break; + + case VT_UI8: + + break; + + case VT_R8: + + break; + + case VT_CLSID: + if(guid == MF_MT_AM_FORMAT_TYPE) + { + hr = GetGUIDNameNew(*var.puuid, &pGuidValName); + + if (SUCCEEDED(hr)) + { + out.MF_MT_AM_FORMAT_TYPE = MF_MT_AM_FORMAT_TYPE; + + out.pMF_MT_AM_FORMAT_TYPEName = pGuidValName; + + pGuidValName = NULL; + } + } + + if(guid == MF_MT_MAJOR_TYPE) + { + hr = GetGUIDNameNew(*var.puuid, &pGuidValName); + + if (SUCCEEDED(hr)) + { + out.MF_MT_MAJOR_TYPE = MF_MT_MAJOR_TYPE; + + out.pMF_MT_MAJOR_TYPEName = pGuidValName; + + pGuidValName = NULL; + } + } + + if(guid == MF_MT_SUBTYPE) + { + hr = GetGUIDNameNew(*var.puuid, &pGuidValName); + + if (SUCCEEDED(hr)) + { + out.MF_MT_SUBTYPE = MF_MT_SUBTYPE; + + out.pMF_MT_SUBTYPEName = pGuidValName; + + pGuidValName = NULL; + } + } + + break; + + case VT_LPWSTR: + + break; + + case VT_VECTOR | VT_UI1: + + break; + + case VT_UNKNOWN: + + break; + + default: + + break; + } + } + +done: + CoTaskMemFree(pGuidName); + CoTaskMemFree(pGuidValName); + PropVariantClear(&var); + return hr; +} + +HRESULT GetGUIDNameNew(const GUID& guid, WCHAR **ppwsz) +{ + HRESULT hr = S_OK; + WCHAR *pName = NULL; + + LPCWSTR pcwsz = GetGUIDNameConstNew(guid); + if (pcwsz) + { + size_t cchLength = 0; + + hr = StringCchLengthW(pcwsz, STRSAFE_MAX_CCH, &cchLength); + if (FAILED(hr)) + { + goto done; + } + + pName = (WCHAR*)CoTaskMemAlloc((cchLength + 1) * sizeof(WCHAR)); + + if (pName == NULL) + { + hr = E_OUTOFMEMORY; + goto done; + } + + hr = StringCchCopyW(pName, cchLength + 1, pcwsz); + if (FAILED(hr)) + { + goto done; + } + } + else + { + hr = StringFromCLSID(guid, &pName); + } + +done: + if (FAILED(hr)) + { + *ppwsz = NULL; + CoTaskMemFree(pName); + } + else + { + *ppwsz = pName; + } + return hr; +} + +void LogUINT32AsUINT64New(const PROPVARIANT& var, UINT32 &uHigh, UINT32 &uLow) +{ + Unpack2UINT32AsUINT64(var.uhVal.QuadPart, &uHigh, &uLow); + +} + +float OffsetToFloatNew(const MFOffset& offset) +{ + return offset.value + (static_cast(offset.fract) / 65536.0f); +} + +HRESULT LogVideoAreaNew(const PROPVARIANT& var) +{ + if (var.caub.cElems < sizeof(MFVideoArea)) + { + return S_OK; + } + + return S_OK; +} + +HRESULT SpecialCaseAttributeValueNew(GUID guid, const PROPVARIANT& var, MediaType &out) +{ + if (guid == MF_MT_FRAME_SIZE) + { + UINT32 uHigh = 0, uLow = 0; + + LogUINT32AsUINT64New(var, uHigh, uLow); + + out.width = uHigh; + + out.height = uLow; + + out.MF_MT_FRAME_SIZE = out.width * out.height; + } + else + if (guid == MF_MT_FRAME_RATE) + { + UINT32 uHigh = 0, uLow = 0; + + LogUINT32AsUINT64New(var, uHigh, uLow); + + out.MF_MT_FRAME_RATE = uHigh; + + out.MF_MT_FRAME_RATE_low = uLow; + } + else + if (guid == MF_MT_FRAME_RATE_RANGE_MAX) + { + UINT32 uHigh = 0, uLow = 0; + + LogUINT32AsUINT64New(var, uHigh, uLow); + + out.MF_MT_FRAME_RATE_RANGE_MAX = uHigh; + + out.MF_MT_FRAME_RATE_RANGE_MAX_low = uLow; + } + else + if (guid == MF_MT_FRAME_RATE_RANGE_MIN) + { + UINT32 uHigh = 0, uLow = 0; + + LogUINT32AsUINT64New(var, uHigh, uLow); + + out.MF_MT_FRAME_RATE_RANGE_MIN = uHigh; + + out.MF_MT_FRAME_RATE_RANGE_MIN_low = uLow; + } + else + if (guid == MF_MT_PIXEL_ASPECT_RATIO) + { + UINT32 uHigh = 0, uLow = 0; + + LogUINT32AsUINT64New(var, uHigh, uLow); + + out.MF_MT_PIXEL_ASPECT_RATIO = uHigh; + + out.MF_MT_PIXEL_ASPECT_RATIO_low = uLow; + } + else + { + return S_FALSE; + } + return S_OK; +} + +#ifndef IF_EQUAL_RETURN +#define IF_EQUAL_RETURN(param, val) if(val == param) return L#val +#endif + +LPCWSTR GetGUIDNameConstNew(const GUID& guid) +{ + IF_EQUAL_RETURN(guid, MF_MT_MAJOR_TYPE); + IF_EQUAL_RETURN(guid, MF_MT_MAJOR_TYPE); + IF_EQUAL_RETURN(guid, MF_MT_SUBTYPE); + IF_EQUAL_RETURN(guid, MF_MT_ALL_SAMPLES_INDEPENDENT); + IF_EQUAL_RETURN(guid, MF_MT_FIXED_SIZE_SAMPLES); + IF_EQUAL_RETURN(guid, MF_MT_COMPRESSED); + IF_EQUAL_RETURN(guid, MF_MT_SAMPLE_SIZE); + IF_EQUAL_RETURN(guid, MF_MT_WRAPPED_TYPE); + IF_EQUAL_RETURN(guid, MF_MT_AUDIO_NUM_CHANNELS); + IF_EQUAL_RETURN(guid, MF_MT_AUDIO_SAMPLES_PER_SECOND); + IF_EQUAL_RETURN(guid, MF_MT_AUDIO_FLOAT_SAMPLES_PER_SECOND); + IF_EQUAL_RETURN(guid, MF_MT_AUDIO_AVG_BYTES_PER_SECOND); + IF_EQUAL_RETURN(guid, MF_MT_AUDIO_BLOCK_ALIGNMENT); + IF_EQUAL_RETURN(guid, MF_MT_AUDIO_BITS_PER_SAMPLE); + IF_EQUAL_RETURN(guid, MF_MT_AUDIO_VALID_BITS_PER_SAMPLE); + IF_EQUAL_RETURN(guid, MF_MT_AUDIO_SAMPLES_PER_BLOCK); + IF_EQUAL_RETURN(guid, MF_MT_AUDIO_CHANNEL_MASK); + IF_EQUAL_RETURN(guid, MF_MT_AUDIO_FOLDDOWN_MATRIX); + IF_EQUAL_RETURN(guid, MF_MT_AUDIO_WMADRC_PEAKREF); + IF_EQUAL_RETURN(guid, MF_MT_AUDIO_WMADRC_PEAKTARGET); + IF_EQUAL_RETURN(guid, MF_MT_AUDIO_WMADRC_AVGREF); + IF_EQUAL_RETURN(guid, MF_MT_AUDIO_WMADRC_AVGTARGET); + IF_EQUAL_RETURN(guid, MF_MT_AUDIO_PREFER_WAVEFORMATEX); + IF_EQUAL_RETURN(guid, MF_MT_AAC_PAYLOAD_TYPE); + IF_EQUAL_RETURN(guid, MF_MT_AAC_AUDIO_PROFILE_LEVEL_INDICATION); + IF_EQUAL_RETURN(guid, MF_MT_FRAME_SIZE); + IF_EQUAL_RETURN(guid, MF_MT_FRAME_RATE); + IF_EQUAL_RETURN(guid, MF_MT_FRAME_RATE_RANGE_MAX); + IF_EQUAL_RETURN(guid, MF_MT_FRAME_RATE_RANGE_MIN); + IF_EQUAL_RETURN(guid, MF_MT_PIXEL_ASPECT_RATIO); + IF_EQUAL_RETURN(guid, MF_MT_DRM_FLAGS); + IF_EQUAL_RETURN(guid, MF_MT_PAD_CONTROL_FLAGS); + IF_EQUAL_RETURN(guid, MF_MT_SOURCE_CONTENT_HINT); + IF_EQUAL_RETURN(guid, MF_MT_VIDEO_CHROMA_SITING); + IF_EQUAL_RETURN(guid, MF_MT_INTERLACE_MODE); + IF_EQUAL_RETURN(guid, MF_MT_TRANSFER_FUNCTION); + IF_EQUAL_RETURN(guid, MF_MT_VIDEO_PRIMARIES); + IF_EQUAL_RETURN(guid, MF_MT_CUSTOM_VIDEO_PRIMARIES); + IF_EQUAL_RETURN(guid, MF_MT_YUV_MATRIX); + IF_EQUAL_RETURN(guid, MF_MT_VIDEO_LIGHTING); + IF_EQUAL_RETURN(guid, MF_MT_VIDEO_NOMINAL_RANGE); + IF_EQUAL_RETURN(guid, MF_MT_GEOMETRIC_APERTURE); + IF_EQUAL_RETURN(guid, MF_MT_MINIMUM_DISPLAY_APERTURE); + IF_EQUAL_RETURN(guid, MF_MT_PAN_SCAN_APERTURE); + IF_EQUAL_RETURN(guid, MF_MT_PAN_SCAN_ENABLED); + IF_EQUAL_RETURN(guid, MF_MT_AVG_BITRATE); + IF_EQUAL_RETURN(guid, MF_MT_AVG_BIT_ERROR_RATE); + IF_EQUAL_RETURN(guid, MF_MT_MAX_KEYFRAME_SPACING); + IF_EQUAL_RETURN(guid, MF_MT_DEFAULT_STRIDE); + IF_EQUAL_RETURN(guid, MF_MT_PALETTE); + IF_EQUAL_RETURN(guid, MF_MT_USER_DATA); + IF_EQUAL_RETURN(guid, MF_MT_AM_FORMAT_TYPE); + IF_EQUAL_RETURN(guid, MF_MT_MPEG_START_TIME_CODE); + IF_EQUAL_RETURN(guid, MF_MT_MPEG2_PROFILE); + IF_EQUAL_RETURN(guid, MF_MT_MPEG2_LEVEL); + IF_EQUAL_RETURN(guid, MF_MT_MPEG2_FLAGS); + IF_EQUAL_RETURN(guid, MF_MT_MPEG_SEQUENCE_HEADER); + IF_EQUAL_RETURN(guid, MF_MT_DV_AAUX_SRC_PACK_0); + IF_EQUAL_RETURN(guid, MF_MT_DV_AAUX_CTRL_PACK_0); + IF_EQUAL_RETURN(guid, MF_MT_DV_AAUX_SRC_PACK_1); + IF_EQUAL_RETURN(guid, MF_MT_DV_AAUX_CTRL_PACK_1); + IF_EQUAL_RETURN(guid, MF_MT_DV_VAUX_SRC_PACK); + IF_EQUAL_RETURN(guid, MF_MT_DV_VAUX_CTRL_PACK); + IF_EQUAL_RETURN(guid, MF_MT_ARBITRARY_HEADER); + IF_EQUAL_RETURN(guid, MF_MT_ARBITRARY_FORMAT); + IF_EQUAL_RETURN(guid, MF_MT_IMAGE_LOSS_TOLERANT); + IF_EQUAL_RETURN(guid, MF_MT_MPEG4_SAMPLE_DESCRIPTION); + IF_EQUAL_RETURN(guid, MF_MT_MPEG4_CURRENT_SAMPLE_ENTRY); + IF_EQUAL_RETURN(guid, MF_MT_ORIGINAL_4CC); + IF_EQUAL_RETURN(guid, MF_MT_ORIGINAL_WAVE_FORMAT_TAG); + + // Media types + + IF_EQUAL_RETURN(guid, MFMediaType_Audio); + IF_EQUAL_RETURN(guid, MFMediaType_Video); + IF_EQUAL_RETURN(guid, MFMediaType_Protected); + IF_EQUAL_RETURN(guid, MFMediaType_SAMI); + IF_EQUAL_RETURN(guid, MFMediaType_Script); + IF_EQUAL_RETURN(guid, MFMediaType_Image); + IF_EQUAL_RETURN(guid, MFMediaType_HTML); + IF_EQUAL_RETURN(guid, MFMediaType_Binary); + IF_EQUAL_RETURN(guid, MFMediaType_FileTransfer); + + IF_EQUAL_RETURN(guid, MFVideoFormat_AI44); // FCC('AI44') + IF_EQUAL_RETURN(guid, MFVideoFormat_ARGB32); // D3DFMT_A8R8G8B8 + IF_EQUAL_RETURN(guid, MFVideoFormat_AYUV); // FCC('AYUV') + IF_EQUAL_RETURN(guid, MFVideoFormat_DV25); // FCC('dv25') + IF_EQUAL_RETURN(guid, MFVideoFormat_DV50); // FCC('dv50') + IF_EQUAL_RETURN(guid, MFVideoFormat_DVH1); // FCC('dvh1') + IF_EQUAL_RETURN(guid, MFVideoFormat_DVSD); // FCC('dvsd') + IF_EQUAL_RETURN(guid, MFVideoFormat_DVSL); // FCC('dvsl') + IF_EQUAL_RETURN(guid, MFVideoFormat_H264); // FCC('H264') + IF_EQUAL_RETURN(guid, MFVideoFormat_I420); // FCC('I420') + IF_EQUAL_RETURN(guid, MFVideoFormat_IYUV); // FCC('IYUV') + IF_EQUAL_RETURN(guid, MFVideoFormat_M4S2); // FCC('M4S2') + IF_EQUAL_RETURN(guid, MFVideoFormat_MJPG); + IF_EQUAL_RETURN(guid, MFVideoFormat_MP43); // FCC('MP43') + IF_EQUAL_RETURN(guid, MFVideoFormat_MP4S); // FCC('MP4S') + IF_EQUAL_RETURN(guid, MFVideoFormat_MP4V); // FCC('MP4V') + IF_EQUAL_RETURN(guid, MFVideoFormat_MPG1); // FCC('MPG1') + IF_EQUAL_RETURN(guid, MFVideoFormat_MSS1); // FCC('MSS1') + IF_EQUAL_RETURN(guid, MFVideoFormat_MSS2); // FCC('MSS2') + IF_EQUAL_RETURN(guid, MFVideoFormat_NV11); // FCC('NV11') + IF_EQUAL_RETURN(guid, MFVideoFormat_NV12); // FCC('NV12') + IF_EQUAL_RETURN(guid, MFVideoFormat_P010); // FCC('P010') + IF_EQUAL_RETURN(guid, MFVideoFormat_P016); // FCC('P016') + IF_EQUAL_RETURN(guid, MFVideoFormat_P210); // FCC('P210') + IF_EQUAL_RETURN(guid, MFVideoFormat_P216); // FCC('P216') + IF_EQUAL_RETURN(guid, MFVideoFormat_RGB24); // D3DFMT_R8G8B8 + IF_EQUAL_RETURN(guid, MFVideoFormat_RGB32); // D3DFMT_X8R8G8B8 + IF_EQUAL_RETURN(guid, MFVideoFormat_RGB555); // D3DFMT_X1R5G5B5 + IF_EQUAL_RETURN(guid, MFVideoFormat_RGB565); // D3DFMT_R5G6B5 + IF_EQUAL_RETURN(guid, MFVideoFormat_RGB8); + IF_EQUAL_RETURN(guid, MFVideoFormat_UYVY); // FCC('UYVY') + IF_EQUAL_RETURN(guid, MFVideoFormat_v210); // FCC('v210') + IF_EQUAL_RETURN(guid, MFVideoFormat_v410); // FCC('v410') + IF_EQUAL_RETURN(guid, MFVideoFormat_WMV1); // FCC('WMV1') + IF_EQUAL_RETURN(guid, MFVideoFormat_WMV2); // FCC('WMV2') + IF_EQUAL_RETURN(guid, MFVideoFormat_WMV3); // FCC('WMV3') + IF_EQUAL_RETURN(guid, MFVideoFormat_WVC1); // FCC('WVC1') + IF_EQUAL_RETURN(guid, MFVideoFormat_Y210); // FCC('Y210') + IF_EQUAL_RETURN(guid, MFVideoFormat_Y216); // FCC('Y216') + IF_EQUAL_RETURN(guid, MFVideoFormat_Y410); // FCC('Y410') + IF_EQUAL_RETURN(guid, MFVideoFormat_Y416); // FCC('Y416') + IF_EQUAL_RETURN(guid, MFVideoFormat_Y41P); + IF_EQUAL_RETURN(guid, MFVideoFormat_Y41T); + IF_EQUAL_RETURN(guid, MFVideoFormat_YUY2); // FCC('YUY2') + IF_EQUAL_RETURN(guid, MFVideoFormat_YV12); // FCC('YV12') + IF_EQUAL_RETURN(guid, MFVideoFormat_YVYU); + + IF_EQUAL_RETURN(guid, MFAudioFormat_PCM); // WAVE_FORMAT_PCM + IF_EQUAL_RETURN(guid, MFAudioFormat_Float); // WAVE_FORMAT_IEEE_FLOAT + IF_EQUAL_RETURN(guid, MFAudioFormat_DTS); // WAVE_FORMAT_DTS + IF_EQUAL_RETURN(guid, MFAudioFormat_Dolby_AC3_SPDIF); // WAVE_FORMAT_DOLBY_AC3_SPDIF + IF_EQUAL_RETURN(guid, MFAudioFormat_DRM); // WAVE_FORMAT_DRM + IF_EQUAL_RETURN(guid, MFAudioFormat_WMAudioV8); // WAVE_FORMAT_WMAUDIO2 + IF_EQUAL_RETURN(guid, MFAudioFormat_WMAudioV9); // WAVE_FORMAT_WMAUDIO3 + IF_EQUAL_RETURN(guid, MFAudioFormat_WMAudio_Lossless); // WAVE_FORMAT_WMAUDIO_LOSSLESS + IF_EQUAL_RETURN(guid, MFAudioFormat_WMASPDIF); // WAVE_FORMAT_WMASPDIF + IF_EQUAL_RETURN(guid, MFAudioFormat_MSP1); // WAVE_FORMAT_WMAVOICE9 + IF_EQUAL_RETURN(guid, MFAudioFormat_MP3); // WAVE_FORMAT_MPEGLAYER3 + IF_EQUAL_RETURN(guid, MFAudioFormat_MPEG); // WAVE_FORMAT_MPEG + IF_EQUAL_RETURN(guid, MFAudioFormat_AAC); // WAVE_FORMAT_MPEG_HEAAC + IF_EQUAL_RETURN(guid, MFAudioFormat_ADTS); // WAVE_FORMAT_MPEG_ADTS_AAC + + return NULL; +} + +FormatReader::FormatReader(void) +{ +} + +MediaType FormatReader::Read(IMFMediaType *pType) +{ + UINT32 count = 0; + + HRESULT hr = S_OK; + + MediaType out; + + hr = pType->LockStore(); + + if (FAILED(hr)) + { + return out; + } + + hr = pType->GetCount(&count); + + if (FAILED(hr)) + { + return out; + } + + for (UINT32 i = 0; i < count; i++) + { + hr = LogAttributeValueByIndexNew(pType, i, out); + + if (FAILED(hr)) + { + break; + } + } + + hr = pType->UnlockStore(); + + if (FAILED(hr)) + { + return out; + } + + return out; +} + +FormatReader::~FormatReader(void) +{ +} + +#define CHECK_HR(x) if (FAILED(x)) { goto done; } + +ImageGrabber::ImageGrabber(unsigned int deviceID): m_cRef(1), ig_DeviceID(deviceID), ig_pSource(NULL), ig_pSession(NULL), ig_pTopology(NULL), ig_RIE(true), ig_Close(false) +{ +} + + +ImageGrabber::~ImageGrabber(void) +{ + if (ig_pSession) + { + ig_pSession->Shutdown(); + } + + //SafeRelease(&ig_pSession); + + //SafeRelease(&ig_pTopology); + + DebugPrintOut *DPO = &DebugPrintOut::getInstance(); + + DPO->printOut(L"IMAGEGRABBER VIDEODEVICE %i: Destroing instance of the ImageGrabber class \n", ig_DeviceID); + +} + +HRESULT ImageGrabber::initImageGrabber(IMFMediaSource *pSource, GUID VideoFormat) +{ + IMFActivate *pSinkActivate = NULL; + IMFMediaType *pType = NULL; + + IMFPresentationDescriptor *pPD = NULL; + IMFStreamDescriptor *pSD = NULL; + IMFMediaTypeHandler *pHandler = NULL; + IMFMediaType *pCurrentType = NULL; + + HRESULT hr = S_OK; + MediaType MT; + + // Clean up. + if (ig_pSession) + { + ig_pSession->Shutdown(); + } + + SafeRelease(&ig_pSession); + SafeRelease(&ig_pTopology); + + ig_pSource = pSource; + + + + + hr = pSource->CreatePresentationDescriptor(&pPD); + if (FAILED(hr)) + goto err; + + BOOL fSelected; + hr = pPD->GetStreamDescriptorByIndex(0, &fSelected, &pSD); + if (FAILED(hr)) + goto err; + + hr = pSD->GetMediaTypeHandler(&pHandler); + if (FAILED(hr)) + goto err; + + DWORD cTypes = 0; + hr = pHandler->GetMediaTypeCount(&cTypes); + if (FAILED(hr)) + goto err; + + if(cTypes > 0) + { + hr = pHandler->GetCurrentMediaType(&pCurrentType); + + if (FAILED(hr)) + goto err; + + MT = FormatReader::Read(pCurrentType); + } + +err: + SafeRelease(&pPD); + SafeRelease(&pSD); + SafeRelease(&pHandler); + SafeRelease(&pCurrentType); + + unsigned int sizeRawImage = 0; + + if(VideoFormat == MFVideoFormat_RGB24) + { + sizeRawImage = MT.MF_MT_FRAME_SIZE * 3; + } + else if(VideoFormat == MFVideoFormat_RGB32) + { + sizeRawImage = MT.MF_MT_FRAME_SIZE * 4; + } + + CHECK_HR(hr = RawImage::CreateInstance(&ig_RIFirst, sizeRawImage)); + + CHECK_HR(hr = RawImage::CreateInstance(&ig_RISecond, sizeRawImage)); + + ig_RIOut = ig_RISecond; + + + // Configure the media type that the Sample Grabber will receive. + // Setting the major and subtype is usually enough for the topology loader + // to resolve the topology. + + CHECK_HR(hr = MFCreateMediaType(&pType)); + CHECK_HR(hr = pType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video)); + CHECK_HR(hr = pType->SetGUID(MF_MT_SUBTYPE, VideoFormat)); + + // Create the sample grabber sink. + CHECK_HR(hr = MFCreateSampleGrabberSinkActivate(pType, this, &pSinkActivate)); + + // To run as fast as possible, set this attribute (requires Windows 7): + CHECK_HR(hr = pSinkActivate->SetUINT32(MF_SAMPLEGRABBERSINK_IGNORE_CLOCK, TRUE)); + + // Create the Media Session. + + CHECK_HR(hr = MFCreateMediaSession(NULL, &ig_pSession)); + + // Create the topology. + CHECK_HR(hr = CreateTopology(pSource, pSinkActivate, &ig_pTopology)); + +done: + + // Clean up. + if (FAILED(hr)) + { + if (ig_pSession) + { + ig_pSession->Shutdown(); + } + + SafeRelease(&ig_pSession); + SafeRelease(&ig_pTopology); + } + + SafeRelease(&pSinkActivate); + SafeRelease(&pType); + + return hr; +} + +void ImageGrabber::stopGrabbing() +{ + if(ig_pSession) + ig_pSession->Stop(); + + + + DebugPrintOut *DPO = &DebugPrintOut::getInstance(); + + DPO->printOut(L"IMAGEGRABBER VIDEODEVICE %i: Stopping of of grabbing of images\n", ig_DeviceID); +} + +HRESULT ImageGrabber::startGrabbing(void) +{ + DebugPrintOut *DPO = &DebugPrintOut::getInstance(); + + IMFMediaEvent *pEvent = NULL; + + PROPVARIANT var; + PropVariantInit(&var); + + HRESULT hr = S_OK; + CHECK_HR(hr = ig_pSession->SetTopology(0, ig_pTopology)); + CHECK_HR(hr = ig_pSession->Start(&GUID_NULL, &var)); + + DPO->printOut(L"IMAGEGRABBER VIDEODEVICE %i: Start Grabbing of the images\n", ig_DeviceID); + + for(;;) + { + + HRESULT hrStatus = S_OK; + MediaEventType met; + + if(!ig_pSession) break; + hr = ig_pSession->GetEvent(0, &pEvent); + if(!SUCCEEDED(hr)) + { + hr = S_OK; + + goto done; + } + + hr = pEvent->GetStatus(&hrStatus); + if(!SUCCEEDED(hr)) + { + hr = S_OK; + + goto done; + } + + hr = pEvent->GetType(&met); + if(!SUCCEEDED(hr)) + { + hr = S_OK; + + goto done; + } + + if (met == MESessionEnded) + { + DPO->printOut(L"IMAGEGRABBER VIDEODEVICE %i: MESessionEnded \n", ig_DeviceID); + + ig_pSession->Stop(); + + break; + } + + if (met == MESessionStopped) + { + DPO->printOut(L"IMAGEGRABBER VIDEODEVICE %i: MESessionStopped \n", ig_DeviceID); + + break; + } + + + if (met == MEVideoCaptureDeviceRemoved) + { + DPO->printOut(L"IMAGEGRABBER VIDEODEVICE %i: MEVideoCaptureDeviceRemoved \n", ig_DeviceID); + + break; + } + + SafeRelease(&pEvent); + } + + DPO->printOut(L"IMAGEGRABBER VIDEODEVICE %i: Finish startGrabbing \n", ig_DeviceID); + +done: + SafeRelease(&pEvent); + + SafeRelease(&ig_pSession); + + SafeRelease(&ig_pTopology); + return hr; +} + +HRESULT ImageGrabber::CreateTopology(IMFMediaSource *pSource, IMFActivate *pSinkActivate, IMFTopology **ppTopo) +{ + IMFTopology *pTopology = NULL; + IMFPresentationDescriptor *pPD = NULL; + IMFStreamDescriptor *pSD = NULL; + IMFMediaTypeHandler *pHandler = NULL; + IMFTopologyNode *pNode1 = NULL; + IMFTopologyNode *pNode2 = NULL; + + HRESULT hr = S_OK; + DWORD cStreams = 0; + + CHECK_HR(hr = MFCreateTopology(&pTopology)); + CHECK_HR(hr = pSource->CreatePresentationDescriptor(&pPD)); + CHECK_HR(hr = pPD->GetStreamDescriptorCount(&cStreams)); + + for (DWORD i = 0; i < cStreams; i++) + { + // In this example, we look for audio streams and connect them to the sink. + + BOOL fSelected = FALSE; + GUID majorType; + + CHECK_HR(hr = pPD->GetStreamDescriptorByIndex(i, &fSelected, &pSD)); + CHECK_HR(hr = pSD->GetMediaTypeHandler(&pHandler)); + CHECK_HR(hr = pHandler->GetMajorType(&majorType)); + + if (majorType == MFMediaType_Video && fSelected) + { + CHECK_HR(hr = AddSourceNode(pTopology, pSource, pPD, pSD, &pNode1)); + CHECK_HR(hr = AddOutputNode(pTopology, pSinkActivate, 0, &pNode2)); + CHECK_HR(hr = pNode1->ConnectOutput(0, pNode2, 0)); + break; + } + else + { + CHECK_HR(hr = pPD->DeselectStream(i)); + } + SafeRelease(&pSD); + SafeRelease(&pHandler); + } + + *ppTopo = pTopology; + (*ppTopo)->AddRef(); + +done: + SafeRelease(&pTopology); + SafeRelease(&pNode1); + SafeRelease(&pNode2); + SafeRelease(&pPD); + SafeRelease(&pSD); + SafeRelease(&pHandler); + return hr; +} + +HRESULT ImageGrabber::AddSourceNode( + IMFTopology *pTopology, // Topology. + IMFMediaSource *pSource, // Media source. + IMFPresentationDescriptor *pPD, // Presentation descriptor. + IMFStreamDescriptor *pSD, // Stream descriptor. + IMFTopologyNode **ppNode) // Receives the node pointer. +{ + IMFTopologyNode *pNode = NULL; + + HRESULT hr = S_OK; + CHECK_HR(hr = MFCreateTopologyNode(MF_TOPOLOGY_SOURCESTREAM_NODE, &pNode)); + CHECK_HR(hr = pNode->SetUnknown(MF_TOPONODE_SOURCE, pSource)); + CHECK_HR(hr = pNode->SetUnknown(MF_TOPONODE_PRESENTATION_DESCRIPTOR, pPD)); + CHECK_HR(hr = pNode->SetUnknown(MF_TOPONODE_STREAM_DESCRIPTOR, pSD)); + CHECK_HR(hr = pTopology->AddNode(pNode)); + + // Return the pointer to the caller. + *ppNode = pNode; + (*ppNode)->AddRef(); + +done: + SafeRelease(&pNode); + return hr; +} + +HRESULT ImageGrabber::AddOutputNode( + IMFTopology *pTopology, // Topology. + IMFActivate *pActivate, // Media sink activation object. + DWORD dwId, // Identifier of the stream sink. + IMFTopologyNode **ppNode) // Receives the node pointer. +{ + IMFTopologyNode *pNode = NULL; + + HRESULT hr = S_OK; + CHECK_HR(hr = MFCreateTopologyNode(MF_TOPOLOGY_OUTPUT_NODE, &pNode)); + CHECK_HR(hr = pNode->SetObject(pActivate)); + CHECK_HR(hr = pNode->SetUINT32(MF_TOPONODE_STREAMID, dwId)); + CHECK_HR(hr = pNode->SetUINT32(MF_TOPONODE_NOSHUTDOWN_ON_REMOVE, FALSE)); + CHECK_HR(hr = pTopology->AddNode(pNode)); + + // Return the pointer to the caller. + *ppNode = pNode; + (*ppNode)->AddRef(); + +done: + SafeRelease(&pNode); + return hr; +} + + + +HRESULT ImageGrabber::CreateInstance(ImageGrabber **ppIG, unsigned int deviceID) +{ + *ppIG = new (std::nothrow) ImageGrabber(deviceID); + + if (ppIG == NULL) + { + return E_OUTOFMEMORY; + } + + DebugPrintOut *DPO = &DebugPrintOut::getInstance(); + + DPO->printOut(L"IMAGEGRABBER VIDEODEVICE %i: Creating instance of ImageGrabber\n", deviceID); + + return S_OK; +} + +STDMETHODIMP ImageGrabber::QueryInterface(REFIID riid, void** ppv) +{ + HRESULT hr = E_NOINTERFACE; + *ppv = NULL; + + if(riid == IID_IUnknown || riid == IID_IMFSampleGrabberSinkCallback) + { + *ppv = static_cast(this); + hr = S_OK; + } + + if(riid == IID_IMFClockStateSink) + { + *ppv = static_cast(this); + hr = S_OK; + } + + if(SUCCEEDED(hr)) + { + reinterpret_cast(*ppv)->AddRef(); + } + + return hr; +} + +STDMETHODIMP_(ULONG) ImageGrabber::AddRef() +{ + return InterlockedIncrement(&m_cRef); +} + +STDMETHODIMP_(ULONG) ImageGrabber::Release() +{ + ULONG cRef = InterlockedDecrement(&m_cRef); + if (cRef == 0) + { + delete this; + } + return cRef; +} + +STDMETHODIMP ImageGrabber::OnClockStart(MFTIME hnsSystemTime, LONGLONG llClockStartOffset) +{ + (void)hnsSystemTime; + (void)llClockStartOffset; + return S_OK; +} + +STDMETHODIMP ImageGrabber::OnClockStop(MFTIME hnsSystemTime) +{ + (void)hnsSystemTime; + return S_OK; +} + +STDMETHODIMP ImageGrabber::OnClockPause(MFTIME hnsSystemTime) +{ + (void)hnsSystemTime; + return S_OK; +} + +STDMETHODIMP ImageGrabber::OnClockRestart(MFTIME hnsSystemTime) +{ + (void)hnsSystemTime; + return S_OK; +} + +STDMETHODIMP ImageGrabber::OnClockSetRate(MFTIME hnsSystemTime, float flRate) +{ + (void)flRate; + (void)hnsSystemTime; + return S_OK; +} + +STDMETHODIMP ImageGrabber::OnSetPresentationClock(IMFPresentationClock* pClock) +{ + (void)pClock; + return S_OK; +} + +STDMETHODIMP ImageGrabber::OnProcessSample(REFGUID guidMajorMediaType, DWORD dwSampleFlags, + LONGLONG llSampleTime, LONGLONG llSampleDuration, const BYTE * pSampleBuffer, + DWORD dwSampleSize) +{ + (void)guidMajorMediaType; + (void)llSampleTime; + (void)dwSampleFlags; + (void)llSampleDuration; + (void)dwSampleSize; + + if(ig_RIE) + { + ig_RIFirst->fastCopy(pSampleBuffer); + + ig_RIOut = ig_RIFirst; + } + else + { + ig_RISecond->fastCopy(pSampleBuffer); + + ig_RIOut = ig_RISecond; + } + + ig_RIE = !ig_RIE; + + return S_OK; +} + +STDMETHODIMP ImageGrabber::OnShutdown() +{ + return S_OK; +} + +RawImage *ImageGrabber::getRawImage() +{ + return ig_RIOut; +} + +DWORD WINAPI MainThreadFunction( LPVOID lpParam ) +{ + ImageGrabberThread *pIGT = (ImageGrabberThread *)lpParam; + + pIGT->run(); + + return 0; +} + +HRESULT ImageGrabberThread::CreateInstance(ImageGrabberThread **ppIGT, IMFMediaSource *pSource, unsigned int deviceID) +{ + DebugPrintOut *DPO = &DebugPrintOut::getInstance(); + + *ppIGT = new (std::nothrow) ImageGrabberThread(pSource, deviceID); + + if (ppIGT == NULL) + { + DPO->printOut(L"IMAGEGRABBERTHREAD VIDEODEVICE %i: Memory cannot be allocated\n", deviceID); + + return E_OUTOFMEMORY; + } + else + DPO->printOut(L"IMAGEGRABBERTHREAD VIDEODEVICE %i: Creating of the instance of ImageGrabberThread\n", deviceID); + + return S_OK; +} + +ImageGrabberThread::ImageGrabberThread(IMFMediaSource *pSource, unsigned int deviceID): igt_Handle(NULL), igt_stop(false) +{ + DebugPrintOut *DPO = &DebugPrintOut::getInstance(); + + HRESULT hr = ImageGrabber::CreateInstance(&igt_pImageGrabber, deviceID); + + igt_DeviceID = deviceID; + + if(SUCCEEDED(hr)) + { + hr = igt_pImageGrabber->initImageGrabber(pSource, MFVideoFormat_RGB24); + + if(!SUCCEEDED(hr)) + { + DPO->printOut(L"IMAGEGRABBERTHREAD VIDEODEVICE %i: There is a problem with initialization of the instance of the ImageGrabber class\n", deviceID); + } + else + { + DPO->printOut(L"IMAGEGRABBERTHREAD VIDEODEVICE %i: Initialization of instance of the ImageGrabber class\n", deviceID); + } + } + else + { + DPO->printOut(L"IMAGEGRABBERTHREAD VIDEODEVICE %i There is a problem with creation of the instance of the ImageGrabber class\n", deviceID); + } +} + +void ImageGrabberThread::setEmergencyStopEvent(void *userData, void(*func)(int, void *)) +{ + if(func) + { + igt_func = func; + + igt_userData = userData; + } +} + +ImageGrabberThread::~ImageGrabberThread(void) +{ + DebugPrintOut *DPO = &DebugPrintOut::getInstance(); + + DPO->printOut(L"IMAGEGRABBERTHREAD VIDEODEVICE %i: Destroing ImageGrabberThread\n", igt_DeviceID); + + delete igt_pImageGrabber; +} + +void ImageGrabberThread::stop() +{ + igt_stop = true; + + if(igt_pImageGrabber) + { + igt_pImageGrabber->stopGrabbing(); + } +} + +void ImageGrabberThread::start() +{ + igt_Handle = CreateThread( + NULL, // default security attributes + 0, // use default stack size + MainThreadFunction, // thread function name + this, // argument to thread function + 0, // use default creation flags + &igt_ThreadIdArray); // returns the thread identifier +} + +void ImageGrabberThread::run() +{ + DebugPrintOut *DPO = &DebugPrintOut::getInstance(); + + if(igt_pImageGrabber) + { + DPO->printOut(L"IMAGEGRABBERTHREAD VIDEODEVICE %i: Thread for grabbing images is started\n", igt_DeviceID); + + HRESULT hr = igt_pImageGrabber->startGrabbing(); + + if(!SUCCEEDED(hr)) + { + DPO->printOut(L"IMAGEGRABBERTHREAD VIDEODEVICE %i: There is a problem with starting the process of grabbing\n", igt_DeviceID); + } + + } + else + { + DPO->printOut(L"IMAGEGRABBERTHREAD VIDEODEVICE %i The thread is finished without execution of grabbing\n", igt_DeviceID); + } + + + if(!igt_stop) + { + DPO->printOut(L"IMAGEGRABBERTHREAD VIDEODEVICE %i: Emergency Stop thread\n", igt_DeviceID); + + if(igt_func) + { + igt_func(igt_DeviceID, igt_userData); + } + } + else + DPO->printOut(L"IMAGEGRABBERTHREAD VIDEODEVICE %i: Finish thread\n", igt_DeviceID); +} + +ImageGrabber *ImageGrabberThread::getImageGrabber() +{ + return igt_pImageGrabber; +} + +Media_Foundation::Media_Foundation(void) +{ + HRESULT hr = MFStartup(MF_VERSION); + + if(!SUCCEEDED(hr)) + { + DebugPrintOut *DPO = &DebugPrintOut::getInstance(); + + DPO->printOut(L"MEDIA FOUNDATION: It cannot be created!!!\n"); + } +} + +Media_Foundation::~Media_Foundation(void) +{ + HRESULT hr = MFShutdown(); + + if(!SUCCEEDED(hr)) + { + DebugPrintOut *DPO = &DebugPrintOut::getInstance(); + + DPO->printOut(L"MEDIA FOUNDATION: Resources cannot be released\n"); + } +} + +bool Media_Foundation::buildListOfDevices() +{ + HRESULT hr = S_OK; + + IMFAttributes *pAttributes = NULL; + + CoInitialize(NULL); + + hr = MFCreateAttributes(&pAttributes, 1); + + if (SUCCEEDED(hr)) + { + hr = pAttributes->SetGUID( + MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE, + MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE_VIDCAP_GUID + ); + } + + if (SUCCEEDED(hr)) + { + videoDevices *vDs = &videoDevices::getInstance(); + hr = vDs->initDevices(pAttributes); + } + else + { + DebugPrintOut *DPO = &DebugPrintOut::getInstance(); + DPO->printOut(L"MEDIA FOUNDATION: The access to the video cameras denied\n"); + } + + SafeRelease(&pAttributes); + + return (SUCCEEDED(hr)); +} + +Media_Foundation& Media_Foundation::getInstance() +{ + static Media_Foundation instance; + + return instance; +} + +RawImage::RawImage(unsigned int size): ri_new(false), ri_pixels(NULL) +{ + ri_size = size; + + ri_pixels = new unsigned char[size]; + + memset((void *)ri_pixels,0,ri_size); +} + +bool RawImage::isNew() +{ + return ri_new; +} + +unsigned int RawImage::getSize() +{ + return ri_size; +} + +RawImage::~RawImage(void) +{ + delete []ri_pixels; + + ri_pixels = NULL; +} + +long RawImage::CreateInstance(RawImage **ppRImage,unsigned int size) +{ + *ppRImage = new (std::nothrow) RawImage(size); + + if (ppRImage == NULL) + { + return E_OUTOFMEMORY; + } + return S_OK; +} + +void RawImage::setCopy(const BYTE * pSampleBuffer) +{ + memcpy(ri_pixels, pSampleBuffer, ri_size); + + ri_new = true; +} + +void RawImage::fastCopy(const BYTE * pSampleBuffer) +{ + memcpy(ri_pixels, pSampleBuffer, ri_size); + ri_new = true; +} + +unsigned char * RawImage::getpPixels() +{ + ri_new = false; + return ri_pixels; +} + +videoDevice::videoDevice(void): vd_IsSetuped(false), vd_LockOut(OpenLock), vd_pFriendlyName(NULL), + vd_Width(0), vd_Height(0), vd_pSource(NULL), vd_func(NULL), vd_userData(NULL) +{ +} + +void videoDevice::setParametrs(CamParametrs parametrs) +{ + if(vd_IsSetuped) + { + if(vd_pSource) + { + Parametr *pParametr = (Parametr *)(¶metrs); + + Parametr *pPrevParametr = (Parametr *)(&vd_PrevParametrs); + + IAMVideoProcAmp *pProcAmp = NULL; + HRESULT hr = vd_pSource->QueryInterface(IID_PPV_ARGS(&pProcAmp)); + + if (SUCCEEDED(hr)) + { + for(unsigned int i = 0; i < 10; i++) + { + if(pPrevParametr[i].CurrentValue != pParametr[i].CurrentValue || pPrevParametr[i].Flag != pParametr[i].Flag) + hr = pProcAmp->Set(VideoProcAmp_Brightness + i, pParametr[i].CurrentValue, pParametr[i].Flag); + + } + + pProcAmp->Release(); + } + + IAMCameraControl *pProcControl = NULL; + hr = vd_pSource->QueryInterface(IID_PPV_ARGS(&pProcControl)); + + if (SUCCEEDED(hr)) + { + for(unsigned int i = 0; i < 7; i++) + { + if(pPrevParametr[10 + i].CurrentValue != pParametr[10 + i].CurrentValue || pPrevParametr[10 + i].Flag != pParametr[10 + i].Flag) + hr = pProcControl->Set(CameraControl_Pan+i, pParametr[10 + i].CurrentValue, pParametr[10 + i].Flag); + } + + pProcControl->Release(); + } + + vd_PrevParametrs = parametrs; + } + } +} + +CamParametrs videoDevice::getParametrs() +{ + CamParametrs out; + + if(vd_IsSetuped) + { + if(vd_pSource) + { + Parametr *pParametr = (Parametr *)(&out); + + IAMVideoProcAmp *pProcAmp = NULL; + HRESULT hr = vd_pSource->QueryInterface(IID_PPV_ARGS(&pProcAmp)); + + if (SUCCEEDED(hr)) + { + for(unsigned int i = 0; i < 10; i++) + { + Parametr temp; + + hr = pProcAmp->GetRange(VideoProcAmp_Brightness+i, &temp.Min, &temp.Max, &temp.Step, &temp.Default, &temp.Flag); + + if (SUCCEEDED(hr)) + { + temp.CurrentValue = temp.Default; + + pParametr[i] = temp; + } + } + + pProcAmp->Release(); + } + + IAMCameraControl *pProcControl = NULL; + hr = vd_pSource->QueryInterface(IID_PPV_ARGS(&pProcControl)); + + if (SUCCEEDED(hr)) + { + for(unsigned int i = 0; i < 7; i++) + { + Parametr temp; + + hr = pProcControl->GetRange(CameraControl_Pan+i, &temp.Min, &temp.Max, &temp.Step, &temp.Default, &temp.Flag); + + if (SUCCEEDED(hr)) + { + temp.CurrentValue = temp.Default; + + pParametr[10 + i] = temp; + } + } + + pProcControl->Release(); + } + } + } + + return out; +} + +long videoDevice::resetDevice(IMFActivate *pActivate) +{ + HRESULT hr = -1; + + vd_CurrentFormats.clear(); + + if(vd_pFriendlyName) + CoTaskMemFree(vd_pFriendlyName); + + vd_pFriendlyName = NULL; + + if(pActivate) + { + IMFMediaSource *pSource = NULL; + + hr = pActivate->GetAllocatedString( + MF_DEVSOURCE_ATTRIBUTE_FRIENDLY_NAME, + &vd_pFriendlyName, + NULL + ); + + hr = pActivate->ActivateObject( + __uuidof(IMFMediaSource), + (void**)&pSource + ); + + enumerateCaptureFormats(pSource); + + buildLibraryofTypes(); + + SafeRelease(&pSource); + + if(FAILED(hr)) + { + vd_pFriendlyName = NULL; + + DebugPrintOut *DPO = &DebugPrintOut::getInstance(); + + DPO->printOut(L"VIDEODEVICE %i: IMFMediaSource interface cannot be created \n", vd_CurrentNumber); + } + } + + return hr; +} + +long videoDevice::readInfoOfDevice(IMFActivate *pActivate, unsigned int Num) +{ + HRESULT hr = -1; + + vd_CurrentNumber = Num; + + hr = resetDevice(pActivate); + + return hr; +} + +long videoDevice::checkDevice(IMFAttributes *pAttributes, IMFActivate **pDevice) +{ + HRESULT hr = S_OK; + + IMFActivate **ppDevices = NULL; + + DebugPrintOut *DPO = &DebugPrintOut::getInstance(); + + UINT32 count; + + wchar_t *newFriendlyName = NULL; + + hr = MFEnumDeviceSources(pAttributes, &ppDevices, &count); + + if (SUCCEEDED(hr)) + { + if(count > 0) + { + if(count > vd_CurrentNumber) + { + hr = ppDevices[vd_CurrentNumber]->GetAllocatedString( + MF_DEVSOURCE_ATTRIBUTE_FRIENDLY_NAME, + &newFriendlyName, + NULL + ); + + if (SUCCEEDED(hr)) + { + if(wcscmp(newFriendlyName, vd_pFriendlyName) != 0) + { + DPO->printOut(L"VIDEODEVICE %i: Chosen device cannot be found \n", vd_CurrentNumber); + + hr = -1; + + pDevice = NULL; + } + else + { + *pDevice = ppDevices[vd_CurrentNumber]; + + (*pDevice)->AddRef(); + } + } + else + { + DPO->printOut(L"VIDEODEVICE %i: Name of device cannot be gotten \n", vd_CurrentNumber); + } + + } + else + { + DPO->printOut(L"VIDEODEVICE %i: Number of devices more than corrent number of the device \n", vd_CurrentNumber); + + hr = -1; + } + + for(UINT32 i = 0; i < count; i++) + { + SafeRelease(&ppDevices[i]); + } + + SafeRelease(ppDevices); + } + else + hr = -1; + } + else + { + DPO->printOut(L"VIDEODEVICE %i: List of DeviceSources cannot be enumerated \n", vd_CurrentNumber); + } + + return hr; +} + +long videoDevice::initDevice() +{ + HRESULT hr = -1; + + IMFAttributes *pAttributes = NULL; + + IMFActivate * vd_pActivate= NULL; + + DebugPrintOut *DPO = &DebugPrintOut::getInstance(); + + CoInitialize(NULL); + + hr = MFCreateAttributes(&pAttributes, 1); + + if (SUCCEEDED(hr)) + { + hr = pAttributes->SetGUID( + MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE, + MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE_VIDCAP_GUID + ); + } + + if (SUCCEEDED(hr)) + { + hr = checkDevice(pAttributes, &vd_pActivate); + + if (SUCCEEDED(hr) && vd_pActivate) + { + SafeRelease(&vd_pSource); + + hr = vd_pActivate->ActivateObject( + __uuidof(IMFMediaSource), + (void**)&vd_pSource + ); + + if (SUCCEEDED(hr)) + { + + } + + SafeRelease(&vd_pActivate); + } + else + { + DPO->printOut(L"VIDEODEVICE %i: Device there is not \n", vd_CurrentNumber); + } + } + else + { + + DPO->printOut(L"VIDEODEVICE %i: The attribute of video cameras cannot be getting \n", vd_CurrentNumber); + + } + + SafeRelease(&pAttributes); + + return hr; +} + +MediaType videoDevice::getFormat(unsigned int id) +{ + if(id < vd_CurrentFormats.size()) + { + return vd_CurrentFormats[id]; + } + else return MediaType(); + +} + +int videoDevice::getCountFormats() +{ + return vd_CurrentFormats.size(); +} + +void videoDevice::setEmergencyStopEvent(void *userData, void(*func)(int, void *)) +{ + vd_func = func; + + vd_userData = userData; +} + +void videoDevice::closeDevice() +{ + if(vd_IsSetuped) + { + vd_IsSetuped = false; + + vd_pSource->Stop(); + + SafeRelease(&vd_pSource); + + if(vd_LockOut == RawDataLock) + { + vd_pImGrTh->stop(); + + Sleep(500); + + delete vd_pImGrTh; + } + + vd_pImGrTh = NULL; + + vd_LockOut = OpenLock; + + DebugPrintOut *DPO = &DebugPrintOut::getInstance(); + DPO->printOut(L"VIDEODEVICE %i: Device is stopped \n", vd_CurrentNumber); + } +} + +unsigned int videoDevice::getWidth() +{ + if(vd_IsSetuped) + return vd_Width; + else + return 0; +} + +unsigned int videoDevice::getHeight() +{ + if(vd_IsSetuped) + return vd_Height; + else + return 0; +} + +IMFMediaSource *videoDevice::getMediaSource() +{ + IMFMediaSource *out = NULL; + + if(vd_LockOut == OpenLock) + { + vd_LockOut = MediaSourceLock; + + out = vd_pSource; + } + + return out; +} + +int videoDevice::findType(unsigned int size, unsigned int frameRate) +{ + if(vd_CaptureFormats.size() == 0) + return 0; + + FrameRateMap FRM = vd_CaptureFormats[size]; + + if(FRM.size() == 0) + return 0; + + UINT64 frameRateMax = 0; SUBTYPEMap STMMax; + + if(frameRate == 0) + { + std::map::iterator f = FRM.begin(); + + for(; f != FRM.end(); f++) + { + if((*f).first >= frameRateMax) + { + frameRateMax = (*f).first; + + STMMax = (*f).second; + } + } + + } + else + { + std::map::iterator f = FRM.begin(); + + for(; f != FRM.end(); f++) + { + if((*f).first >= frameRateMax) + { + if(frameRate > (*f).first) + { + frameRateMax = (*f).first; + + STMMax = (*f).second; + } + } + } + } + + if(STMMax.size() == 0) + return 0; + + + std::map::iterator S = STMMax.begin(); + + vectorNum VN = (*S).second; + + if(VN.size() == 0) + return 0; + + return VN[0]; + +} + +void videoDevice::buildLibraryofTypes() +{ + unsigned int size; + + unsigned int framerate; + + std::vector::iterator i = vd_CurrentFormats.begin(); + + int count = 0; + + for(; i != vd_CurrentFormats.end(); i++) + { + size = (*i).MF_MT_FRAME_SIZE; + + framerate = (*i).MF_MT_FRAME_RATE; + + FrameRateMap FRM = vd_CaptureFormats[size]; + + SUBTYPEMap STM = FRM[framerate]; + + String subType((*i).pMF_MT_SUBTYPEName); + + vectorNum VN = STM[subType]; + + VN.push_back(count); + + STM[subType] = VN; + + FRM[framerate] = STM; + + vd_CaptureFormats[size] = FRM; + + count++; + } +} + +long videoDevice::setDeviceFormat(IMFMediaSource *pSource, unsigned long dwFormatIndex) +{ + IMFPresentationDescriptor *pPD = NULL; + IMFStreamDescriptor *pSD = NULL; + IMFMediaTypeHandler *pHandler = NULL; + IMFMediaType *pType = NULL; + + HRESULT hr = pSource->CreatePresentationDescriptor(&pPD); + if (FAILED(hr)) + { + goto done; + } + + BOOL fSelected; + hr = pPD->GetStreamDescriptorByIndex(0, &fSelected, &pSD); + if (FAILED(hr)) + { + goto done; + } + + hr = pSD->GetMediaTypeHandler(&pHandler); + if (FAILED(hr)) + { + goto done; + } + + hr = pHandler->GetMediaTypeByIndex((DWORD)dwFormatIndex, &pType); + if (FAILED(hr)) + { + goto done; + } + + hr = pHandler->SetCurrentMediaType(pType); + +done: + SafeRelease(&pPD); + SafeRelease(&pSD); + SafeRelease(&pHandler); + SafeRelease(&pType); + return hr; +} + +bool videoDevice::isDeviceSetup() +{ + return vd_IsSetuped; +} + +RawImage * videoDevice::getRawImageOut() +{ + if(!vd_IsSetuped) return NULL; + + if(vd_pImGrTh) + return vd_pImGrTh->getImageGrabber()->getRawImage(); + else + { + DebugPrintOut *DPO = &DebugPrintOut::getInstance(); + + DPO->printOut(L"VIDEODEVICE %i: The instance of ImageGrabberThread class does not exist \n", vd_CurrentNumber); + } + return NULL; +} + +bool videoDevice::isFrameNew() +{ + if(!vd_IsSetuped) return false; + + if(vd_LockOut == RawDataLock || vd_LockOut == OpenLock) + { + if(vd_LockOut == OpenLock) + { + vd_LockOut = RawDataLock; + + HRESULT hr = ImageGrabberThread::CreateInstance(&vd_pImGrTh, vd_pSource, vd_CurrentNumber); + + if(FAILED(hr)) + { + DebugPrintOut *DPO = &DebugPrintOut::getInstance(); + + DPO->printOut(L"VIDEODEVICE %i: The instance of ImageGrabberThread class cannot be created.\n", vd_CurrentNumber); + + return false; + } + + vd_pImGrTh->setEmergencyStopEvent(vd_userData, vd_func); + + vd_pImGrTh->start(); + + return true; + } + + if(vd_pImGrTh) + return vd_pImGrTh->getImageGrabber()->getRawImage()->isNew(); + + } + + return false; +} + +bool videoDevice::isDeviceMediaSource() +{ + if(vd_LockOut == MediaSourceLock) return true; + + return false; +} + +bool videoDevice::isDeviceRawDataSource() +{ + if(vd_LockOut == RawDataLock) return true; + + return false; +} + +bool videoDevice::setupDevice(unsigned int id) +{ + DebugPrintOut *DPO = &DebugPrintOut::getInstance(); + + if(!vd_IsSetuped) + { + HRESULT hr = -1; + + hr = initDevice(); + + if(SUCCEEDED(hr)) + { + vd_Width = vd_CurrentFormats[id].width; + + vd_Height = vd_CurrentFormats[id].height; + + hr = setDeviceFormat(vd_pSource, (DWORD) id); + + vd_IsSetuped = (SUCCEEDED(hr)); + + if(vd_IsSetuped) + DPO->printOut(L"\n\nVIDEODEVICE %i: Device is setuped \n", vd_CurrentNumber); + + vd_PrevParametrs = getParametrs(); + + return vd_IsSetuped; + } + else + { + DPO->printOut(L"VIDEODEVICE %i: Interface IMFMediaSource cannot be got \n", vd_CurrentNumber); + + return false; + } + } + else + { + DPO->printOut(L"VIDEODEVICE %i: Device is setuped already \n", vd_CurrentNumber); + + return false; + } +} + +bool videoDevice::setupDevice(unsigned int w, unsigned int h, unsigned int idealFramerate) +{ + unsigned int id = findType(w * h, idealFramerate); + + return setupDevice(id); +} + +wchar_t *videoDevice::getName() +{ + return vd_pFriendlyName; +} + +videoDevice::~videoDevice(void) +{ + closeDevice(); + + SafeRelease(&vd_pSource); + + if(vd_pFriendlyName) + CoTaskMemFree(vd_pFriendlyName); +} + +long videoDevice::enumerateCaptureFormats(IMFMediaSource *pSource) +{ + IMFPresentationDescriptor *pPD = NULL; + IMFStreamDescriptor *pSD = NULL; + IMFMediaTypeHandler *pHandler = NULL; + IMFMediaType *pType = NULL; + + HRESULT hr = pSource->CreatePresentationDescriptor(&pPD); + if (FAILED(hr)) + { + goto done; + } + + BOOL fSelected; + hr = pPD->GetStreamDescriptorByIndex(0, &fSelected, &pSD); + if (FAILED(hr)) + { + goto done; + } + + hr = pSD->GetMediaTypeHandler(&pHandler); + if (FAILED(hr)) + { + goto done; + } + + DWORD cTypes = 0; + hr = pHandler->GetMediaTypeCount(&cTypes); + if (FAILED(hr)) + { + goto done; + } + + for (DWORD i = 0; i < cTypes; i++) + { + hr = pHandler->GetMediaTypeByIndex(i, &pType); + + if (FAILED(hr)) + { + goto done; + } + + MediaType MT = FormatReader::Read(pType); + + vd_CurrentFormats.push_back(MT); + + SafeRelease(&pType); + } + +done: + SafeRelease(&pPD); + SafeRelease(&pSD); + SafeRelease(&pHandler); + SafeRelease(&pType); + + return hr; +} + + +videoDevices::videoDevices(void): count(0) +{} + +void videoDevices::clearDevices() +{ + std::vector::iterator i = vds_Devices.begin(); + + for(; i != vds_Devices.end(); ++i) + delete (*i); + + vds_Devices.clear(); +} + +videoDevices::~videoDevices(void) +{ + clearDevices(); +} + +videoDevice * videoDevices::getDevice(unsigned int i) +{ + if(i >= vds_Devices.size()) + { + return NULL; + } + + if(i < 0) + { + return NULL; + } + + return vds_Devices[i]; +} + +long videoDevices::initDevices(IMFAttributes *pAttributes) +{ + HRESULT hr = S_OK; + + IMFActivate **ppDevices = NULL; + + clearDevices(); + + hr = MFEnumDeviceSources(pAttributes, &ppDevices, &count); + + if (SUCCEEDED(hr)) + { + if(count > 0) + { + for(UINT32 i = 0; i < count; i++) + { + videoDevice *vd = new videoDevice; + vd->readInfoOfDevice(ppDevices[i], i); + vds_Devices.push_back(vd); + + SafeRelease(&ppDevices[i]); + } + SafeRelease(ppDevices); + } + else + hr = -1; + } + else + { + DebugPrintOut *DPO = &DebugPrintOut::getInstance(); + + DPO->printOut(L"VIDEODEVICES: The instances of the videoDevice class cannot be created\n"); + } + + return hr; +} + +size_t videoDevices::getCount() +{ + return vds_Devices.size(); +} + +videoDevices& videoDevices::getInstance() +{ + static videoDevices instance; + + return instance; +} + +Parametr::Parametr() +{ + CurrentValue = 0; + + Min = 0; + + Max = 0; + + Step = 0; + + Default = 0; + + Flag = 0; +} + +MediaType::MediaType() +{ + pMF_MT_AM_FORMAT_TYPEName = NULL; + + pMF_MT_MAJOR_TYPEName = NULL; + + pMF_MT_SUBTYPEName = NULL; + + Clear(); +} + +MediaType::~MediaType() +{ + Clear(); +} + +void MediaType::Clear() +{ + + MF_MT_FRAME_SIZE = 0; + + height = 0; + + width = 0; + + MF_MT_YUV_MATRIX = 0; + + MF_MT_VIDEO_LIGHTING = 0; + + MF_MT_DEFAULT_STRIDE = 0; + + MF_MT_VIDEO_CHROMA_SITING = 0; + + MF_MT_FIXED_SIZE_SAMPLES = 0; + + MF_MT_VIDEO_NOMINAL_RANGE = 0; + + MF_MT_FRAME_RATE = 0; + + MF_MT_FRAME_RATE_low = 0; + + MF_MT_PIXEL_ASPECT_RATIO = 0; + + MF_MT_PIXEL_ASPECT_RATIO_low = 0; + + MF_MT_ALL_SAMPLES_INDEPENDENT = 0; + + MF_MT_FRAME_RATE_RANGE_MIN = 0; + + MF_MT_FRAME_RATE_RANGE_MIN_low = 0; + + MF_MT_SAMPLE_SIZE = 0; + + MF_MT_VIDEO_PRIMARIES = 0; + + MF_MT_INTERLACE_MODE = 0; + + MF_MT_FRAME_RATE_RANGE_MAX = 0; + + MF_MT_FRAME_RATE_RANGE_MAX_low = 0; + + memset(&MF_MT_MAJOR_TYPE, 0, sizeof(GUID)); + + memset(&MF_MT_AM_FORMAT_TYPE, 0, sizeof(GUID)); + + memset(&MF_MT_SUBTYPE, 0, sizeof(GUID)); +} + +videoInput::videoInput(void): accessToDevices(false) +{ + DebugPrintOut *DPO = &DebugPrintOut::getInstance(); + + DPO->printOut(L"\n***** VIDEOINPUT LIBRARY - 2013 (Author: Evgeny Pereguda) *****\n\n"); + + updateListOfDevices(); + if(!accessToDevices) + DPO->printOut(L"INITIALIZATION: Ther is not any suitable video device\n"); +} + +void videoInput::updateListOfDevices() +{ + DebugPrintOut *DPO = &DebugPrintOut::getInstance(); + + Media_Foundation *MF = &Media_Foundation::getInstance(); + accessToDevices = MF->buildListOfDevices(); + + if(!accessToDevices) + DPO->printOut(L"UPDATING: Ther is not any suitable video device\n"); +} + +videoInput::~videoInput(void) +{ + DebugPrintOut *DPO = &DebugPrintOut::getInstance(); + + DPO->printOut(L"\n***** CLOSE VIDEOINPUT LIBRARY - 2013 *****\n\n"); +} + +IMFMediaSource *videoInput::getMediaSource(int deviceID) +{ + DebugPrintOut *DPO = &DebugPrintOut::getInstance(); + + if(accessToDevices) + { + videoDevices *VDS = &videoDevices::getInstance(); + + videoDevice * VD = VDS->getDevice(deviceID); + + if(VD) + { + IMFMediaSource *out = VD->getMediaSource(); + + if(!out) + DPO->printOut(L"VideoDevice %i: There is not any suitable IMFMediaSource interface\n", deviceID); + + return out; + } + } + else + { + DPO->printOut(L"VIDEODEVICE(s): There is not any suitable video device\n"); + } + + return NULL; +} + +bool videoInput::setupDevice(int deviceID, unsigned int id) +{ + DebugPrintOut *DPO = &DebugPrintOut::getInstance(); + + if (deviceID < 0 ) + { + DPO->printOut(L"VIDEODEVICE %i: Invalid device ID\n", deviceID); + return false; + } + + if(accessToDevices) + { + videoDevices *VDS = &videoDevices::getInstance(); + + videoDevice * VD = VDS->getDevice(deviceID); + + if(VD) + { + bool out = VD->setupDevice(id); + + if(!out) + DPO->printOut(L"VIDEODEVICE %i: This device cannot be started\n", deviceID); + + return out; + } + } + else + { + DPO->printOut(L"VIDEODEVICE(s): There is not any suitable video device\n"); + } + + return false; +} + +bool videoInput::setupDevice(int deviceID, unsigned int w, unsigned int h, unsigned int idealFramerate) +{ + DebugPrintOut *DPO = &DebugPrintOut::getInstance(); + + if (deviceID < 0 ) + { + DPO->printOut(L"VIDEODEVICE %i: Invalid device ID\n", deviceID); + return false; + } + + if(accessToDevices) + { + videoDevices *VDS = &videoDevices::getInstance(); + + videoDevice * VD = VDS->getDevice(deviceID); + + if(VD) + { + bool out = VD->setupDevice(w, h, idealFramerate); + + if(!out) + DPO->printOut(L"VIDEODEVICE %i: this device cannot be started\n", deviceID); + + return out; + } + } + else + { + DPO->printOut(L"VIDEODEVICE(s): There is not any suitable video device\n", deviceID); + } + + return false; +} + +MediaType videoInput::getFormat(int deviceID, unsigned int id) +{ + DebugPrintOut *DPO = &DebugPrintOut::getInstance(); + + if (deviceID < 0) + { + DPO->printOut(L"VIDEODEVICE %i: Invalid device ID\n", deviceID); + return MediaType(); + } + + if(accessToDevices) + { + videoDevices *VDS = &videoDevices::getInstance(); + + videoDevice * VD = VDS->getDevice(deviceID); + + if(VD) + return VD->getFormat(id); + } + else + { + DPO->printOut(L"VIDEODEVICE(s): There is not any suitable video device\n"); + } + + return MediaType(); +} + +bool videoInput::isDeviceSetup(int deviceID) +{ + DebugPrintOut *DPO = &DebugPrintOut::getInstance(); + + if (deviceID < 0) + { + DPO->printOut(L"VIDEODEVICE %i: Invalid device ID\n", deviceID); + return false; + } + + if(accessToDevices) + { + videoDevices *VDS = &videoDevices::getInstance(); + + videoDevice * VD = VDS->getDevice(deviceID); + + if(VD) + return VD->isDeviceSetup(); + } + else + { + DPO->printOut(L"VIDEODEVICE(s): There is not any suitable video device\n"); + } + + return false; +} + +bool videoInput::isDeviceMediaSource(int deviceID) +{ + DebugPrintOut *DPO = &DebugPrintOut::getInstance(); + + if (deviceID < 0) + { + DPO->printOut(L"VIDEODEVICE %i: Invalid device ID\n", deviceID); + return false; + } + + if(accessToDevices) + { + videoDevices *VDS = &videoDevices::getInstance(); + + videoDevice * VD = VDS->getDevice(deviceID); + + if(VD) + return VD->isDeviceMediaSource(); + } + else + { + DPO->printOut(L"Device(s): There is not any suitable video device\n"); + } + + return false; +} + +bool videoInput::isDeviceRawDataSource(int deviceID) +{ + DebugPrintOut *DPO = &DebugPrintOut::getInstance(); + + if (deviceID < 0) + { + DPO->printOut(L"VIDEODEVICE %i: Invalid device ID\n", deviceID); + return false; + } + + if(accessToDevices) + { + videoDevices *VDS = &videoDevices::getInstance(); + + videoDevice * VD = VDS->getDevice(deviceID); + + if(VD) + { + bool isRaw = VD->isDeviceRawDataSource(); + return isRaw; + } + } + else + { + DPO->printOut(L"VIDEODEVICE(s): There is not any suitable video device\n"); + } + + return false; +} + +bool videoInput::isFrameNew(int deviceID) +{ + DebugPrintOut *DPO = &DebugPrintOut::getInstance(); + + if (deviceID < 0) + { + DPO->printOut(L"VIDEODEVICE %i: Invalid device ID\n", deviceID); + return false; + } + + if(accessToDevices) + { + if(!isDeviceSetup(deviceID)) + { + if(isDeviceMediaSource(deviceID)) + return false; + } + + videoDevices *VDS = &videoDevices::getInstance(); + + videoDevice * VD = VDS->getDevice(deviceID); + + if(VD) + { + return VD->isFrameNew(); + } + } + else + { + DPO->printOut(L"VIDEODEVICE(s): There is not any suitable video device\n"); + } + + return false; +} + +unsigned int videoInput::getCountFormats(int deviceID) +{ + DebugPrintOut *DPO = &DebugPrintOut::getInstance(); + + if (deviceID < 0) + { + DPO->printOut(L"VIDEODEVICE %i: Invalid device ID\n", deviceID); + return 0; + } + + if(accessToDevices) + { + videoDevices *VDS = &videoDevices::getInstance(); + + videoDevice * VD = VDS->getDevice(deviceID); + + if(VD) + return VD->getCountFormats(); + } + else + { + DPO->printOut(L"VIDEODEVICE(s): There is not any suitable video device\n"); + } + + return 0; +} + +void videoInput::closeAllDevices() +{ + videoDevices *VDS = &videoDevices::getInstance(); + + for(unsigned int i = 0; i < VDS->getCount(); i++) + closeDevice(i); +} + +void videoInput::setParametrs(int deviceID, CamParametrs parametrs) +{ + DebugPrintOut *DPO = &DebugPrintOut::getInstance(); + + if (deviceID < 0) + { + DPO->printOut(L"VIDEODEVICE %i: Invalid device ID\n", deviceID); + return; + } + + if(accessToDevices) + { + videoDevices *VDS = &videoDevices::getInstance(); + + videoDevice *VD = VDS->getDevice(deviceID); + + if(VD) + VD->setParametrs(parametrs); + } + else + { + DPO->printOut(L"VIDEODEVICE(s): There is not any suitable video device\n"); + } +} + +CamParametrs videoInput::getParametrs(int deviceID) +{ + DebugPrintOut *DPO = &DebugPrintOut::getInstance(); + CamParametrs out; + + if (deviceID < 0) + { + DPO->printOut(L"VIDEODEVICE %i: Invalid device ID\n", deviceID); + return out; + } + + if(accessToDevices) + { + videoDevices *VDS = &videoDevices::getInstance(); + + videoDevice *VD = VDS->getDevice(deviceID); + + if(VD) + out = VD->getParametrs(); + } + else + { + DPO->printOut(L"VIDEODEVICE(s): There is not any suitable video device\n"); + } + + return out; +} + +void videoInput::closeDevice(int deviceID) +{ + DebugPrintOut *DPO = &DebugPrintOut::getInstance(); + + if (deviceID < 0) + { + DPO->printOut(L"VIDEODEVICE %i: Invalid device ID\n", deviceID); + return; + } + + if(accessToDevices) + { + videoDevices *VDS = &videoDevices::getInstance(); + + videoDevice *VD = VDS->getDevice(deviceID); + + if(VD) + VD->closeDevice(); + } + else + { + DPO->printOut(L"VIDEODEVICE(s): There is not any suitable video device\n"); + } +} + +unsigned int videoInput::getWidth(int deviceID) +{ + DebugPrintOut *DPO = &DebugPrintOut::getInstance(); + + if (deviceID < 0) + { + DPO->printOut(L"VIDEODEVICE %i: Invalid device ID\n", deviceID); + return 0; + } + + if(accessToDevices) + { + videoDevices *VDS = &videoDevices::getInstance(); + + videoDevice * VD = VDS->getDevice(deviceID); + + if(VD) + return VD->getWidth(); + } + else + { + DPO->printOut(L"VIDEODEVICE(s): There is not any suitable video device\n"); + } + + return 0; +} + +unsigned int videoInput::getHeight(int deviceID) +{ + DebugPrintOut *DPO = &DebugPrintOut::getInstance(); + + if (deviceID < 0) + { + DPO->printOut(L"VIDEODEVICE %i: Invalid device ID\n", deviceID); + return 0; + } + + if(accessToDevices) + { + videoDevices *VDS = &videoDevices::getInstance(); + + videoDevice * VD = VDS->getDevice(deviceID); + + if(VD) + return VD->getHeight(); + } + else + { + DPO->printOut(L"VIDEODEVICE(s): There is not any suitable video device\n"); + } + + return 0; +} + +wchar_t *videoInput::getNameVideoDevice(int deviceID) +{ + DebugPrintOut *DPO = &DebugPrintOut::getInstance(); + + if (deviceID < 0) + { + DPO->printOut(L"VIDEODEVICE %i: Invalid device ID\n", deviceID); + return NULL; + } + + if(accessToDevices) + { + videoDevices *VDS = &videoDevices::getInstance(); + + videoDevice * VD = VDS->getDevice(deviceID); + + if(VD) + return VD->getName(); + } + else + { + DPO->printOut(L"VIDEODEVICE(s): There is not any suitable video device\n"); + } + + return L"Empty"; +} + +unsigned int videoInput::listDevices(bool silent) +{ + DebugPrintOut *DPO = &DebugPrintOut::getInstance(); + + int out = 0; + + if(accessToDevices) + { + videoDevices *VDS = &videoDevices::getInstance(); + + out = VDS->getCount(); + + DebugPrintOut *DPO = &DebugPrintOut::getInstance(); + + if(!silent)DPO->printOut(L"\nVIDEOINPUT SPY MODE!\n\n"); + + if(!silent)DPO->printOut(L"SETUP: Looking For Capture Devices\n"); + + for(int i = 0; i < out; i++) + { + if(!silent)DPO->printOut(L"SETUP: %i) %s \n",i, getNameVideoDevice(i)); + } + + if(!silent)DPO->printOut(L"SETUP: %i Device(s) found\n\n", out); + + } + else + { + DPO->printOut(L"VIDEODEVICE(s): There is not any suitable video device\n"); + } + + return out; +} + +videoInput& videoInput::getInstance() +{ + static videoInput instance; + + return instance; +} + +bool videoInput::isDevicesAcceable() +{ + return accessToDevices; +} + +void videoInput::setVerbose(bool state) +{ + DebugPrintOut *DPO = &DebugPrintOut::getInstance(); + DPO->setVerbose(state); +} + +void videoInput::setEmergencyStopEvent(int deviceID, void *userData, void(*func)(int, void *)) +{ + DebugPrintOut *DPO = &DebugPrintOut::getInstance(); + + if (deviceID < 0) + { + DPO->printOut(L"VIDEODEVICE %i: Invalid device ID\n", deviceID); + return; + } + + if(accessToDevices) + { + if(func) + { + videoDevices *VDS = &videoDevices::getInstance(); + + videoDevice * VD = VDS->getDevice(deviceID); + + if(VD) + VD->setEmergencyStopEvent(userData, func); + } + } + else + { + DPO->printOut(L"VIDEODEVICE(s): There is not any suitable video device\n"); + } +} + +bool videoInput::getPixels(int deviceID, unsigned char * dstBuffer, bool flipRedAndBlue, bool flipImage) +{ + bool success = false; + unsigned int bytes = 3; + DebugPrintOut *DPO = &DebugPrintOut::getInstance(); + + if (deviceID < 0) + { + DPO->printOut(L"VIDEODEVICE %i: Invalid device ID\n", deviceID); + return success; + } + + if(accessToDevices) + { + bool isRaw = isDeviceRawDataSource(deviceID); + if(isRaw) + { + videoDevices *VDS = &videoDevices::getInstance(); + DebugPrintOut *DPO = &DebugPrintOut::getInstance(); + + RawImage *RIOut = VDS->getDevice(deviceID)->getRawImageOut(); + + if(RIOut) + { + unsigned int height = VDS->getDevice(deviceID)->getHeight(); + unsigned int width = VDS->getDevice(deviceID)->getWidth(); + + unsigned int size = bytes * width * height; + + if(size == RIOut->getSize()) + { + processPixels(RIOut->getpPixels(), dstBuffer, width, height, bytes, flipRedAndBlue, flipImage); + success = true; + } + else + { + DPO->printOut(L"ERROR: GetPixels() - bufferSizes do not match!\n"); + } + } + else + { + DPO->printOut(L"ERROR: GetPixels() - Unable to grab frame for device %i\n", deviceID); + } + } + else + { + DPO->printOut(L"ERROR: GetPixels() - Not raw data source device %i\n", deviceID); + } + } + else + { + DPO->printOut(L"VIDEODEVICE(s): There is not any suitable video device\n"); + } + + return success; +} + +void videoInput::processPixels(unsigned char * src, unsigned char * dst, unsigned int width, + unsigned int height, unsigned int bpp, bool bRGB, bool bFlip) +{ + unsigned int widthInBytes = width * bpp; + unsigned int numBytes = widthInBytes * height; + int *dstInt, *srcInt; + + if(!bRGB) + { + if(bFlip) + { + for(unsigned int y = 0; y < height; y++) + { + dstInt = (int *)(dst + (y * widthInBytes)); + srcInt = (int *)(src + ( (height -y -1) * widthInBytes)); + memcpy(dstInt, srcInt, widthInBytes); + } + + } + else + { + memcpy(dst, src, numBytes); + } + } + else + { + if(bFlip) + { + + unsigned int x = 0; + unsigned int y = (height - 1) * widthInBytes; + src += y; + + for(unsigned int i = 0; i < numBytes; i+=3) + { + if(x >= width) + { + x = 0; + src -= widthInBytes*2; + } + + *dst = *(src+2); + dst++; + + *dst = *(src+1); + dst++; + + *dst = *src; + dst++; + + src+=3; + x++; + } + } + else + { + for(unsigned int i = 0; i < numBytes; i+=3) + { + *dst = *(src+2); + dst++; + + *dst = *(src+1); + dst++; + + *dst = *src; + dst++; + + src+=3; + } + } + } +} + +} + +/******* Capturing video from camera via Microsoft Media Foundation **********/ + +class CvCaptureCAM_MSMF : public CvCapture +{ +public: + CvCaptureCAM_MSMF(); + virtual ~CvCaptureCAM_MSMF(); + + virtual bool open( int index ); + virtual void close(); + virtual double getProperty(int); + virtual bool setProperty(int, double); + virtual bool grabFrame(); + virtual IplImage* retrieveFrame(int); + virtual int getCaptureDomain() { return CV_CAP_MSMF; } // Return the type of the capture object: CV_CAP_VFW, etc... + +protected: + void init(); + + int index, width, height,fourcc; + int widthSet, heightSet; + IplImage* frame; + videoInput VI; +}; + +struct SuppressVideoInputMessages +{ + SuppressVideoInputMessages() { videoInput::setVerbose(true); } +}; + +static SuppressVideoInputMessages do_it; + +CvCaptureCAM_MSMF::CvCaptureCAM_MSMF(): + index(-1), + width(-1), + height(-1), + fourcc(-1), + widthSet(-1), + heightSet(-1), + frame(0), + VI(videoInput::getInstance()) +{ + CoInitialize(0); +} + +CvCaptureCAM_MSMF::~CvCaptureCAM_MSMF() +{ + close(); + CoUninitialize(); +} + +void CvCaptureCAM_MSMF::close() +{ + if( index >= 0 ) + { + VI.closeDevice(index); + index = -1; + cvReleaseImage(&frame); + } + widthSet = heightSet = width = height = -1; +} + +// Initialize camera input +bool CvCaptureCAM_MSMF::open( int _index ) +{ + int try_index = _index; + int devices = 0; + + close(); + devices = VI.listDevices(true); + if (devices == 0) + return false; + try_index = try_index < 0 ? 0 : (try_index > devices-1 ? devices-1 : try_index); + VI.setupDevice(try_index); + if( !VI.isFrameNew(try_index) ) + return false; + index = try_index; + return true; +} + +bool CvCaptureCAM_MSMF::grabFrame() +{ + return true; +} + + +IplImage* CvCaptureCAM_MSMF::retrieveFrame(int) +{ + if( !frame || (int)VI.getWidth(index) != frame->width || (int)VI.getHeight(index) != frame->height ) + { + if (frame) + cvReleaseImage( &frame ); + unsigned int w = VI.getWidth(index), h = VI.getHeight(index); + frame = cvCreateImage( cvSize(w,h), 8, 3 ); + } + + VI.getPixels( index, (uchar*)frame->imageData, false, true ); + + return frame; +} + +double CvCaptureCAM_MSMF::getProperty( int property_id ) +{ + // image format proprrties + switch( property_id ) + { + case CV_CAP_PROP_FRAME_WIDTH: + return VI.getWidth(index); + + case CV_CAP_PROP_FRAME_HEIGHT: + return VI.getHeight(index); + + case CV_CAP_PROP_FOURCC: + // FIXME: implement method in VideoInput back end + //return VI.getFourcc(index); + ; + + case CV_CAP_PROP_FPS: + // FIXME: implement method in VideoInput back end + //return VI.getFPS(index); + ; + } + + // video filter properties + switch( property_id ) + { + case CV_CAP_PROP_BRIGHTNESS: + case CV_CAP_PROP_CONTRAST: + case CV_CAP_PROP_HUE: + case CV_CAP_PROP_SATURATION: + case CV_CAP_PROP_SHARPNESS: + case CV_CAP_PROP_GAMMA: + case CV_CAP_PROP_MONOCROME: + case CV_CAP_PROP_WHITE_BALANCE_BLUE_U: + case CV_CAP_PROP_BACKLIGHT: + case CV_CAP_PROP_GAIN: + // FIXME: implement method in VideoInput back end + // if ( VI.getVideoSettingFilter(index, VI.getVideoPropertyFromCV(property_id), min_value, + // max_value, stepping_delta, current_value, flags,defaultValue) ) + // return (double)current_value; + return 0.; + } + + // camera properties + switch( property_id ) + { + case CV_CAP_PROP_PAN: + case CV_CAP_PROP_TILT: + case CV_CAP_PROP_ROLL: + case CV_CAP_PROP_ZOOM: + case CV_CAP_PROP_EXPOSURE: + case CV_CAP_PROP_IRIS: + case CV_CAP_PROP_FOCUS: + // FIXME: implement method in VideoInput back end + // if (VI.getVideoSettingCamera(index,VI.getCameraPropertyFromCV(property_id),min_value, + // max_value,stepping_delta,current_value,flags,defaultValue) ) return (double)current_value; + return 0.; + } + + // unknown parameter or value not available + return -1; +} + +bool CvCaptureCAM_MSMF::setProperty( int property_id, double value ) +{ + // image capture properties + bool handled = false; + switch( property_id ) + { + case CV_CAP_PROP_FRAME_WIDTH: + width = cvRound(value); + handled = true; + break; + + case CV_CAP_PROP_FRAME_HEIGHT: + height = cvRound(value); + handled = true; + break; + + case CV_CAP_PROP_FOURCC: + fourcc = (int)(unsigned long)(value); + if ( fourcc == -1 ) { + // following cvCreateVideo usage will pop up caprturepindialog here if fourcc=-1 + // TODO - how to create a capture pin dialog + } + handled = true; + break; + + case CV_CAP_PROP_FPS: + // FIXME: implement method in VideoInput back end + // int fps = cvRound(value); + // if (fps != VI.getFPS(index)) + // { + // VI.stopDevice(index); + // VI.setIdealFramerate(index,fps); + // if (widthSet > 0 && heightSet > 0) + // VI.setupDevice(index, widthSet, heightSet); + // else + // VI.setupDevice(index); + // } + // return VI.isDeviceSetup(index); + ; + + } + + if ( handled ) { + // a stream setting + if( width > 0 && height > 0 ) + { + if( width != (int)VI.getWidth(index) || height != (int)VI.getHeight(index) )//|| fourcc != VI.getFourcc(index) ) + { + // FIXME: implement method in VideoInput back end + // int fps = static_cast(VI.getFPS(index)); + // VI.stopDevice(index); + // VI.setIdealFramerate(index, fps); + // VI.setupDeviceFourcc(index, width, height, fourcc); + } + + bool success = VI.isDeviceSetup(index); + if (success) + { + widthSet = width; + heightSet = height; + width = height = fourcc = -1; + } + return success; + } + return true; + } + + // show video/camera filter dialog + // FIXME: implement method in VideoInput back end + // if ( property_id == CV_CAP_PROP_SETTINGS ) { + // VI.showSettingsWindow(index); + // return true; + // } + + //video Filter properties + switch( property_id ) + { + case CV_CAP_PROP_BRIGHTNESS: + case CV_CAP_PROP_CONTRAST: + case CV_CAP_PROP_HUE: + case CV_CAP_PROP_SATURATION: + case CV_CAP_PROP_SHARPNESS: + case CV_CAP_PROP_GAMMA: + case CV_CAP_PROP_MONOCROME: + case CV_CAP_PROP_WHITE_BALANCE_BLUE_U: + case CV_CAP_PROP_BACKLIGHT: + case CV_CAP_PROP_GAIN: + // FIXME: implement method in VideoInput back end + //return VI.setVideoSettingFilter(index,VI.getVideoPropertyFromCV(property_id),(long)value); + ; + } + + //camera properties + switch( property_id ) + { + case CV_CAP_PROP_PAN: + case CV_CAP_PROP_TILT: + case CV_CAP_PROP_ROLL: + case CV_CAP_PROP_ZOOM: + case CV_CAP_PROP_EXPOSURE: + case CV_CAP_PROP_IRIS: + case CV_CAP_PROP_FOCUS: + // FIXME: implement method in VideoInput back end + //return VI.setVideoSettingCamera(index,VI.getCameraPropertyFromCV(property_id),(long)value); + ; + } + + return false; +} + + +CvCapture* cvCreateCameraCapture_MSMF( int index ) +{ + CvCaptureCAM_MSMF* capture = new CvCaptureCAM_MSMF; + + try + { + if( capture->open( index )) + return capture; + } + catch(...) + { + delete capture; + throw; + } + + delete capture; + return 0; +} + +#endif diff --git a/modules/highgui/src/precomp.hpp b/modules/highgui/src/precomp.hpp index afa0735ee..aa327d6d7 100644 --- a/modules/highgui/src/precomp.hpp +++ b/modules/highgui/src/precomp.hpp @@ -118,6 +118,7 @@ CvVideoWriter* cvCreateVideoWriter_Win32( const char* filename, int fourcc, CvVideoWriter* cvCreateVideoWriter_VFW( const char* filename, int fourcc, double fps, CvSize frameSize, int is_color ); CvCapture* cvCreateCameraCapture_DShow( int index ); +CvCapture* cvCreateCameraCapture_MSMF( int index ); CvCapture* cvCreateCameraCapture_OpenNI( int index ); CvCapture* cvCreateFileCapture_OpenNI( const char* filename ); CvCapture* cvCreateCameraCapture_Android( int index ); diff --git a/modules/highgui/test/test_precomp.hpp b/modules/highgui/test/test_precomp.hpp index bbc6b41c7..0d0bd8022 100644 --- a/modules/highgui/test/test_precomp.hpp +++ b/modules/highgui/test/test_precomp.hpp @@ -18,7 +18,7 @@ #include "opencv2/imgproc/imgproc_c.h" #include -#if defined(HAVE_VIDEOINPUT) || \ +#if defined(HAVE_DSHOW) || \ defined(HAVE_TYZX) || \ defined(HAVE_VFW) || \ defined(HAVE_LIBV4L) || \ @@ -34,7 +34,7 @@ defined(HAVE_OPENNI) || \ defined(HAVE_XIMEA) || \ defined(HAVE_AVFOUNDATION) || \ - defined(HAVE_GIGE_API) || \ + defined(HAVE_GIGE_API) || \ (0) //defined(HAVE_ANDROID_NATIVE_CAMERA) || - enable after #1193 # define BUILD_WITH_CAMERA_SUPPORT 1 From ecea583afdd8ed10b13a5440d5d3aeb77c7ea83b Mon Sep 17 00:00:00 2001 From: peng xiao Date: Wed, 3 Apr 2013 15:57:26 +0800 Subject: [PATCH 37/67] Add ocl::stereobp function. OpenCL StereoBeliefPropagation, ported from GPU implementation. --- modules/ocl/include/opencv2/ocl/ocl.hpp | 67 +++ modules/ocl/src/opencl/stereobp.cl | 380 +++++++++++++++++ modules/ocl/src/stereobp.cpp | 519 ++++++++++++++++++++++++ modules/ocl/test/test_calib3d.cpp | 55 ++- 4 files changed, 1015 insertions(+), 6 deletions(-) create mode 100644 modules/ocl/src/opencl/stereobp.cl create mode 100644 modules/ocl/src/stereobp.cpp diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp index da7ca27ae..fa97b7df3 100644 --- a/modules/ocl/include/opencv2/ocl/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl/ocl.hpp @@ -1701,6 +1701,73 @@ namespace cv private: oclMat minSSD, leBuf, riBuf; }; + class CV_EXPORTS StereoBeliefPropagation + { + public: + enum { DEFAULT_NDISP = 64 }; + enum { DEFAULT_ITERS = 5 }; + enum { DEFAULT_LEVELS = 5 }; + static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels); + explicit StereoBeliefPropagation(int ndisp = DEFAULT_NDISP, + int iters = DEFAULT_ITERS, + int levels = DEFAULT_LEVELS, + int msg_type = CV_16S); + StereoBeliefPropagation(int ndisp, int iters, int levels, + float max_data_term, float data_weight, + float max_disc_term, float disc_single_jump, + int msg_type = CV_32F); + void operator()(const oclMat &left, const oclMat &right, oclMat &disparity); + void operator()(const oclMat &data, oclMat &disparity); + int ndisp; + int iters; + int levels; + float max_data_term; + float data_weight; + float max_disc_term; + float disc_single_jump; + int msg_type; + private: + oclMat u, d, l, r, u2, d2, l2, r2; + std::vector datas; + oclMat out; + }; + class CV_EXPORTS StereoConstantSpaceBP + { + public: + enum { DEFAULT_NDISP = 128 }; + enum { DEFAULT_ITERS = 8 }; + enum { DEFAULT_LEVELS = 4 }; + enum { DEFAULT_NR_PLANE = 4 }; + static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels, int &nr_plane); + explicit StereoConstantSpaceBP(int ndisp = DEFAULT_NDISP, + int iters = DEFAULT_ITERS, + int levels = DEFAULT_LEVELS, + int nr_plane = DEFAULT_NR_PLANE, + int msg_type = CV_32F); + StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane, + float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, + int min_disp_th = 0, + int msg_type = CV_32F); + void operator()(const oclMat &left, const oclMat &right, oclMat &disparity); + int ndisp; + int iters; + int levels; + int nr_plane; + float max_data_term; + float data_weight; + float max_disc_term; + float disc_single_jump; + int min_disp_th; + int msg_type; + bool use_local_init_data_cost; + private: + oclMat u[2], d[2], l[2], r[2]; + oclMat disp_selected_pyr[2]; + oclMat data_cost; + oclMat data_cost_selected; + oclMat temp; + oclMat out; + }; } } #if defined _MSC_VER && _MSC_VER >= 1200 diff --git a/modules/ocl/src/opencl/stereobp.cl b/modules/ocl/src/opencl/stereobp.cl new file mode 100644 index 000000000..3196e581a --- /dev/null +++ b/modules/ocl/src/opencl/stereobp.cl @@ -0,0 +1,380 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Jia Haipeng, jiahaipeng95@gmail.com +// Peng Xiao, pengxiao@outlook.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other GpuMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#if defined (DOUBLE_SUPPORT) + +#ifdef cl_khr_fp64 +#pragma OPENCL EXTENSION cl_khr_fp64:enable +#elif defined (cl_amd_fp64) +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#endif + +#endif + +#ifdef T_FLOAT +#define T float +#else +#define T short +#endif + +/////////////////////////////////////////////////////////////// +/////////////////common/////////////////////////////////////// +///////////////////////////////////////////////////////////// +T saturate_cast(float v){ +#ifdef T_SHORT + return convert_short_sat_rte(v); +#else + return v; +#endif +} + +#define FLOAT_MAX 3.402823466e+38f +typedef struct +{ + int cndisp; + float cmax_data_term; + float cdata_weight; + float cmax_disc_term; + float cdisc_single_jump; +}con_srtuct_t; +/////////////////////////////////////////////////////////////// +////////////////////////// comp data ////////////////////////// +/////////////////////////////////////////////////////////////// + +float pix_diff_1(__global const uchar *ls, __global const uchar *rs) +{ + return abs((int)(*ls) - *rs); +} + +float pix_diff_3(__global const uchar *ls, __global const uchar *rs) +{ + const float tr = 0.299f; + const float tg = 0.587f; + const float tb = 0.114f; + + float val; + + val = tb * abs((int)ls[0] - rs[0]); + val += tg * abs((int)ls[1] - rs[1]); + val += tr * abs((int)ls[2] - rs[2]); + + return val; +} +float pix_diff_4(__global const uchar *ls, __global const uchar *rs) +{ + uchar4 l, r; + l = *((__global uchar4 *)ls); + r = *((__global uchar4 *)rs); + + const float tr = 0.299f; + const float tg = 0.587f; + const float tb = 0.114f; + + float val; + + val = tb * abs((int)l.x - r.x); + val += tg * abs((int)l.y - r.y); + val += tr * abs((int)l.z - r.z); + + return val; +} + + +#ifndef CN +#define CN 4 +#endif + +#define CAT(X,Y) X##Y +#define CAT2(X,Y) CAT(X,Y) + +#define PIX_DIFF CAT2(pix_diff_, CN) + +__kernel void comp_data(__global uchar *left, int left_rows, int left_cols, int left_step, + __global uchar *right, int right_step, + __global T *data, int data_step, + __constant con_srtuct_t *con_st) +{ + int x = get_global_id(0); + int y = get_global_id(1); + + if (y > 0 && y < (left_rows - 1) && x > 0 && x < (left_cols - 1)) + { + data_step /= sizeof(T); + const __global uchar* ls = left + y * left_step + x * CN; + const __global uchar* rs = right + y * right_step + x * CN; + + __global T *ds = data + y * data_step + x; + + const unsigned int disp_step = data_step * left_rows; + + for (int disp = 0; disp < con_st -> cndisp; disp++) + { + if (x - disp >= 1) + { + float val = 0; + val = PIX_DIFF(ls, rs - disp * CN); + ds[disp * disp_step] = saturate_cast(fmin(con_st -> cdata_weight * val, + con_st -> cdata_weight * con_st -> cmax_data_term)); + } + else + { + ds[disp * disp_step] = saturate_cast(con_st -> cdata_weight * con_st -> cmax_data_term); + } + } + } +} + +/////////////////////////////////////////////////////////////// +//////////////////////// data step down /////////////////////// +/////////////////////////////////////////////////////////////// +__kernel void data_step_down(__global T *src, int src_rows, + __global T *dst, int dst_rows, int dst_cols, + int src_step, int dst_step, + int cndisp) +{ + const int x = get_global_id(0); + const int y = get_global_id(1); + + if (x < dst_cols && y < dst_rows) + { + src_step /= sizeof(T); + dst_step /= sizeof(T); + for (int d = 0; d < cndisp; ++d) + { + float dst_reg; + dst_reg = src[(d * src_rows + (2*y+0)) * src_step + 2*x+0]; + dst_reg += src[(d * src_rows + (2*y+1)) * src_step + 2*x+0]; + dst_reg += src[(d * src_rows + (2*y+0)) * src_step + 2*x+1]; + dst_reg += src[(d * src_rows + (2*y+1)) * src_step + 2*x+1]; + + dst[(d * dst_rows + y) * dst_step + x] = saturate_cast(dst_reg); + } + } +} + +/////////////////////////////////////////////////////////////// +/////////////////// level up messages //////////////////////// +/////////////////////////////////////////////////////////////// +__kernel void level_up_message(__global T *src, int src_rows, int src_step, + __global T *dst, int dst_rows, int dst_cols, int dst_step, + int cndisp) +{ + const int x = get_global_id(0); + const int y = get_global_id(1); + + if (x < dst_cols && y < dst_rows) + { + src_step /= sizeof(T); + dst_step /= sizeof(T); + + const int dst_disp_step = dst_step * dst_rows; + const int src_disp_step = src_step * src_rows; + + __global T *dstr = dst + y * dst_step + x; + __global const T *srcr = src + (y / 2 * src_step) + (x / 2); + + for (int d = 0; d < cndisp; ++d) + dstr[d * dst_disp_step] = srcr[d * src_disp_step]; + } +} + +/////////////////////////////////////////////////////////////// +//////////////////// calc all iterations ///////////////////// +/////////////////////////////////////////////////////////////// +void calc_min_linear_penalty(__global T * dst, int disp_step, + int cndisp, float cdisc_single_jump) +{ + float prev = dst[0]; + float cur; + + for (int disp = 1; disp < cndisp; ++disp) + { + prev += cdisc_single_jump; + cur = dst[disp_step * disp]; + + if (prev < cur) + { + cur = prev; + dst[disp_step * disp] = saturate_cast(prev); + } + + prev = cur; + } + + prev = dst[(cndisp - 1) * disp_step]; + for (int disp = cndisp - 2; disp >= 0; disp--) + { + prev += cdisc_single_jump; + cur = dst[disp_step * disp]; + + if (prev < cur) + { + cur = prev; + dst[disp_step * disp] = saturate_cast(prev); + } + prev = cur; + } +} +void message(const __global T *msg1, const __global T *msg2, + const __global T *msg3, const __global T *data, __global T *dst, + int msg_disp_step, int data_disp_step, int cndisp, float cmax_disc_term, float cdisc_single_jump) +{ + float minimum = FLOAT_MAX; + + for(int i = 0; i < cndisp; ++i) + { + float dst_reg; + dst_reg = msg1[msg_disp_step * i]; + dst_reg += msg2[msg_disp_step * i]; + dst_reg += msg3[msg_disp_step * i]; + dst_reg += data[data_disp_step * i]; + + if (dst_reg < minimum) + minimum = dst_reg; + + dst[msg_disp_step * i] = saturate_cast(dst_reg); + } + + calc_min_linear_penalty(dst, msg_disp_step, cndisp, cdisc_single_jump); + + minimum += cmax_disc_term; + + float sum = 0; + for(int i = 0; i < cndisp; ++i) + { + float dst_reg = dst[msg_disp_step * i]; + if (dst_reg > minimum) + { + dst_reg = minimum; + dst[msg_disp_step * i] = saturate_cast(minimum); + } + sum += dst_reg; + } + sum /= cndisp; + + for(int i = 0; i < cndisp; ++i) + dst[msg_disp_step * i] -= sum; +} +__kernel void one_iteration(__global T *u, int u_step, + __global T *data, int data_step, + __global T *d, __global T *l, __global T *r, + int t, int cols, int rows, + int cndisp, float cmax_disc_term, float cdisc_single_jump) +{ + const int y = get_global_id(1); + const int x = ((get_global_id(0)) << 1) + ((y + t) & 1); + + if ((y > 0) && (y < rows - 1) && (x > 0) && (x < cols - 1)) + { + u_step /= sizeof(T); + data_step /= sizeof(T); + + __global T *us = u + y * u_step + x; + __global T *ds = d + y * u_step + x; + __global T *ls = l + y * u_step + x; + __global T *rs = r + y * u_step + x; + const __global T *dt = data + y * data_step + x; + + int msg_disp_step = u_step * rows; + int data_disp_step = data_step * rows; + + message(us + u_step, ls + 1, rs - 1, dt, us, msg_disp_step, data_disp_step, cndisp, + cmax_disc_term, cdisc_single_jump); + message(ds - u_step, ls + 1, rs - 1, dt, ds, msg_disp_step, data_disp_step, cndisp, + cmax_disc_term, cdisc_single_jump); + + message(us + u_step, ds - u_step, rs - 1, dt, rs, msg_disp_step, data_disp_step, cndisp, + cmax_disc_term, cdisc_single_jump); + message(us + u_step, ds - u_step, ls + 1, dt, ls, msg_disp_step, data_disp_step, cndisp, + cmax_disc_term, cdisc_single_jump); + } +} + +/////////////////////////////////////////////////////////////// +/////////////////////////// output //////////////////////////// +/////////////////////////////////////////////////////////////// +__kernel void output(const __global T *u, int u_step, + const __global T *d, const __global T *l, + const __global T *r, const __global T *data, + __global T *disp, int disp_rows, int disp_cols, int disp_step, + int cndisp) +{ + const int x = get_global_id(0); + const int y = get_global_id(1); + + if (y > 0 && y < disp_rows - 1 && x > 0 && x < disp_cols - 1) + { + u_step /= sizeof(T); + disp_step /= sizeof(T); + const __global T *us = u + (y + 1) * u_step + x; + const __global T *ds = d + (y - 1) * u_step + x; + const __global T *ls = l + y * u_step + (x + 1); + const __global T *rs = r + y * u_step + (x - 1); + const __global T *dt = data + y * u_step + x; + + int disp_steps = disp_rows * u_step; + + int best = 0; + float best_val = FLOAT_MAX; + for (int d = 0; d < cndisp; ++d) + { + float val; + val = us[d * disp_steps]; + val += ds[d * disp_steps]; + val += ls[d * disp_steps]; + val += rs[d * disp_steps]; + val += dt[d * disp_steps]; + + if (val < best_val) + { + best_val = val; + best = d; + } + } + + (disp + y * disp_step)[x] = convert_short_sat(best); + } +} diff --git a/modules/ocl/src/stereobp.cpp b/modules/ocl/src/stereobp.cpp new file mode 100644 index 000000000..acc31c9e5 --- /dev/null +++ b/modules/ocl/src/stereobp.cpp @@ -0,0 +1,519 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Institute Of Software Chinese Academy Of Science, all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Jia Haipeng, jiahaipeng95@gmail.com +// Peng Xiao, pengxiao@outlook.com +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other oclMaterials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" +#include +#include + +using namespace cv; +using namespace cv::ocl; +using namespace std; + +//////////////////////////////////////////////////////////////////////// +///////////////// stereoBP ///////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////// + +namespace cv +{ + namespace ocl + { + + ///////////////////////////OpenCL kernel strings/////////////////////////// + extern const char *stereobp; + } + +} +namespace cv +{ + namespace ocl + { + namespace stereoBP + { + ////////////////////////////////////////////////////////////////////////// + //////////////////////////////common//////////////////////////////////// + //////////////////////////////////////////////////////////////////////// + typedef struct + { + int cndisp; + float cmax_data_term; + float cdata_weight; + float cmax_disc_term; + float cdisc_single_jump; + } con_struct_t; + + cl_mem cl_con_struct = NULL; + static void load_constants(Context *clCxt, int ndisp, float max_data_term, float data_weight, + float max_disc_term, float disc_single_jump) + { + con_struct_t *con_struct = new con_struct_t; + con_struct -> cndisp = ndisp; + con_struct -> cmax_data_term = max_data_term; + con_struct -> cdata_weight = data_weight; + con_struct -> cmax_disc_term = max_disc_term; + con_struct -> cdisc_single_jump = disc_single_jump; + + cl_con_struct = load_constant(clCxt->impl->clContext, clCxt->impl->clCmdQueue, (void *)con_struct, + sizeof(con_struct_t)); + + delete con_struct; + } + static void release_constants() + { + openCLFree(cl_con_struct); + } + static inline int divUp(int total, int grain) + { + return (total + grain - 1) / grain; + } + ///////////////////////////////////////////////////////////////////////////// + ///////////////////////////comp data//////////////////////////////////////// + ///////////////////////////////////////////////////////////////////////// + static void comp_data_call(const oclMat &left, const oclMat &right, oclMat &data, int /*disp*/, + float /*cmax_data_term*/, float /*cdata_weight*/) + { + Context *clCxt = left.clCxt; + int channels = left.oclchannels(); + int data_type = data.type(); + + string kernelName = "comp_data"; + + vector > args; + + args.push_back( make_pair( sizeof(cl_mem) , (void *)&left.data)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&left.rows)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&left.cols)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&left.step)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&right.data)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&right.step)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&data.data)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&data.step)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&cl_con_struct)); + + size_t gt[3] = {left.cols, left.rows, 1}, lt[3] = {16, 16, 1}; + + const int OPT_SIZE = 50; + char cn_opt [OPT_SIZE] = ""; + sprintf( cn_opt, "%s -D CN=%d", + (data_type == CV_16S ? "-D T_SHORT":"-D T_FLOAT"), + channels + ); + openCLExecuteKernel(clCxt, &stereobp, kernelName, gt, lt, args, -1, -1, cn_opt); + } + /////////////////////////////////////////////////////////////////////////////////// + /////////////////////////data set down//////////////////////////////////////////// + ///////////////////////////////////////////////////////////////////////////////// + static void data_step_down_call(int dst_cols, int dst_rows, int src_rows, + const oclMat &src, oclMat &dst, int disp) + { + Context *clCxt = src.clCxt; + int data_type = src.type(); + + string kernelName = "data_step_down"; + + vector > args; + + args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src_rows)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_rows)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_cols)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.step)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&disp)); + + size_t gt[3] = {dst_cols, dst_rows, 1}, lt[3] = {16, 16, 1}; + char* t_opt = data_type == CV_16S ? "-D T_SHORT":"-D T_FLOAT"; + openCLExecuteKernel(clCxt, &stereobp, kernelName, gt, lt, args, -1, -1, t_opt); + } + ///////////////////////////////////////////////////////////////////////////////// + ///////////////////////////live up message//////////////////////////////////////// + ///////////////////////////////////////////////////////////////////////////////// + static void level_up_message_call(int dst_cols, int dst_rows, int src_rows, + oclMat &src, oclMat &dst, int ndisp) + { + Context *clCxt = src.clCxt; + int data_type = src.type(); + + string kernelName = "level_up_message"; + vector > args; + + args.push_back( make_pair( sizeof(cl_mem) , (void *)&src.data)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src_rows)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&src.step)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&dst.data)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_rows)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&dst_cols)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&dst.step)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&ndisp)); + + size_t gt[3] = {dst_cols, dst_rows, 1}, lt[3] = {16, 16, 1}; + char* t_opt = data_type == CV_16S ? "-D T_SHORT":"-D T_FLOAT"; + openCLExecuteKernel(clCxt, &stereobp, kernelName, gt, lt, args, -1, -1, t_opt); + } + static void level_up_messages_calls(int dst_idx, int dst_cols, int dst_rows, int src_rows, + oclMat *mus, oclMat *mds, oclMat *mls, oclMat *mrs, + int ndisp) + { + int src_idx = (dst_idx + 1) & 1; + + level_up_message_call(dst_cols, dst_rows, src_rows, + mus[src_idx], mus[dst_idx], ndisp); + + level_up_message_call(dst_cols, dst_rows, src_rows, + mds[src_idx], mds[dst_idx], ndisp); + + level_up_message_call(dst_cols, dst_rows, src_rows, + mls[src_idx], mls[dst_idx], ndisp); + + level_up_message_call(dst_cols, dst_rows, src_rows, + mrs[src_idx], mrs[dst_idx], ndisp); + } + ////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////cals_all_iterations_call/////////////////////////// + ///////////////////////////////////////////////////////////////////////////////// + static void calc_all_iterations_call(int cols, int rows, oclMat &u, oclMat &d, + oclMat &l, oclMat &r, oclMat &data, + int t, int cndisp, float cmax_disc_term, + float cdisc_single_jump) + { + Context *clCxt = l.clCxt; + int data_type = u.type(); + + string kernelName = "one_iteration"; + + vector > args; + + args.push_back( make_pair( sizeof(cl_mem) , (void *)&u.data)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&u.step)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&data.data)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&data.step)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&d.data)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&l.data)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&r.data)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&t)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&cols)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&rows)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&cndisp)); + args.push_back( make_pair( sizeof(cl_float) , (void *)&cmax_disc_term)); + args.push_back( make_pair( sizeof(cl_float) , (void *)&cdisc_single_jump)); + + size_t gt[3] = {cols, rows, 1}, lt[3] = {16, 16, 1}; + char* t_opt = data_type == CV_16S ? "-D T_SHORT":"-D T_FLOAT"; + openCLExecuteKernel(clCxt, &stereobp, kernelName, gt, lt, args, -1, -1, t_opt); + } + + static void calc_all_iterations_calls(int cols, int rows, int iters, oclMat &u, + oclMat &d, oclMat &l, oclMat &r, + oclMat &data, int cndisp, float cmax_disc_term, + float cdisc_single_jump) + { + for(int t = 0; t < iters; ++t) + calc_all_iterations_call(cols, rows, u, d, l, r, data, t, cndisp, + cmax_disc_term, cdisc_single_jump); + } + /////////////////////////////////////////////////////////////////////////////// + ///////////////////////output/////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////// + static void output_call(const oclMat &u, const oclMat &d, const oclMat l, const oclMat &r, + const oclMat &data, oclMat &disp, int ndisp) + { + Context *clCxt = u.clCxt; + int data_type = u.type(); + + string kernelName = "output"; + + vector > args; + + args.push_back( make_pair( sizeof(cl_mem) , (void *)&u.data)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&u.step)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&d.data)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&l.data)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&r.data)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&data.data)); + args.push_back( make_pair( sizeof(cl_mem) , (void *)&disp.data)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&disp.rows)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&disp.cols)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&disp.step)); + args.push_back( make_pair( sizeof(cl_int) , (void *)&ndisp)); + + size_t gt[3] = {disp.cols, disp.rows, 1}, lt[3] = {16, 16, 1}; + char* t_opt = data_type == CV_16S ? "-D T_SHORT":"-D T_FLOAT"; + openCLExecuteKernel(clCxt, &stereobp, kernelName, gt, lt, args, -1, -1, t_opt); + } + } + } +} +namespace +{ + const float DEFAULT_MAX_DATA_TERM = 10.0f; + const float DEFAULT_DATA_WEIGHT = 0.07f; + const float DEFAULT_MAX_DISC_TERM = 1.7f; + const float DEFAULT_DISC_SINGLE_JUMP = 1.0f; +} + +void cv::ocl::StereoBeliefPropagation::estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels) +{ + ndisp = width / 4; + if ((ndisp & 1) != 0) + ndisp++; + + int mm = ::max(width, height); + iters = mm / 100 + 2; + + levels = (int)(::log(static_cast(mm)) + 1) * 4 / 5; + if (levels == 0) levels++; +} + +cv::ocl::StereoBeliefPropagation::StereoBeliefPropagation(int ndisp_, int iters_, int levels_, int msg_type_) + : ndisp(ndisp_), iters(iters_), levels(levels_), + max_data_term(DEFAULT_MAX_DATA_TERM), data_weight(DEFAULT_DATA_WEIGHT), + max_disc_term(DEFAULT_MAX_DISC_TERM), disc_single_jump(DEFAULT_DISC_SINGLE_JUMP), + msg_type(msg_type_), datas(levels_) +{ +} + +cv::ocl::StereoBeliefPropagation::StereoBeliefPropagation(int ndisp_, int iters_, int levels_, float max_data_term_, float data_weight_, float max_disc_term_, float disc_single_jump_, int msg_type_) + : ndisp(ndisp_), iters(iters_), levels(levels_), + max_data_term(max_data_term_), data_weight(data_weight_), + max_disc_term(max_disc_term_), disc_single_jump(disc_single_jump_), + msg_type(msg_type_), datas(levels_) +{ +} + +namespace +{ + class StereoBeliefPropagationImpl + { + public: + StereoBeliefPropagationImpl(StereoBeliefPropagation &rthis_, + oclMat &u_, oclMat &d_, oclMat &l_, oclMat &r_, + oclMat &u2_, oclMat &d2_, oclMat &l2_, oclMat &r2_, + vector &datas_, oclMat &out_) + : rthis(rthis_), u(u_), d(d_), l(l_), r(r_), u2(u2_), d2(d2_), l2(l2_), r2(r2_), datas(datas_), out(out_), + zero(Scalar::all(0)), scale(rthis_.msg_type == CV_32F ? 1.0f : 10.0f) + { + CV_Assert(0 < rthis.ndisp && 0 < rthis.iters && 0 < rthis.levels); + CV_Assert(rthis.msg_type == CV_32F || rthis.msg_type == CV_16S); + CV_Assert(rthis.msg_type == CV_32F || (1 << (rthis.levels - 1)) * scale * rthis.max_data_term < numeric_limits::max()); + } + + void operator()(const oclMat &left, const oclMat &right, oclMat &disp) + { + CV_Assert(left.size() == right.size() && left.type() == right.type()); + CV_Assert(left.type() == CV_8UC1 || left.type() == CV_8UC3 || left.type() == CV_8UC4); + + rows = left.rows; + cols = left.cols; + + int divisor = (int)pow(2.f, rthis.levels - 1.0f); + int lowest_cols = cols / divisor; + int lowest_rows = rows / divisor; + const int min_image_dim_size = 2; + CV_Assert(min(lowest_cols, lowest_rows) > min_image_dim_size); + + init(); + + datas[0].create(rows * rthis.ndisp, cols, rthis.msg_type); + datas[0].setTo(Scalar_::all(0)); + + cv::ocl::stereoBP::comp_data_call(left, right, datas[0], rthis.ndisp, rthis.max_data_term, scale * rthis.data_weight); + calcBP(disp); + } + + void operator()(const oclMat &data, oclMat &disp) + { + CV_Assert((data.type() == rthis.msg_type) && (data.rows % rthis.ndisp == 0)); + + rows = data.rows / rthis.ndisp; + cols = data.cols; + + int divisor = (int)pow(2.f, rthis.levels - 1.0f); + int lowest_cols = cols / divisor; + int lowest_rows = rows / divisor; + const int min_image_dim_size = 2; + CV_Assert(min(lowest_cols, lowest_rows) > min_image_dim_size); + + init(); + + datas[0] = data; + + calcBP(disp); + } + private: + void init() + { + u.create(rows * rthis.ndisp, cols, rthis.msg_type); + d.create(rows * rthis.ndisp, cols, rthis.msg_type); + l.create(rows * rthis.ndisp, cols, rthis.msg_type); + r.create(rows * rthis.ndisp, cols, rthis.msg_type); + + if (rthis.levels & 1) + { + //can clear less area + u = zero; + d = zero; + l = zero; + r = zero; + } + + if (rthis.levels > 1) + { + int less_rows = (rows + 1) / 2; + int less_cols = (cols + 1) / 2; + + u2.create(less_rows * rthis.ndisp, less_cols, rthis.msg_type); + d2.create(less_rows * rthis.ndisp, less_cols, rthis.msg_type); + l2.create(less_rows * rthis.ndisp, less_cols, rthis.msg_type); + r2.create(less_rows * rthis.ndisp, less_cols, rthis.msg_type); + + if ((rthis.levels & 1) == 0) + { + u2 = zero; + d2 = zero; + l2 = zero; + r2 = zero; + } + } + + cv::ocl::stereoBP::load_constants(u.clCxt, rthis.ndisp, rthis.max_data_term, scale * rthis.data_weight, + scale * rthis.max_disc_term, scale * rthis.disc_single_jump); + + datas.resize(rthis.levels); + cols_all.resize(rthis.levels); + rows_all.resize(rthis.levels); + + cols_all[0] = cols; + rows_all[0] = rows; + } + + void calcBP(oclMat &disp) + { + using namespace cv::ocl::stereoBP; + + for (int i = 1; i < rthis.levels; ++i) + { + cols_all[i] = (cols_all[i - 1] + 1) / 2; + rows_all[i] = (rows_all[i - 1] + 1) / 2; + + datas[i].create(rows_all[i] * rthis.ndisp, cols_all[i], rthis.msg_type); + datas[i].setTo(Scalar_::all(0)); + + data_step_down_call(cols_all[i], rows_all[i], rows_all[i - 1], + datas[i - 1], datas[i], rthis.ndisp); + } + + oclMat mus[] = {u, u2}; + oclMat mds[] = {d, d2}; + oclMat mrs[] = {r, r2}; + oclMat mls[] = {l, l2}; + + int mem_idx = (rthis.levels & 1) ? 0 : 1; + + for (int i = rthis.levels - 1; i >= 0; --i) + { + // for lower level we have already computed messages by setting to zero + if (i != rthis.levels - 1) + level_up_messages_calls(mem_idx, cols_all[i], rows_all[i], rows_all[i + 1], + mus, mds, mls, mrs, rthis.ndisp); + + calc_all_iterations_calls(cols_all[i], rows_all[i], rthis.iters, mus[mem_idx], + mds[mem_idx], mls[mem_idx], mrs[mem_idx], datas[i], + rthis.ndisp, scale * rthis.max_disc_term, + scale * rthis.disc_single_jump); + + mem_idx = (mem_idx + 1) & 1; + } + if (disp.empty()) + disp.create(rows, cols, CV_16S); + + out = ((disp.type() == CV_16S) ? disp : (out.create(rows, cols, CV_16S), out)); + out = zero; + + output_call(u, d, l, r, datas.front(), out, rthis.ndisp); + + if (disp.type() != CV_16S) + out.convertTo(disp, disp.type()); + + release_constants(); + } + StereoBeliefPropagationImpl& operator=(const StereoBeliefPropagationImpl&); + + StereoBeliefPropagation &rthis; + + oclMat &u; + oclMat &d; + oclMat &l; + oclMat &r; + + oclMat &u2; + oclMat &d2; + oclMat &l2; + oclMat &r2; + + vector &datas; + oclMat &out; + + const Scalar zero; + const float scale; + + int rows, cols; + + vector cols_all, rows_all; + }; +} + +void cv::ocl::StereoBeliefPropagation::operator()(const oclMat &left, const oclMat &right, oclMat &disp) +{ + ::StereoBeliefPropagationImpl impl(*this, u, d, l, r, u2, d2, l2, r2, datas, out); + impl(left, right, disp); +} + +void cv::ocl::StereoBeliefPropagation::operator()(const oclMat &data, oclMat &disp) +{ + ::StereoBeliefPropagationImpl impl(*this, u, d, l, r, u2, d2, l2, r2, datas, out); + impl(data, disp); +} + diff --git a/modules/ocl/test/test_calib3d.cpp b/modules/ocl/test/test_calib3d.cpp index 58dbcc2e3..179829e0e 100644 --- a/modules/ocl/test/test_calib3d.cpp +++ b/modules/ocl/test/test_calib3d.cpp @@ -15,7 +15,7 @@ // Third party copyrights are property of their respective owners. // // @Authors - +// Peng Xiao, pengxiao@outlook.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -63,12 +63,12 @@ PARAM_TEST_CASE(StereoMatchBM, int, int) } }; -TEST_P(StereoMatchBM, Accuracy) +TEST_P(StereoMatchBM, Regression) { - Mat left_image = readImage(workdir + "../ocl/aloe-L.png", IMREAD_GRAYSCALE); - Mat right_image = readImage(workdir + "../ocl/aloe-R.png", IMREAD_GRAYSCALE); - Mat disp_gold = readImage(workdir + "../ocl/aloe-disp.png", IMREAD_GRAYSCALE); + Mat left_image = readImage("stereobm/aloe-L.png", IMREAD_GRAYSCALE); + Mat right_image = readImage("stereobm/aloe-R.png", IMREAD_GRAYSCALE); + Mat disp_gold = readImage("stereobm/aloe-disp.png", IMREAD_GRAYSCALE); ocl::oclMat d_left, d_right; ocl::oclMat d_disp(left_image.size(), CV_8U); Mat disp; @@ -88,7 +88,50 @@ TEST_P(StereoMatchBM, Accuracy) EXPECT_MAT_SIMILAR(disp_gold, disp, 1e-3); } -INSTANTIATE_TEST_CASE_P(GPU_Calib3D, StereoMatchBM, testing::Combine(testing::Values(128), +INSTANTIATE_TEST_CASE_P(OCL_Calib3D, StereoMatchBM, testing::Combine(testing::Values(128), testing::Values(19))); +PARAM_TEST_CASE(StereoMatchBP, int, int, int, float, float, float, float) +{ + int ndisp_; + int iters_; + int levels_; + float max_data_term_; + float data_weight_; + float max_disc_term_; + float disc_single_jump_; + virtual void SetUp() + { + ndisp_ = GET_PARAM(0); + iters_ = GET_PARAM(1); + levels_ = GET_PARAM(2); + max_data_term_ = GET_PARAM(3); + data_weight_ = GET_PARAM(4); + max_disc_term_ = GET_PARAM(5); + disc_single_jump_ = GET_PARAM(6); + } +}; +TEST_P(StereoMatchBP, Regression) +{ + Mat left_image = readImage("stereobp/aloe-L.png"); + Mat right_image = readImage("stereobp/aloe-R.png"); + Mat disp_gold = readImage("stereobp/aloe-disp.png", IMREAD_GRAYSCALE); + ocl::oclMat d_left, d_right; + ocl::oclMat d_disp; + Mat disp; + ASSERT_FALSE(left_image.empty()); + ASSERT_FALSE(right_image.empty()); + ASSERT_FALSE(disp_gold.empty()); + d_left.upload(left_image); + d_right.upload(right_image); + ocl::StereoBeliefPropagation bp(ndisp_, iters_, levels_, max_data_term_, data_weight_, + max_disc_term_, disc_single_jump_, CV_16S); + bp(d_left, d_right, d_disp); + d_disp.download(disp); + disp.convertTo(disp, disp_gold.depth()); + EXPECT_MAT_NEAR(disp_gold, disp, 0.0, ""); +} +INSTANTIATE_TEST_CASE_P(OCL_Calib3D, StereoMatchBP, testing::Combine(testing::Values(64), + testing::Values(8),testing::Values(2),testing::Values(25.0f), + testing::Values(0.1f),testing::Values(15.0f),testing::Values(1.0f))); #endif // HAVE_OPENCL From 9d7e51eb4686670a81e455f222abc14d173cbc71 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 3 Apr 2013 12:39:03 +0400 Subject: [PATCH 38/67] fixed SparseMat Iterator compilation error (bug #2921) --- modules/core/include/opencv2/core/core.hpp | 2 ++ modules/core/include/opencv2/core/mat.hpp | 12 ++++++++++++ 2 files changed, 14 insertions(+) diff --git a/modules/core/include/opencv2/core/core.hpp b/modules/core/include/opencv2/core/core.hpp index 8ac713818..7caf7538c 100644 --- a/modules/core/include/opencv2/core/core.hpp +++ b/modules/core/include/opencv2/core/core.hpp @@ -3767,6 +3767,7 @@ public: SparseMatConstIterator_(); //! the full constructor setting the iterator to the first sparse matrix element SparseMatConstIterator_(const SparseMat_<_Tp>* _m); + SparseMatConstIterator_(const SparseMat* _m); //! the copy constructor SparseMatConstIterator_(const SparseMatConstIterator_& it); @@ -3796,6 +3797,7 @@ public: SparseMatIterator_(); //! the full constructor setting the iterator to the first sparse matrix element SparseMatIterator_(SparseMat_<_Tp>* _m); + SparseMatIterator_(SparseMat* _m); //! the copy constructor SparseMatIterator_(const SparseMatIterator_& it); diff --git a/modules/core/include/opencv2/core/mat.hpp b/modules/core/include/opencv2/core/mat.hpp index c19caf902..f798d7f4a 100644 --- a/modules/core/include/opencv2/core/mat.hpp +++ b/modules/core/include/opencv2/core/mat.hpp @@ -2530,6 +2530,13 @@ SparseMatConstIterator_<_Tp>::SparseMatConstIterator_(const SparseMat_<_Tp>* _m) : SparseMatConstIterator(_m) {} +template inline +SparseMatConstIterator_<_Tp>::SparseMatConstIterator_(const SparseMat* _m) +: SparseMatConstIterator(_m) +{ + CV_Assert( _m->type() == DataType<_Tp>::type ); +} + template inline SparseMatConstIterator_<_Tp>::SparseMatConstIterator_(const SparseMatConstIterator_<_Tp>& it) : SparseMatConstIterator(it) @@ -2569,6 +2576,11 @@ SparseMatIterator_<_Tp>::SparseMatIterator_(SparseMat_<_Tp>* _m) : SparseMatConstIterator_<_Tp>(_m) {} +template inline +SparseMatIterator_<_Tp>::SparseMatIterator_(SparseMat* _m) +: SparseMatConstIterator_<_Tp>(_m) +{} + template inline SparseMatIterator_<_Tp>::SparseMatIterator_(const SparseMatIterator_<_Tp>& it) : SparseMatConstIterator_<_Tp>(it) From 7758322fd3f0c3d02365e8199ba7d7dfbcd46d7c Mon Sep 17 00:00:00 2001 From: peng xiao Date: Wed, 3 Apr 2013 16:42:44 +0800 Subject: [PATCH 39/67] Fix some build errors. --- modules/ocl/include/opencv2/ocl/ocl.hpp | 37 ------------------------- modules/ocl/src/stereobp.cpp | 6 ++-- 2 files changed, 3 insertions(+), 40 deletions(-) diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp index fa97b7df3..613179f8b 100644 --- a/modules/ocl/include/opencv2/ocl/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl/ocl.hpp @@ -1731,43 +1731,6 @@ namespace cv std::vector datas; oclMat out; }; - class CV_EXPORTS StereoConstantSpaceBP - { - public: - enum { DEFAULT_NDISP = 128 }; - enum { DEFAULT_ITERS = 8 }; - enum { DEFAULT_LEVELS = 4 }; - enum { DEFAULT_NR_PLANE = 4 }; - static void estimateRecommendedParams(int width, int height, int &ndisp, int &iters, int &levels, int &nr_plane); - explicit StereoConstantSpaceBP(int ndisp = DEFAULT_NDISP, - int iters = DEFAULT_ITERS, - int levels = DEFAULT_LEVELS, - int nr_plane = DEFAULT_NR_PLANE, - int msg_type = CV_32F); - StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane, - float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, - int min_disp_th = 0, - int msg_type = CV_32F); - void operator()(const oclMat &left, const oclMat &right, oclMat &disparity); - int ndisp; - int iters; - int levels; - int nr_plane; - float max_data_term; - float data_weight; - float max_disc_term; - float disc_single_jump; - int min_disp_th; - int msg_type; - bool use_local_init_data_cost; - private: - oclMat u[2], d[2], l[2], r[2]; - oclMat disp_selected_pyr[2]; - oclMat data_cost; - oclMat data_cost_selected; - oclMat temp; - oclMat out; - }; } } #if defined _MSC_VER && _MSC_VER >= 1200 diff --git a/modules/ocl/src/stereobp.cpp b/modules/ocl/src/stereobp.cpp index acc31c9e5..70624879a 100644 --- a/modules/ocl/src/stereobp.cpp +++ b/modules/ocl/src/stereobp.cpp @@ -85,7 +85,7 @@ namespace cv } con_struct_t; cl_mem cl_con_struct = NULL; - static void load_constants(Context *clCxt, int ndisp, float max_data_term, float data_weight, + static void load_constants(int ndisp, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump) { con_struct_t *con_struct = new con_struct_t; @@ -95,7 +95,7 @@ namespace cv con_struct -> cmax_disc_term = max_disc_term; con_struct -> cdisc_single_jump = disc_single_jump; - cl_con_struct = load_constant(clCxt->impl->clContext, clCxt->impl->clCmdQueue, (void *)con_struct, + cl_con_struct = load_constant(*((cl_context*)getoclContext()), *((cl_command_queue*)getoclCommandQueue()), (void *)con_struct, sizeof(con_struct_t)); delete con_struct; @@ -418,7 +418,7 @@ namespace } } - cv::ocl::stereoBP::load_constants(u.clCxt, rthis.ndisp, rthis.max_data_term, scale * rthis.data_weight, + cv::ocl::stereoBP::load_constants(rthis.ndisp, rthis.max_data_term, scale * rthis.data_weight, scale * rthis.max_disc_term, scale * rthis.disc_single_jump); datas.resize(rthis.levels); From 917138f565c4fefeccf7c7c5e4caeb587ecf226f Mon Sep 17 00:00:00 2001 From: peng xiao Date: Wed, 3 Apr 2013 17:36:05 +0800 Subject: [PATCH 40/67] Fix compilation errors. --- modules/ocl/src/stereobp.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/ocl/src/stereobp.cpp b/modules/ocl/src/stereobp.cpp index 70624879a..bd88ec047 100644 --- a/modules/ocl/src/stereobp.cpp +++ b/modules/ocl/src/stereobp.cpp @@ -165,7 +165,7 @@ namespace cv args.push_back( make_pair( sizeof(cl_int) , (void *)&disp)); size_t gt[3] = {dst_cols, dst_rows, 1}, lt[3] = {16, 16, 1}; - char* t_opt = data_type == CV_16S ? "-D T_SHORT":"-D T_FLOAT"; + const char* t_opt = data_type == CV_16S ? "-D T_SHORT":"-D T_FLOAT"; openCLExecuteKernel(clCxt, &stereobp, kernelName, gt, lt, args, -1, -1, t_opt); } ///////////////////////////////////////////////////////////////////////////////// @@ -190,7 +190,7 @@ namespace cv args.push_back( make_pair( sizeof(cl_int) , (void *)&ndisp)); size_t gt[3] = {dst_cols, dst_rows, 1}, lt[3] = {16, 16, 1}; - char* t_opt = data_type == CV_16S ? "-D T_SHORT":"-D T_FLOAT"; + const char* t_opt = data_type == CV_16S ? "-D T_SHORT":"-D T_FLOAT"; openCLExecuteKernel(clCxt, &stereobp, kernelName, gt, lt, args, -1, -1, t_opt); } static void level_up_messages_calls(int dst_idx, int dst_cols, int dst_rows, int src_rows, @@ -241,7 +241,7 @@ namespace cv args.push_back( make_pair( sizeof(cl_float) , (void *)&cdisc_single_jump)); size_t gt[3] = {cols, rows, 1}, lt[3] = {16, 16, 1}; - char* t_opt = data_type == CV_16S ? "-D T_SHORT":"-D T_FLOAT"; + const char* t_opt = data_type == CV_16S ? "-D T_SHORT":"-D T_FLOAT"; openCLExecuteKernel(clCxt, &stereobp, kernelName, gt, lt, args, -1, -1, t_opt); } @@ -280,7 +280,7 @@ namespace cv args.push_back( make_pair( sizeof(cl_int) , (void *)&ndisp)); size_t gt[3] = {disp.cols, disp.rows, 1}, lt[3] = {16, 16, 1}; - char* t_opt = data_type == CV_16S ? "-D T_SHORT":"-D T_FLOAT"; + const char* t_opt = data_type == CV_16S ? "-D T_SHORT":"-D T_FLOAT"; openCLExecuteKernel(clCxt, &stereobp, kernelName, gt, lt, args, -1, -1, t_opt); } } From 870563ba25e95c2da8e1f067485796ce938ed4d1 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 3 Apr 2013 13:29:04 +0400 Subject: [PATCH 41/67] fixed compilation warning for CUDA files command line option '-Wsign-promo' is valid for C++/ObjC++ but not for C [enabled by default] --- cmake/OpenCVDetectCUDA.cmake | 5 ++++- modules/gpu/CMakeLists.txt | 2 -- modules/superres/CMakeLists.txt | 2 -- 3 files changed, 4 insertions(+), 5 deletions(-) diff --git a/cmake/OpenCVDetectCUDA.cmake b/cmake/OpenCVDetectCUDA.cmake index e853a8d0a..4c3248be2 100644 --- a/cmake/OpenCVDetectCUDA.cmake +++ b/cmake/OpenCVDetectCUDA.cmake @@ -142,11 +142,14 @@ if(CUDA_FOUND) foreach(var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG) set(${var}_backup_in_cuda_compile_ "${${var}}") - # we reomove /EHa as it leasd warnings under windows + # we remove /EHa as it generates warnings under windows string(REPLACE "/EHa" "" ${var} "${${var}}") # we remove -ggdb3 flag as it leads to preprocessor errors when compiling CUDA files (CUDA 4.1) string(REPLACE "-ggdb3" "" ${var} "${${var}}") + + # we remove -Wsign-promo as it generates warnings under linux + string(REPLACE "-Wsign-promo" "" ${var} "${${var}}") endforeach() if(BUILD_SHARED_LIBS) diff --git a/modules/gpu/CMakeLists.txt b/modules/gpu/CMakeLists.txt index 8fba2ef4e..550922641 100644 --- a/modules/gpu/CMakeLists.txt +++ b/modules/gpu/CMakeLists.txt @@ -29,8 +29,6 @@ if(HAVE_CUDA) source_group("Src\\NVidia" FILES ${ncv_files}) ocv_include_directories("src/nvidia" "src/nvidia/core" "src/nvidia/NPP_staging" ${CUDA_INCLUDE_DIRS}) ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations -Wshadow -Wunused-parameter /wd4211 /wd4201 /wd4100 /wd4505 /wd4408) - string(REPLACE "-Wsign-promo" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") - #set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler;/EHsc-;") if(MSVC) if(NOT ENABLE_NOISY_WARNINGS) diff --git a/modules/superres/CMakeLists.txt b/modules/superres/CMakeLists.txt index 5e82629ae..92ce01c2d 100644 --- a/modules/superres/CMakeLists.txt +++ b/modules/superres/CMakeLists.txt @@ -9,8 +9,6 @@ ocv_module_include_directories() ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef /wd4127) if(HAVE_CUDA) - string(REPLACE "-Wsign-promo" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") - ocv_source_group("Src\\Cuda" GLOB "src/cuda/*.cu") ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpu/include" ${CUDA_INCLUDE_DIRS}) From 3929a4387360ffd1e2b2ab7a9994fb17e29e98a0 Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Wed, 3 Apr 2013 14:43:23 +0400 Subject: [PATCH 42/67] Native activity install fixed. --- samples/android/native-activity/.classpath | 8 ++ samples/android/native-activity/.cproject | 75 +++++++++++++++++++ samples/android/native-activity/.project | 33 ++++++++ .../.settings/org.eclipse.jdt.core.prefs | 4 + 4 files changed, 120 insertions(+) create mode 100644 samples/android/native-activity/.classpath create mode 100644 samples/android/native-activity/.cproject create mode 100644 samples/android/native-activity/.project create mode 100644 samples/android/native-activity/.settings/org.eclipse.jdt.core.prefs diff --git a/samples/android/native-activity/.classpath b/samples/android/native-activity/.classpath new file mode 100644 index 000000000..3f9691c5d --- /dev/null +++ b/samples/android/native-activity/.classpath @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/samples/android/native-activity/.cproject b/samples/android/native-activity/.cproject new file mode 100644 index 000000000..09687f3ac --- /dev/null +++ b/samples/android/native-activity/.cproject @@ -0,0 +1,75 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/samples/android/native-activity/.project b/samples/android/native-activity/.project new file mode 100644 index 000000000..cf0823c0b --- /dev/null +++ b/samples/android/native-activity/.project @@ -0,0 +1,33 @@ + + + OpenCV Sample - native-activity + + + + + + com.android.ide.eclipse.adt.ResourceManagerBuilder + + + + + com.android.ide.eclipse.adt.PreCompilerBuilder + + + + + org.eclipse.jdt.core.javabuilder + + + + + com.android.ide.eclipse.adt.ApkBuilder + + + + + + com.android.ide.eclipse.adt.AndroidNature + org.eclipse.jdt.core.javanature + + diff --git a/samples/android/native-activity/.settings/org.eclipse.jdt.core.prefs b/samples/android/native-activity/.settings/org.eclipse.jdt.core.prefs new file mode 100644 index 000000000..b080d2ddc --- /dev/null +++ b/samples/android/native-activity/.settings/org.eclipse.jdt.core.prefs @@ -0,0 +1,4 @@ +eclipse.preferences.version=1 +org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6 +org.eclipse.jdt.core.compiler.compliance=1.6 +org.eclipse.jdt.core.compiler.source=1.6 From a2561ee0cdc551e502fcd06b04e193521e8eac27 Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Wed, 3 Apr 2013 03:48:23 -0700 Subject: [PATCH 43/67] Code review notes fixed. --- CMakeLists.txt | 2 +- cmake/OpenCVDetectCUDA.cmake | 2 +- cmake/OpenCVLegacyOptions.cmake | 1 + cmake/{ => checks}/OpenCVDetectCudaArch.cu | 0 .../core/include/opencv2/core/internal.hpp | 1 + modules/highgui/src/cap_msmf.cpp | 1116 ++--------------- platforms/winrt/arm.winrt.toolchain.cmake | 1 + 7 files changed, 107 insertions(+), 1016 deletions(-) rename cmake/{ => checks}/OpenCVDetectCudaArch.cu (100%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 9bf394f93..21922e6e9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -141,7 +141,7 @@ OCV_OPTION(WITH_TIFF "Include TIFF support" ON OCV_OPTION(WITH_UNICAP "Include Unicap support (GPL)" OFF IF (UNIX AND NOT APPLE AND NOT ANDROID) ) OCV_OPTION(WITH_V4L "Include Video 4 Linux support" ON IF (UNIX AND NOT ANDROID) ) OCV_OPTION(WITH_DSHOW "Build HighGUI with DirectShow support" ON IF (WIN32 AND NOT ARM) ) -OCV_OPTION(WITH_MSMF "Build HighGUI with Media Foundation support" OFF ) +OCV_OPTION(WITH_MSMF "Build HighGUI with Media Foundation support" OFF IF WIN32 ) OCV_OPTION(WITH_XIMEA "Include XIMEA cameras support" OFF IF (NOT ANDROID AND NOT APPLE) ) OCV_OPTION(WITH_XINE "Include Xine support (GPL)" OFF IF (UNIX AND NOT APPLE AND NOT ANDROID) ) OCV_OPTION(WITH_OPENCL "Include OpenCL Runtime support" OFF IF (NOT ANDROID AND NOT IOS) ) diff --git a/cmake/OpenCVDetectCUDA.cmake b/cmake/OpenCVDetectCUDA.cmake index e853a8d0a..91cf1504b 100644 --- a/cmake/OpenCVDetectCUDA.cmake +++ b/cmake/OpenCVDetectCUDA.cmake @@ -57,7 +57,7 @@ if(CUDA_FOUND) elseif(CUDA_GENERATION STREQUAL "Kepler") set(__cuda_arch_bin "3.0") elseif(CUDA_GENERATION STREQUAL "Auto") - execute_process( COMMAND "${CUDA_NVCC_EXECUTABLE}" "${OpenCV_SOURCE_DIR}/cmake/OpenCVDetectCudaArch.cu" "--run" + execute_process( COMMAND "${CUDA_NVCC_EXECUTABLE}" "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" "--run" WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/" RESULT_VARIABLE _nvcc_res OUTPUT_VARIABLE _nvcc_out ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE) diff --git a/cmake/OpenCVLegacyOptions.cmake b/cmake/OpenCVLegacyOptions.cmake index a34c9e5ab..e05ad4c48 100644 --- a/cmake/OpenCVLegacyOptions.cmake +++ b/cmake/OpenCVLegacyOptions.cmake @@ -12,6 +12,7 @@ endmacro() ocv_legacy_option(BUILD_NEW_PYTHON_SUPPORT BUILD_opencv_python) ocv_legacy_option(BUILD_JAVA_SUPPORT BUILD_opencv_java) ocv_legacy_option(WITH_ANDROID_CAMERA BUILD_opencv_androidcamera) +ocv_legacy_option(WITH_VIDEOINPUT WITH_DSHOW) if(DEFINED OPENCV_BUILD_3RDPARTY_LIBS) set(BUILD_ZLIB ${OPENCV_BUILD_3RDPARTY_LIBS} CACHE BOOL "Set via depricated OPENCV_BUILD_3RDPARTY_LIBS" FORCE) diff --git a/cmake/OpenCVDetectCudaArch.cu b/cmake/checks/OpenCVDetectCudaArch.cu similarity index 100% rename from cmake/OpenCVDetectCudaArch.cu rename to cmake/checks/OpenCVDetectCudaArch.cu diff --git a/modules/core/include/opencv2/core/internal.hpp b/modules/core/include/opencv2/core/internal.hpp index 8902e69de..5335fa01f 100644 --- a/modules/core/include/opencv2/core/internal.hpp +++ b/modules/core/include/opencv2/core/internal.hpp @@ -136,6 +136,7 @@ CV_INLINE IppiSize ippiSize(int width, int height) #ifdef __ARM_NEON__ # include # define CV_NEON 1 +# define CPU_HAS_NEON_FEATURE (true) #endif #ifndef CV_SSE diff --git a/modules/highgui/src/cap_msmf.cpp b/modules/highgui/src/cap_msmf.cpp index dbbad6f85..52b780463 100644 --- a/modules/highgui/src/cap_msmf.cpp +++ b/modules/highgui/src/cap_msmf.cpp @@ -38,20 +38,15 @@ // the use of this software, even if advised of the possibility of such damage. // //M*/ - #include "precomp.hpp" - #if (defined WIN32 || defined _WIN32) && defined HAVE_MSMF - /* Media Foundation-based Video Capturing module is based on videoInput library by Evgeny Pereguda: http://www.codeproject.com/Articles/559437/Capturing-of-video-from-web-camera-on-Windows-7-an - Originaly licensed under The Code Project Open License (CPOL) 1.02: http://www.codeproject.com/info/cpol10.aspx */ - #include #include #include @@ -59,32 +54,25 @@ #include #include #include "Strsafe.h" - #include #include #include #include - #include #include #include - #pragma warning(disable:4503) - #pragma comment(lib, "mfplat") #pragma comment(lib, "mf") #pragma comment(lib, "mfuuid") #pragma comment(lib, "Strmiids") #pragma comment(lib, "MinCore_Downlevel") - struct IMFMediaType; struct IMFActivate; struct IMFMediaSource; struct IMFAttributes; - namespace { - template void SafeRelease(T **ppT) { if (*ppT) @@ -93,7 +81,6 @@ template void SafeRelease(T **ppT) *ppT = NULL; } } - /// Class for printing info into consol class DebugPrintOut { @@ -103,72 +90,43 @@ public: void printOut(const wchar_t *format, ...); void setVerbose(bool state); bool verbose; - -private: - DebugPrintOut(void); +private: + DebugPrintOut(void); }; - // Structure for collecting info about types of video, which are supported by current video device struct MediaType { unsigned int MF_MT_FRAME_SIZE; - unsigned int height; - unsigned int width; - unsigned int MF_MT_YUV_MATRIX; - unsigned int MF_MT_VIDEO_LIGHTING; - unsigned int MF_MT_DEFAULT_STRIDE; - unsigned int MF_MT_VIDEO_CHROMA_SITING; - GUID MF_MT_AM_FORMAT_TYPE; - wchar_t *pMF_MT_AM_FORMAT_TYPEName; - unsigned int MF_MT_FIXED_SIZE_SAMPLES; - unsigned int MF_MT_VIDEO_NOMINAL_RANGE; - unsigned int MF_MT_FRAME_RATE; - unsigned int MF_MT_FRAME_RATE_low; - unsigned int MF_MT_PIXEL_ASPECT_RATIO; - unsigned int MF_MT_PIXEL_ASPECT_RATIO_low; - unsigned int MF_MT_ALL_SAMPLES_INDEPENDENT; - unsigned int MF_MT_FRAME_RATE_RANGE_MIN; - unsigned int MF_MT_FRAME_RATE_RANGE_MIN_low; - unsigned int MF_MT_SAMPLE_SIZE; - unsigned int MF_MT_VIDEO_PRIMARIES; - unsigned int MF_MT_INTERLACE_MODE; - unsigned int MF_MT_FRAME_RATE_RANGE_MAX; - unsigned int MF_MT_FRAME_RATE_RANGE_MAX_low; - GUID MF_MT_MAJOR_TYPE; - GUID MF_MT_SUBTYPE; - wchar_t *pMF_MT_MAJOR_TYPEName; wchar_t *pMF_MT_SUBTYPEName; - MediaType(); ~MediaType(); void Clear(); }; - /// Class for parsing info from IMFMediaType into the local MediaType class FormatReader { @@ -178,102 +136,71 @@ public: private: FormatReader(void); }; - DWORD WINAPI MainThreadFunction( LPVOID lpParam ); - typedef void(*emergensyStopEventCallback)(int, void *); - typedef unsigned char BYTE; - class RawImage { public: ~RawImage(void); // Function of creation of the instance of the class static long CreateInstance(RawImage **ppRImage,unsigned int size); - void setCopy(const BYTE * pSampleBuffer); void fastCopy(const BYTE * pSampleBuffer); unsigned char * getpPixels(); bool isNew(); unsigned int getSize(); - private: - bool ri_new; unsigned int ri_size; unsigned char *ri_pixels; RawImage(unsigned int size); }; - // Class for grabbing image from video stream class ImageGrabber : public IMFSampleGrabberSinkCallback { public: ~ImageGrabber(void); - HRESULT initImageGrabber(IMFMediaSource *pSource, GUID VideoFormat); - HRESULT startGrabbing(void); - void stopGrabbing(); - RawImage *getRawImage(); - // Function of creation of the instance of the class static HRESULT CreateInstance(ImageGrabber **ppIG,unsigned int deviceID); - private: - bool ig_RIE; - bool ig_Close; - long m_cRef; - unsigned int ig_DeviceID; - IMFMediaSource *ig_pSource; - IMFMediaSession *ig_pSession; - IMFTopology *ig_pTopology; - RawImage *ig_RIFirst; - RawImage *ig_RISecond; - RawImage *ig_RIOut; - ImageGrabber(unsigned int deviceID); - HRESULT CreateTopology(IMFMediaSource *pSource, IMFActivate *pSinkActivate, IMFTopology **ppTopo); - HRESULT AddSourceNode( - IMFTopology *pTopology, - IMFMediaSource *pSource, - IMFPresentationDescriptor *pPD, - IMFStreamDescriptor *pSD, + IMFTopology *pTopology, + IMFMediaSource *pSource, + IMFPresentationDescriptor *pPD, + IMFStreamDescriptor *pSD, IMFTopologyNode **ppNode); - HRESULT AddOutputNode( - IMFTopology *pTopology, - IMFActivate *pActivate, - DWORD dwId, + IMFTopology *pTopology, + IMFActivate *pActivate, + DWORD dwId, IMFTopologyNode **ppNode); - // IUnknown methods STDMETHODIMP QueryInterface(REFIID iid, void** ppv); STDMETHODIMP_(ULONG) AddRef(); STDMETHODIMP_(ULONG) Release(); - // IMFClockStateSink methods STDMETHODIMP OnClockStart(MFTIME hnsSystemTime, LONGLONG llClockStartOffset); STDMETHODIMP OnClockStop(MFTIME hnsSystemTime); STDMETHODIMP OnClockPause(MFTIME hnsSystemTime); STDMETHODIMP OnClockRestart(MFTIME hnsSystemTime); STDMETHODIMP OnClockSetRate(MFTIME hnsSystemTime, float flRate); - // IMFSampleGrabberSinkCallback methods STDMETHODIMP OnSetPresentationClock(IMFPresentationClock* pClock); STDMETHODIMP OnProcessSample(REFGUID guidMajorMediaType, DWORD dwSampleFlags, @@ -281,66 +208,40 @@ private: DWORD dwSampleSize); STDMETHODIMP OnShutdown(); }; - /// Class for controlling of thread of the grabbing raw data from video device class ImageGrabberThread { friend DWORD WINAPI MainThreadFunction( LPVOID lpParam ); - public: ~ImageGrabberThread(void); - static HRESULT CreateInstance(ImageGrabberThread **ppIGT, IMFMediaSource *pSource, unsigned int deviceID); - void start(); - void stop(); - void setEmergencyStopEvent(void *userData, void(*func)(int, void *)); - ImageGrabber *getImageGrabber(); - protected: - virtual void run(); - private: - ImageGrabberThread(IMFMediaSource *pSource, unsigned int deviceID); - HANDLE igt_Handle; - DWORD igt_ThreadIdArray; - ImageGrabber *igt_pImageGrabber; - emergensyStopEventCallback igt_func; - void *igt_userData; - bool igt_stop; - unsigned int igt_DeviceID; }; - // Structure for collecting info about one parametr of current video device struct Parametr { long CurrentValue; - long Min; - long Max; - long Step; - long Default; - long Flag; - Parametr(); }; - // Structure for collecting info about 17 parametrs of current video device struct CamParametrs { @@ -354,7 +255,6 @@ struct CamParametrs Parametr WhiteBalance; Parametr BacklightCompensation; Parametr Gain; - Parametr Pan; Parametr Tilt; Parametr Roll; @@ -363,138 +263,77 @@ struct CamParametrs Parametr Iris; Parametr Focus; }; - typedef std::wstring String; - typedef std::vector vectorNum; - typedef std::map SUBTYPEMap; - typedef std::map FrameRateMap; - typedef void(*emergensyStopEventCallback)(int, void *); - /// Class for controlling of video device class videoDevice { - public: videoDevice(void); ~videoDevice(void); - void closeDevice(); - CamParametrs getParametrs(); - void setParametrs(CamParametrs parametrs); - void setEmergencyStopEvent(void *userData, void(*func)(int, void *)); - long readInfoOfDevice(IMFActivate *pActivate, unsigned int Num); - wchar_t *getName(); - int getCountFormats(); - unsigned int getWidth(); - unsigned int getHeight(); - MediaType getFormat(unsigned int id); - bool setupDevice(unsigned int w, unsigned int h, unsigned int idealFramerate = 0); - bool setupDevice(unsigned int id); - bool isDeviceSetup(); - bool isDeviceMediaSource(); - bool isDeviceRawDataSource(); - bool isFrameNew(); - IMFMediaSource *getMediaSource(); - RawImage *getRawImageOut(); - private: - enum typeLock { MediaSourceLock, - RawDataLock, - OpenLock - } vd_LockOut; - wchar_t *vd_pFriendlyName; - ImageGrabberThread *vd_pImGrTh; - CamParametrs vd_PrevParametrs; - unsigned int vd_Width; - unsigned int vd_Height; - unsigned int vd_CurrentNumber; - bool vd_IsSetuped; - std::map vd_CaptureFormats; - std::vector vd_CurrentFormats; - IMFMediaSource *vd_pSource; - emergensyStopEventCallback vd_func; - void *vd_userData; - long enumerateCaptureFormats(IMFMediaSource *pSource); - long setDeviceFormat(IMFMediaSource *pSource, unsigned long dwFormatIndex); - void buildLibraryofTypes(); - int findType(unsigned int size, unsigned int frameRate = 0); - long resetDevice(IMFActivate *pActivate); - long initDevice(); - long checkDevice(IMFAttributes *pAttributes, IMFActivate **pDevice); }; - - /// Class for managing of list of video devices class videoDevices { public: ~videoDevices(void); - long initDevices(IMFAttributes *pAttributes); - static videoDevices& getInstance(); - videoDevice *getDevice(unsigned int i); - unsigned int getCount(); - void clearDevices(); - private: - UINT32 count; - std::vector vds_Devices; - videoDevices(void); }; - // Class for creating of Media Foundation context class Media_Foundation { @@ -502,212 +341,146 @@ public: virtual ~Media_Foundation(void); static Media_Foundation& getInstance(); bool buildListOfDevices(); - private: Media_Foundation(void); - }; - /// The only visiable class for controlling of video devices in format singelton class videoInput { public: virtual ~videoInput(void); - // Getting of static instance of videoInput class static videoInput& getInstance(); - // Closing video device with deviceID void closeDevice(int deviceID); - // Setting callback function for emergency events(for example: removing video device with deviceID) with userData void setEmergencyStopEvent(int deviceID, void *userData, void(*func)(int, void *)); - // Closing all devices void closeAllDevices(); - // Getting of parametrs of video device with deviceID CamParametrs getParametrs(int deviceID); - // Setting of parametrs of video device with deviceID void setParametrs(int deviceID, CamParametrs parametrs); - // Getting numbers of existence videodevices with listing in consol unsigned int listDevices(bool silent = false); - // Getting numbers of formats, which are supported by videodevice with deviceID unsigned int getCountFormats(int deviceID); - // Getting width of image, which is getting from videodevice with deviceID unsigned int getWidth(int deviceID); - // Getting height of image, which is getting from videodevice with deviceID unsigned int getHeight(int deviceID); - // Getting name of videodevice with deviceID wchar_t *getNameVideoDevice(int deviceID); - // Getting interface MediaSource for Media Foundation from videodevice with deviceID IMFMediaSource *getMediaSource(int deviceID); - - // Getting format with id, which is supported by videodevice with deviceID + // Getting format with id, which is supported by videodevice with deviceID MediaType getFormat(int deviceID, int unsigned id); - // Checking of existence of the suitable video devices bool isDevicesAcceable(); - // Checking of using the videodevice with deviceID bool isDeviceSetup(int deviceID); - // Checking of using MediaSource from videodevice with deviceID bool isDeviceMediaSource(int deviceID); - // Checking of using Raw Data of pixels from videodevice with deviceID bool isDeviceRawDataSource(int deviceID); - // Setting of the state of outprinting info in console static void setVerbose(bool state); - // Initialization of video device with deviceID by media type with id bool setupDevice(int deviceID, unsigned int id = 0); - // Initialization of video device with deviceID by wisth w, height h and fps idealFramerate bool setupDevice(int deviceID, unsigned int w, unsigned int h, unsigned int idealFramerate = 30); - - // Checking of recivig of new frame from video device with deviceID + // Checking of recivig of new frame from video device with deviceID bool isFrameNew(int deviceID); - // Writing of Raw Data pixels from video device with deviceID with correction of RedAndBlue flipping flipRedAndBlue and vertical flipping flipImage bool getPixels(int deviceID, unsigned char * pixels, bool flipRedAndBlue = false, bool flipImage = false); - -private: - +private: bool accessToDevices; - videoInput(void); - void processPixels(unsigned char * src, unsigned char * dst, unsigned int width, unsigned int height, unsigned int bpp, bool bRGB, bool bFlip); - void updateListOfDevices(); }; - DebugPrintOut::DebugPrintOut(void):verbose(true) { } - DebugPrintOut::~DebugPrintOut(void) { } - DebugPrintOut& DebugPrintOut::getInstance() { static DebugPrintOut instance; - return instance; } - void DebugPrintOut::printOut(const wchar_t *format, ...) { if(verbose) { int i = 0; - wchar_t *p = NULL; - va_list args; - va_start(args, format); - if(wcscmp(format, L"%i")) { i = va_arg (args, int); } - if(wcscmp(format, L"%s")) { p = va_arg (args, wchar_t *); } - wprintf(format, i,p); - va_end (args); } } - void DebugPrintOut::setVerbose(bool state) { verbose = state; } - LPCWSTR GetGUIDNameConstNew(const GUID& guid); HRESULT GetGUIDNameNew(const GUID& guid, WCHAR **ppwsz); - HRESULT LogAttributeValueByIndexNew(IMFAttributes *pAttr, DWORD index); HRESULT SpecialCaseAttributeValueNew(GUID guid, const PROPVARIANT& var, MediaType &out); - unsigned int *GetParametr(GUID guid, MediaType &out) { - if(guid == MF_MT_YUV_MATRIX) + if(guid == MF_MT_YUV_MATRIX) return &(out.MF_MT_YUV_MATRIX); - - if(guid == MF_MT_VIDEO_LIGHTING) + if(guid == MF_MT_VIDEO_LIGHTING) return &(out.MF_MT_VIDEO_LIGHTING); - - if(guid == MF_MT_DEFAULT_STRIDE) + if(guid == MF_MT_DEFAULT_STRIDE) return &(out.MF_MT_DEFAULT_STRIDE); - - if(guid == MF_MT_VIDEO_CHROMA_SITING) + if(guid == MF_MT_VIDEO_CHROMA_SITING) return &(out.MF_MT_VIDEO_CHROMA_SITING); - - if(guid == MF_MT_VIDEO_NOMINAL_RANGE) + if(guid == MF_MT_VIDEO_NOMINAL_RANGE) return &(out.MF_MT_VIDEO_NOMINAL_RANGE); - - if(guid == MF_MT_ALL_SAMPLES_INDEPENDENT) + if(guid == MF_MT_ALL_SAMPLES_INDEPENDENT) return &(out.MF_MT_ALL_SAMPLES_INDEPENDENT); - - if(guid == MF_MT_FIXED_SIZE_SAMPLES) + if(guid == MF_MT_FIXED_SIZE_SAMPLES) return &(out.MF_MT_FIXED_SIZE_SAMPLES); - - if(guid == MF_MT_SAMPLE_SIZE) + if(guid == MF_MT_SAMPLE_SIZE) return &(out.MF_MT_SAMPLE_SIZE); - - if(guid == MF_MT_VIDEO_PRIMARIES) + if(guid == MF_MT_VIDEO_PRIMARIES) return &(out.MF_MT_VIDEO_PRIMARIES); - - if(guid == MF_MT_INTERLACE_MODE) + if(guid == MF_MT_INTERLACE_MODE) return &(out.MF_MT_INTERLACE_MODE); - return NULL; } - HRESULT LogAttributeValueByIndexNew(IMFAttributes *pAttr, DWORD index, MediaType &out) { WCHAR *pGuidName = NULL; WCHAR *pGuidValName = NULL; - GUID guid = { 0 }; - PROPVARIANT var; PropVariantInit(&var); - HRESULT hr = pAttr->GetItemByIndex(index, &guid, &var); - if (FAILED(hr)) { goto done; } - hr = GetGUIDNameNew(guid, &pGuidName); - if (FAILED(hr)) { goto done; } - hr = SpecialCaseAttributeValueNew(guid, var, out); - unsigned int *p; - if (FAILED(hr)) { goto done; @@ -717,118 +490,83 @@ HRESULT LogAttributeValueByIndexNew(IMFAttributes *pAttr, DWORD index, MediaType switch (var.vt) { case VT_UI4: - p = GetParametr(guid, out); - - if(p) + if(p) { *p = var.ulVal; } - break; - case VT_UI8: - break; - case VT_R8: - break; - case VT_CLSID: if(guid == MF_MT_AM_FORMAT_TYPE) { hr = GetGUIDNameNew(*var.puuid, &pGuidValName); - if (SUCCEEDED(hr)) { out.MF_MT_AM_FORMAT_TYPE = MF_MT_AM_FORMAT_TYPE; - out.pMF_MT_AM_FORMAT_TYPEName = pGuidValName; - pGuidValName = NULL; } } - if(guid == MF_MT_MAJOR_TYPE) { hr = GetGUIDNameNew(*var.puuid, &pGuidValName); - if (SUCCEEDED(hr)) { out.MF_MT_MAJOR_TYPE = MF_MT_MAJOR_TYPE; - out.pMF_MT_MAJOR_TYPEName = pGuidValName; - pGuidValName = NULL; } } - if(guid == MF_MT_SUBTYPE) { hr = GetGUIDNameNew(*var.puuid, &pGuidValName); - if (SUCCEEDED(hr)) { out.MF_MT_SUBTYPE = MF_MT_SUBTYPE; - out.pMF_MT_SUBTYPEName = pGuidValName; - pGuidValName = NULL; } } - break; - case VT_LPWSTR: - break; - case VT_VECTOR | VT_UI1: - break; - case VT_UNKNOWN: - break; - default: - break; } } - done: CoTaskMemFree(pGuidName); CoTaskMemFree(pGuidValName); PropVariantClear(&var); return hr; } - HRESULT GetGUIDNameNew(const GUID& guid, WCHAR **ppwsz) { HRESULT hr = S_OK; WCHAR *pName = NULL; - LPCWSTR pcwsz = GetGUIDNameConstNew(guid); if (pcwsz) { size_t cchLength = 0; - hr = StringCchLengthW(pcwsz, STRSAFE_MAX_CCH, &cchLength); if (FAILED(hr)) { goto done; } - pName = (WCHAR*)CoTaskMemAlloc((cchLength + 1) * sizeof(WCHAR)); - if (pName == NULL) { hr = E_OUTOFMEMORY; goto done; } - hr = StringCchCopyW(pName, cchLength + 1, pcwsz); if (FAILED(hr)) { @@ -839,7 +577,6 @@ HRESULT GetGUIDNameNew(const GUID& guid, WCHAR **ppwsz) { hr = StringFromCLSID(guid, &pName); } - done: if (FAILED(hr)) { @@ -852,97 +589,73 @@ done: } return hr; } - void LogUINT32AsUINT64New(const PROPVARIANT& var, UINT32 &uHigh, UINT32 &uLow) { Unpack2UINT32AsUINT64(var.uhVal.QuadPart, &uHigh, &uLow); - } - float OffsetToFloatNew(const MFOffset& offset) { return offset.value + (static_cast(offset.fract) / 65536.0f); } - HRESULT LogVideoAreaNew(const PROPVARIANT& var) { if (var.caub.cElems < sizeof(MFVideoArea)) { return S_OK; } - return S_OK; } - HRESULT SpecialCaseAttributeValueNew(GUID guid, const PROPVARIANT& var, MediaType &out) { if (guid == MF_MT_FRAME_SIZE) { UINT32 uHigh = 0, uLow = 0; - LogUINT32AsUINT64New(var, uHigh, uLow); - out.width = uHigh; - out.height = uLow; - out.MF_MT_FRAME_SIZE = out.width * out.height; } else if (guid == MF_MT_FRAME_RATE) { UINT32 uHigh = 0, uLow = 0; - LogUINT32AsUINT64New(var, uHigh, uLow); - out.MF_MT_FRAME_RATE = uHigh; - out.MF_MT_FRAME_RATE_low = uLow; } else if (guid == MF_MT_FRAME_RATE_RANGE_MAX) { UINT32 uHigh = 0, uLow = 0; - LogUINT32AsUINT64New(var, uHigh, uLow); - out.MF_MT_FRAME_RATE_RANGE_MAX = uHigh; - out.MF_MT_FRAME_RATE_RANGE_MAX_low = uLow; } else if (guid == MF_MT_FRAME_RATE_RANGE_MIN) { UINT32 uHigh = 0, uLow = 0; - LogUINT32AsUINT64New(var, uHigh, uLow); - out.MF_MT_FRAME_RATE_RANGE_MIN = uHigh; - out.MF_MT_FRAME_RATE_RANGE_MIN_low = uLow; } else if (guid == MF_MT_PIXEL_ASPECT_RATIO) { UINT32 uHigh = 0, uLow = 0; - LogUINT32AsUINT64New(var, uHigh, uLow); - out.MF_MT_PIXEL_ASPECT_RATIO = uHigh; - out.MF_MT_PIXEL_ASPECT_RATIO_low = uLow; - } + } else { return S_FALSE; } return S_OK; } - #ifndef IF_EQUAL_RETURN #define IF_EQUAL_RETURN(param, val) if(val == param) return L#val #endif - LPCWSTR GetGUIDNameConstNew(const GUID& guid) { IF_EQUAL_RETURN(guid, MF_MT_MAJOR_TYPE); @@ -1010,14 +723,12 @@ LPCWSTR GetGUIDNameConstNew(const GUID& guid) IF_EQUAL_RETURN(guid, MF_MT_DV_VAUX_CTRL_PACK); IF_EQUAL_RETURN(guid, MF_MT_ARBITRARY_HEADER); IF_EQUAL_RETURN(guid, MF_MT_ARBITRARY_FORMAT); - IF_EQUAL_RETURN(guid, MF_MT_IMAGE_LOSS_TOLERANT); + IF_EQUAL_RETURN(guid, MF_MT_IMAGE_LOSS_TOLERANT); IF_EQUAL_RETURN(guid, MF_MT_MPEG4_SAMPLE_DESCRIPTION); IF_EQUAL_RETURN(guid, MF_MT_MPEG4_CURRENT_SAMPLE_ENTRY); - IF_EQUAL_RETURN(guid, MF_MT_ORIGINAL_4CC); + IF_EQUAL_RETURN(guid, MF_MT_ORIGINAL_4CC); IF_EQUAL_RETURN(guid, MF_MT_ORIGINAL_WAVE_FORMAT_TAG); - // Media types - IF_EQUAL_RETURN(guid, MFMediaType_Audio); IF_EQUAL_RETURN(guid, MFMediaType_Video); IF_EQUAL_RETURN(guid, MFMediaType_Protected); @@ -1027,9 +738,8 @@ LPCWSTR GetGUIDNameConstNew(const GUID& guid) IF_EQUAL_RETURN(guid, MFMediaType_HTML); IF_EQUAL_RETURN(guid, MFMediaType_Binary); IF_EQUAL_RETURN(guid, MFMediaType_FileTransfer); - IF_EQUAL_RETURN(guid, MFVideoFormat_AI44); // FCC('AI44') - IF_EQUAL_RETURN(guid, MFVideoFormat_ARGB32); // D3DFMT_A8R8G8B8 + IF_EQUAL_RETURN(guid, MFVideoFormat_ARGB32); // D3DFMT_A8R8G8B8 IF_EQUAL_RETURN(guid, MFVideoFormat_AYUV); // FCC('AYUV') IF_EQUAL_RETURN(guid, MFVideoFormat_DV25); // FCC('dv25') IF_EQUAL_RETURN(guid, MFVideoFormat_DV50); // FCC('dv50') @@ -1053,10 +763,10 @@ LPCWSTR GetGUIDNameConstNew(const GUID& guid) IF_EQUAL_RETURN(guid, MFVideoFormat_P016); // FCC('P016') IF_EQUAL_RETURN(guid, MFVideoFormat_P210); // FCC('P210') IF_EQUAL_RETURN(guid, MFVideoFormat_P216); // FCC('P216') - IF_EQUAL_RETURN(guid, MFVideoFormat_RGB24); // D3DFMT_R8G8B8 - IF_EQUAL_RETURN(guid, MFVideoFormat_RGB32); // D3DFMT_X8R8G8B8 - IF_EQUAL_RETURN(guid, MFVideoFormat_RGB555); // D3DFMT_X1R5G5B5 - IF_EQUAL_RETURN(guid, MFVideoFormat_RGB565); // D3DFMT_R5G6B5 + IF_EQUAL_RETURN(guid, MFVideoFormat_RGB24); // D3DFMT_R8G8B8 + IF_EQUAL_RETURN(guid, MFVideoFormat_RGB32); // D3DFMT_X8R8G8B8 + IF_EQUAL_RETURN(guid, MFVideoFormat_RGB555); // D3DFMT_X1R5G5B5 + IF_EQUAL_RETURN(guid, MFVideoFormat_RGB565); // D3DFMT_R5G6B5 IF_EQUAL_RETURN(guid, MFVideoFormat_RGB8); IF_EQUAL_RETURN(guid, MFVideoFormat_UYVY); // FCC('UYVY') IF_EQUAL_RETURN(guid, MFVideoFormat_v210); // FCC('v210') @@ -1074,162 +784,118 @@ LPCWSTR GetGUIDNameConstNew(const GUID& guid) IF_EQUAL_RETURN(guid, MFVideoFormat_YUY2); // FCC('YUY2') IF_EQUAL_RETURN(guid, MFVideoFormat_YV12); // FCC('YV12') IF_EQUAL_RETURN(guid, MFVideoFormat_YVYU); - - IF_EQUAL_RETURN(guid, MFAudioFormat_PCM); // WAVE_FORMAT_PCM - IF_EQUAL_RETURN(guid, MFAudioFormat_Float); // WAVE_FORMAT_IEEE_FLOAT - IF_EQUAL_RETURN(guid, MFAudioFormat_DTS); // WAVE_FORMAT_DTS - IF_EQUAL_RETURN(guid, MFAudioFormat_Dolby_AC3_SPDIF); // WAVE_FORMAT_DOLBY_AC3_SPDIF - IF_EQUAL_RETURN(guid, MFAudioFormat_DRM); // WAVE_FORMAT_DRM - IF_EQUAL_RETURN(guid, MFAudioFormat_WMAudioV8); // WAVE_FORMAT_WMAUDIO2 - IF_EQUAL_RETURN(guid, MFAudioFormat_WMAudioV9); // WAVE_FORMAT_WMAUDIO3 - IF_EQUAL_RETURN(guid, MFAudioFormat_WMAudio_Lossless); // WAVE_FORMAT_WMAUDIO_LOSSLESS - IF_EQUAL_RETURN(guid, MFAudioFormat_WMASPDIF); // WAVE_FORMAT_WMASPDIF - IF_EQUAL_RETURN(guid, MFAudioFormat_MSP1); // WAVE_FORMAT_WMAVOICE9 - IF_EQUAL_RETURN(guid, MFAudioFormat_MP3); // WAVE_FORMAT_MPEGLAYER3 - IF_EQUAL_RETURN(guid, MFAudioFormat_MPEG); // WAVE_FORMAT_MPEG - IF_EQUAL_RETURN(guid, MFAudioFormat_AAC); // WAVE_FORMAT_MPEG_HEAAC - IF_EQUAL_RETURN(guid, MFAudioFormat_ADTS); // WAVE_FORMAT_MPEG_ADTS_AAC - + IF_EQUAL_RETURN(guid, MFAudioFormat_PCM); // WAVE_FORMAT_PCM + IF_EQUAL_RETURN(guid, MFAudioFormat_Float); // WAVE_FORMAT_IEEE_FLOAT + IF_EQUAL_RETURN(guid, MFAudioFormat_DTS); // WAVE_FORMAT_DTS + IF_EQUAL_RETURN(guid, MFAudioFormat_Dolby_AC3_SPDIF); // WAVE_FORMAT_DOLBY_AC3_SPDIF + IF_EQUAL_RETURN(guid, MFAudioFormat_DRM); // WAVE_FORMAT_DRM + IF_EQUAL_RETURN(guid, MFAudioFormat_WMAudioV8); // WAVE_FORMAT_WMAUDIO2 + IF_EQUAL_RETURN(guid, MFAudioFormat_WMAudioV9); // WAVE_FORMAT_WMAUDIO3 + IF_EQUAL_RETURN(guid, MFAudioFormat_WMAudio_Lossless); // WAVE_FORMAT_WMAUDIO_LOSSLESS + IF_EQUAL_RETURN(guid, MFAudioFormat_WMASPDIF); // WAVE_FORMAT_WMASPDIF + IF_EQUAL_RETURN(guid, MFAudioFormat_MSP1); // WAVE_FORMAT_WMAVOICE9 + IF_EQUAL_RETURN(guid, MFAudioFormat_MP3); // WAVE_FORMAT_MPEGLAYER3 + IF_EQUAL_RETURN(guid, MFAudioFormat_MPEG); // WAVE_FORMAT_MPEG + IF_EQUAL_RETURN(guid, MFAudioFormat_AAC); // WAVE_FORMAT_MPEG_HEAAC + IF_EQUAL_RETURN(guid, MFAudioFormat_ADTS); // WAVE_FORMAT_MPEG_ADTS_AAC return NULL; } - FormatReader::FormatReader(void) { } - MediaType FormatReader::Read(IMFMediaType *pType) { UINT32 count = 0; - HRESULT hr = S_OK; - MediaType out; - hr = pType->LockStore(); - if (FAILED(hr)) { return out; } - hr = pType->GetCount(&count); - if (FAILED(hr)) { return out; } - for (UINT32 i = 0; i < count; i++) { hr = LogAttributeValueByIndexNew(pType, i, out); - if (FAILED(hr)) { break; } } - hr = pType->UnlockStore(); - if (FAILED(hr)) { return out; } - return out; } - FormatReader::~FormatReader(void) { } - #define CHECK_HR(x) if (FAILED(x)) { goto done; } - ImageGrabber::ImageGrabber(unsigned int deviceID): m_cRef(1), ig_DeviceID(deviceID), ig_pSource(NULL), ig_pSession(NULL), ig_pTopology(NULL), ig_RIE(true), ig_Close(false) { } - - ImageGrabber::~ImageGrabber(void) { if (ig_pSession) { ig_pSession->Shutdown(); } - //SafeRelease(&ig_pSession); - //SafeRelease(&ig_pTopology); - DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - DPO->printOut(L"IMAGEGRABBER VIDEODEVICE %i: Destroing instance of the ImageGrabber class \n", ig_DeviceID); - } - HRESULT ImageGrabber::initImageGrabber(IMFMediaSource *pSource, GUID VideoFormat) { IMFActivate *pSinkActivate = NULL; IMFMediaType *pType = NULL; - IMFPresentationDescriptor *pPD = NULL; IMFStreamDescriptor *pSD = NULL; IMFMediaTypeHandler *pHandler = NULL; IMFMediaType *pCurrentType = NULL; - HRESULT hr = S_OK; MediaType MT; - // Clean up. if (ig_pSession) { ig_pSession->Shutdown(); } - SafeRelease(&ig_pSession); SafeRelease(&ig_pTopology); - ig_pSource = pSource; - - - - hr = pSource->CreatePresentationDescriptor(&pPD); if (FAILED(hr)) goto err; - BOOL fSelected; hr = pPD->GetStreamDescriptorByIndex(0, &fSelected, &pSD); if (FAILED(hr)) goto err; - hr = pSD->GetMediaTypeHandler(&pHandler); if (FAILED(hr)) goto err; - DWORD cTypes = 0; hr = pHandler->GetMediaTypeCount(&cTypes); if (FAILED(hr)) goto err; - if(cTypes > 0) { hr = pHandler->GetCurrentMediaType(&pCurrentType); - if (FAILED(hr)) goto err; - MT = FormatReader::Read(pCurrentType); } - err: SafeRelease(&pPD); SafeRelease(&pSD); SafeRelease(&pHandler); SafeRelease(&pCurrentType); - unsigned int sizeRawImage = 0; - if(VideoFormat == MFVideoFormat_RGB24) { sizeRawImage = MT.MF_MT_FRAME_SIZE * 3; @@ -1238,151 +904,103 @@ err: { sizeRawImage = MT.MF_MT_FRAME_SIZE * 4; } - CHECK_HR(hr = RawImage::CreateInstance(&ig_RIFirst, sizeRawImage)); - CHECK_HR(hr = RawImage::CreateInstance(&ig_RISecond, sizeRawImage)); - ig_RIOut = ig_RISecond; - - // Configure the media type that the Sample Grabber will receive. // Setting the major and subtype is usually enough for the topology loader // to resolve the topology. - CHECK_HR(hr = MFCreateMediaType(&pType)); CHECK_HR(hr = pType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video)); CHECK_HR(hr = pType->SetGUID(MF_MT_SUBTYPE, VideoFormat)); - // Create the sample grabber sink. CHECK_HR(hr = MFCreateSampleGrabberSinkActivate(pType, this, &pSinkActivate)); - // To run as fast as possible, set this attribute (requires Windows 7): CHECK_HR(hr = pSinkActivate->SetUINT32(MF_SAMPLEGRABBERSINK_IGNORE_CLOCK, TRUE)); - // Create the Media Session. - CHECK_HR(hr = MFCreateMediaSession(NULL, &ig_pSession)); - // Create the topology. CHECK_HR(hr = CreateTopology(pSource, pSinkActivate, &ig_pTopology)); - done: - // Clean up. if (FAILED(hr)) - { + { if (ig_pSession) { ig_pSession->Shutdown(); } - SafeRelease(&ig_pSession); SafeRelease(&ig_pTopology); } - SafeRelease(&pSinkActivate); SafeRelease(&pType); - return hr; } - void ImageGrabber::stopGrabbing() { if(ig_pSession) ig_pSession->Stop(); - - - DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - DPO->printOut(L"IMAGEGRABBER VIDEODEVICE %i: Stopping of of grabbing of images\n", ig_DeviceID); } - HRESULT ImageGrabber::startGrabbing(void) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - IMFMediaEvent *pEvent = NULL; - PROPVARIANT var; PropVariantInit(&var); - HRESULT hr = S_OK; CHECK_HR(hr = ig_pSession->SetTopology(0, ig_pTopology)); CHECK_HR(hr = ig_pSession->Start(&GUID_NULL, &var)); - DPO->printOut(L"IMAGEGRABBER VIDEODEVICE %i: Start Grabbing of the images\n", ig_DeviceID); - for(;;) { - HRESULT hrStatus = S_OK; MediaEventType met; - if(!ig_pSession) break; hr = ig_pSession->GetEvent(0, &pEvent); if(!SUCCEEDED(hr)) { hr = S_OK; - goto done; } - hr = pEvent->GetStatus(&hrStatus); if(!SUCCEEDED(hr)) { hr = S_OK; - goto done; } - hr = pEvent->GetType(&met); if(!SUCCEEDED(hr)) { hr = S_OK; - goto done; } - if (met == MESessionEnded) - { + { DPO->printOut(L"IMAGEGRABBER VIDEODEVICE %i: MESessionEnded \n", ig_DeviceID); - ig_pSession->Stop(); - break; } - if (met == MESessionStopped) { DPO->printOut(L"IMAGEGRABBER VIDEODEVICE %i: MESessionStopped \n", ig_DeviceID); - break; } - - if (met == MEVideoCaptureDeviceRemoved) { DPO->printOut(L"IMAGEGRABBER VIDEODEVICE %i: MEVideoCaptureDeviceRemoved \n", ig_DeviceID); - - break; + break; } - SafeRelease(&pEvent); } - DPO->printOut(L"IMAGEGRABBER VIDEODEVICE %i: Finish startGrabbing \n", ig_DeviceID); - done: SafeRelease(&pEvent); - SafeRelease(&ig_pSession); - SafeRelease(&ig_pTopology); return hr; } - HRESULT ImageGrabber::CreateTopology(IMFMediaSource *pSource, IMFActivate *pSinkActivate, IMFTopology **ppTopo) { IMFTopology *pTopology = NULL; @@ -1391,25 +1009,19 @@ HRESULT ImageGrabber::CreateTopology(IMFMediaSource *pSource, IMFActivate *pSink IMFMediaTypeHandler *pHandler = NULL; IMFTopologyNode *pNode1 = NULL; IMFTopologyNode *pNode2 = NULL; - HRESULT hr = S_OK; DWORD cStreams = 0; - CHECK_HR(hr = MFCreateTopology(&pTopology)); CHECK_HR(hr = pSource->CreatePresentationDescriptor(&pPD)); CHECK_HR(hr = pPD->GetStreamDescriptorCount(&cStreams)); - for (DWORD i = 0; i < cStreams; i++) { // In this example, we look for audio streams and connect them to the sink. - BOOL fSelected = FALSE; GUID majorType; - CHECK_HR(hr = pPD->GetStreamDescriptorByIndex(i, &fSelected, &pSD)); CHECK_HR(hr = pSD->GetMediaTypeHandler(&pHandler)); CHECK_HR(hr = pHandler->GetMajorType(&majorType)); - if (majorType == MFMediaType_Video && fSelected) { CHECK_HR(hr = AddSourceNode(pTopology, pSource, pPD, pSD, &pNode1)); @@ -1424,10 +1036,8 @@ HRESULT ImageGrabber::CreateTopology(IMFMediaSource *pSource, IMFActivate *pSink SafeRelease(&pSD); SafeRelease(&pHandler); } - *ppTopo = pTopology; (*ppTopo)->AddRef(); - done: SafeRelease(&pTopology); SafeRelease(&pNode1); @@ -1437,7 +1047,6 @@ done: SafeRelease(&pHandler); return hr; } - HRESULT ImageGrabber::AddSourceNode( IMFTopology *pTopology, // Topology. IMFMediaSource *pSource, // Media source. @@ -1446,23 +1055,19 @@ HRESULT ImageGrabber::AddSourceNode( IMFTopologyNode **ppNode) // Receives the node pointer. { IMFTopologyNode *pNode = NULL; - HRESULT hr = S_OK; CHECK_HR(hr = MFCreateTopologyNode(MF_TOPOLOGY_SOURCESTREAM_NODE, &pNode)); CHECK_HR(hr = pNode->SetUnknown(MF_TOPONODE_SOURCE, pSource)); CHECK_HR(hr = pNode->SetUnknown(MF_TOPONODE_PRESENTATION_DESCRIPTOR, pPD)); CHECK_HR(hr = pNode->SetUnknown(MF_TOPONODE_STREAM_DESCRIPTOR, pSD)); CHECK_HR(hr = pTopology->AddNode(pNode)); - // Return the pointer to the caller. *ppNode = pNode; (*ppNode)->AddRef(); - done: SafeRelease(&pNode); return hr; } - HRESULT ImageGrabber::AddOutputNode( IMFTopology *pTopology, // Topology. IMFActivate *pActivate, // Media sink activation object. @@ -1470,71 +1075,54 @@ HRESULT ImageGrabber::AddOutputNode( IMFTopologyNode **ppNode) // Receives the node pointer. { IMFTopologyNode *pNode = NULL; - HRESULT hr = S_OK; CHECK_HR(hr = MFCreateTopologyNode(MF_TOPOLOGY_OUTPUT_NODE, &pNode)); CHECK_HR(hr = pNode->SetObject(pActivate)); CHECK_HR(hr = pNode->SetUINT32(MF_TOPONODE_STREAMID, dwId)); CHECK_HR(hr = pNode->SetUINT32(MF_TOPONODE_NOSHUTDOWN_ON_REMOVE, FALSE)); CHECK_HR(hr = pTopology->AddNode(pNode)); - // Return the pointer to the caller. *ppNode = pNode; (*ppNode)->AddRef(); - done: SafeRelease(&pNode); return hr; } - - - HRESULT ImageGrabber::CreateInstance(ImageGrabber **ppIG, unsigned int deviceID) { *ppIG = new (std::nothrow) ImageGrabber(deviceID); - if (ppIG == NULL) { return E_OUTOFMEMORY; } - DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - DPO->printOut(L"IMAGEGRABBER VIDEODEVICE %i: Creating instance of ImageGrabber\n", deviceID); - return S_OK; } - STDMETHODIMP ImageGrabber::QueryInterface(REFIID riid, void** ppv) { HRESULT hr = E_NOINTERFACE; *ppv = NULL; - if(riid == IID_IUnknown || riid == IID_IMFSampleGrabberSinkCallback) { *ppv = static_cast(this); hr = S_OK; } - if(riid == IID_IMFClockStateSink) { *ppv = static_cast(this); hr = S_OK; } - if(SUCCEEDED(hr)) { reinterpret_cast(*ppv)->AddRef(); } - return hr; } - STDMETHODIMP_(ULONG) ImageGrabber::AddRef() { return InterlockedIncrement(&m_cRef); } - STDMETHODIMP_(ULONG) ImageGrabber::Release() { ULONG cRef = InterlockedDecrement(&m_cRef); @@ -1544,45 +1132,38 @@ STDMETHODIMP_(ULONG) ImageGrabber::Release() } return cRef; } - STDMETHODIMP ImageGrabber::OnClockStart(MFTIME hnsSystemTime, LONGLONG llClockStartOffset) { (void)hnsSystemTime; (void)llClockStartOffset; return S_OK; } - STDMETHODIMP ImageGrabber::OnClockStop(MFTIME hnsSystemTime) { (void)hnsSystemTime; return S_OK; } - STDMETHODIMP ImageGrabber::OnClockPause(MFTIME hnsSystemTime) { (void)hnsSystemTime; return S_OK; } - STDMETHODIMP ImageGrabber::OnClockRestart(MFTIME hnsSystemTime) { (void)hnsSystemTime; return S_OK; } - STDMETHODIMP ImageGrabber::OnClockSetRate(MFTIME hnsSystemTime, float flRate) { (void)flRate; (void)hnsSystemTime; return S_OK; } - STDMETHODIMP ImageGrabber::OnSetPresentationClock(IMFPresentationClock* pClock) { (void)pClock; return S_OK; } - STDMETHODIMP ImageGrabber::OnProcessSample(REFGUID guidMajorMediaType, DWORD dwSampleFlags, LONGLONG llSampleTime, LONGLONG llSampleDuration, const BYTE * pSampleBuffer, DWORD dwSampleSize) @@ -1592,74 +1173,54 @@ STDMETHODIMP ImageGrabber::OnProcessSample(REFGUID guidMajorMediaType, DWORD dwS (void)dwSampleFlags; (void)llSampleDuration; (void)dwSampleSize; - if(ig_RIE) { ig_RIFirst->fastCopy(pSampleBuffer); - ig_RIOut = ig_RIFirst; } else { ig_RISecond->fastCopy(pSampleBuffer); - ig_RIOut = ig_RISecond; } - ig_RIE = !ig_RIE; - return S_OK; } - STDMETHODIMP ImageGrabber::OnShutdown() { return S_OK; } - RawImage *ImageGrabber::getRawImage() { return ig_RIOut; } - DWORD WINAPI MainThreadFunction( LPVOID lpParam ) { ImageGrabberThread *pIGT = (ImageGrabberThread *)lpParam; - pIGT->run(); - - return 0; + return 0; } - HRESULT ImageGrabberThread::CreateInstance(ImageGrabberThread **ppIGT, IMFMediaSource *pSource, unsigned int deviceID) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - *ppIGT = new (std::nothrow) ImageGrabberThread(pSource, deviceID); - if (ppIGT == NULL) { DPO->printOut(L"IMAGEGRABBERTHREAD VIDEODEVICE %i: Memory cannot be allocated\n", deviceID); - return E_OUTOFMEMORY; } else DPO->printOut(L"IMAGEGRABBERTHREAD VIDEODEVICE %i: Creating of the instance of ImageGrabberThread\n", deviceID); - return S_OK; } - ImageGrabberThread::ImageGrabberThread(IMFMediaSource *pSource, unsigned int deviceID): igt_Handle(NULL), igt_stop(false) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - HRESULT hr = ImageGrabber::CreateInstance(&igt_pImageGrabber, deviceID); - igt_DeviceID = deviceID; - if(SUCCEEDED(hr)) { hr = igt_pImageGrabber->initImageGrabber(pSource, MFVideoFormat_RGB24); - if(!SUCCEEDED(hr)) { DPO->printOut(L"IMAGEGRABBERTHREAD VIDEODEVICE %i: There is a problem with initialization of the instance of the ImageGrabber class\n", deviceID); @@ -1674,73 +1235,57 @@ ImageGrabberThread::ImageGrabberThread(IMFMediaSource *pSource, unsigned int dev DPO->printOut(L"IMAGEGRABBERTHREAD VIDEODEVICE %i There is a problem with creation of the instance of the ImageGrabber class\n", deviceID); } } - void ImageGrabberThread::setEmergencyStopEvent(void *userData, void(*func)(int, void *)) { if(func) { igt_func = func; - igt_userData = userData; } } - ImageGrabberThread::~ImageGrabberThread(void) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - DPO->printOut(L"IMAGEGRABBERTHREAD VIDEODEVICE %i: Destroing ImageGrabberThread\n", igt_DeviceID); - delete igt_pImageGrabber; } - void ImageGrabberThread::stop() { igt_stop = true; - if(igt_pImageGrabber) { igt_pImageGrabber->stopGrabbing(); } } - void ImageGrabberThread::start() { - igt_Handle = CreateThread( + igt_Handle = CreateThread( NULL, // default security attributes - 0, // use default stack size + 0, // use default stack size MainThreadFunction, // thread function name - this, // argument to thread function - 0, // use default creation flags - &igt_ThreadIdArray); // returns the thread identifier + this, // argument to thread function + 0, // use default creation flags + &igt_ThreadIdArray); // returns the thread identifier } - void ImageGrabberThread::run() { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - if(igt_pImageGrabber) { DPO->printOut(L"IMAGEGRABBERTHREAD VIDEODEVICE %i: Thread for grabbing images is started\n", igt_DeviceID); - HRESULT hr = igt_pImageGrabber->startGrabbing(); - - if(!SUCCEEDED(hr)) + if(!SUCCEEDED(hr)) { DPO->printOut(L"IMAGEGRABBERTHREAD VIDEODEVICE %i: There is a problem with starting the process of grabbing\n", igt_DeviceID); } - } else { DPO->printOut(L"IMAGEGRABBERTHREAD VIDEODEVICE %i The thread is finished without execution of grabbing\n", igt_DeviceID); } - - if(!igt_stop) { DPO->printOut(L"IMAGEGRABBERTHREAD VIDEODEVICE %i: Emergency Stop thread\n", igt_DeviceID); - if(igt_func) { igt_func(igt_DeviceID, igt_userData); @@ -1749,46 +1294,34 @@ void ImageGrabberThread::run() else DPO->printOut(L"IMAGEGRABBERTHREAD VIDEODEVICE %i: Finish thread\n", igt_DeviceID); } - ImageGrabber *ImageGrabberThread::getImageGrabber() { return igt_pImageGrabber; } - Media_Foundation::Media_Foundation(void) { HRESULT hr = MFStartup(MF_VERSION); - if(!SUCCEEDED(hr)) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - DPO->printOut(L"MEDIA FOUNDATION: It cannot be created!!!\n"); } } - Media_Foundation::~Media_Foundation(void) { HRESULT hr = MFShutdown(); - if(!SUCCEEDED(hr)) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - DPO->printOut(L"MEDIA FOUNDATION: Resources cannot be released\n"); } } - bool Media_Foundation::buildListOfDevices() { HRESULT hr = S_OK; - IMFAttributes *pAttributes = NULL; - CoInitialize(NULL); - hr = MFCreateAttributes(&pAttributes, 1); - if (SUCCEEDED(hr)) { hr = pAttributes->SetGUID( @@ -1796,7 +1329,6 @@ bool Media_Foundation::buildListOfDevices() MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE_VIDCAP_GUID ); } - if (SUCCEEDED(hr)) { videoDevices *vDs = &videoDevices::getInstance(); @@ -1807,80 +1339,61 @@ bool Media_Foundation::buildListOfDevices() DebugPrintOut *DPO = &DebugPrintOut::getInstance(); DPO->printOut(L"MEDIA FOUNDATION: The access to the video cameras denied\n"); } - SafeRelease(&pAttributes); - return (SUCCEEDED(hr)); } - Media_Foundation& Media_Foundation::getInstance() { static Media_Foundation instance; - return instance; } - RawImage::RawImage(unsigned int size): ri_new(false), ri_pixels(NULL) { ri_size = size; - ri_pixels = new unsigned char[size]; - memset((void *)ri_pixels,0,ri_size); } - bool RawImage::isNew() { return ri_new; } - unsigned int RawImage::getSize() { return ri_size; } - RawImage::~RawImage(void) { delete []ri_pixels; - ri_pixels = NULL; } - long RawImage::CreateInstance(RawImage **ppRImage,unsigned int size) { *ppRImage = new (std::nothrow) RawImage(size); - if (ppRImage == NULL) { return E_OUTOFMEMORY; } return S_OK; } - void RawImage::setCopy(const BYTE * pSampleBuffer) { memcpy(ri_pixels, pSampleBuffer, ri_size); - ri_new = true; } - void RawImage::fastCopy(const BYTE * pSampleBuffer) { memcpy(ri_pixels, pSampleBuffer, ri_size); ri_new = true; } - unsigned char * RawImage::getpPixels() { ri_new = false; return ri_pixels; } - videoDevice::videoDevice(void): vd_IsSetuped(false), vd_LockOut(OpenLock), vd_pFriendlyName(NULL), vd_Width(0), vd_Height(0), vd_pSource(NULL), vd_func(NULL), vd_userData(NULL) { } - void videoDevice::setParametrs(CamParametrs parametrs) { if(vd_IsSetuped) @@ -1888,198 +1401,145 @@ void videoDevice::setParametrs(CamParametrs parametrs) if(vd_pSource) { Parametr *pParametr = (Parametr *)(¶metrs); - Parametr *pPrevParametr = (Parametr *)(&vd_PrevParametrs); - IAMVideoProcAmp *pProcAmp = NULL; HRESULT hr = vd_pSource->QueryInterface(IID_PPV_ARGS(&pProcAmp)); - if (SUCCEEDED(hr)) { for(unsigned int i = 0; i < 10; i++) { if(pPrevParametr[i].CurrentValue != pParametr[i].CurrentValue || pPrevParametr[i].Flag != pParametr[i].Flag) hr = pProcAmp->Set(VideoProcAmp_Brightness + i, pParametr[i].CurrentValue, pParametr[i].Flag); - } - pProcAmp->Release(); } - IAMCameraControl *pProcControl = NULL; hr = vd_pSource->QueryInterface(IID_PPV_ARGS(&pProcControl)); - if (SUCCEEDED(hr)) { for(unsigned int i = 0; i < 7; i++) { if(pPrevParametr[10 + i].CurrentValue != pParametr[10 + i].CurrentValue || pPrevParametr[10 + i].Flag != pParametr[10 + i].Flag) - hr = pProcControl->Set(CameraControl_Pan+i, pParametr[10 + i].CurrentValue, pParametr[10 + i].Flag); + hr = pProcControl->Set(CameraControl_Pan+i, pParametr[10 + i].CurrentValue, pParametr[10 + i].Flag); } - pProcControl->Release(); } - vd_PrevParametrs = parametrs; } } } - CamParametrs videoDevice::getParametrs() { CamParametrs out; - if(vd_IsSetuped) { if(vd_pSource) { Parametr *pParametr = (Parametr *)(&out); - IAMVideoProcAmp *pProcAmp = NULL; HRESULT hr = vd_pSource->QueryInterface(IID_PPV_ARGS(&pProcAmp)); - if (SUCCEEDED(hr)) { for(unsigned int i = 0; i < 10; i++) { Parametr temp; - hr = pProcAmp->GetRange(VideoProcAmp_Brightness+i, &temp.Min, &temp.Max, &temp.Step, &temp.Default, &temp.Flag); - if (SUCCEEDED(hr)) { temp.CurrentValue = temp.Default; - pParametr[i] = temp; } } - pProcAmp->Release(); } - IAMCameraControl *pProcControl = NULL; hr = vd_pSource->QueryInterface(IID_PPV_ARGS(&pProcControl)); - if (SUCCEEDED(hr)) { for(unsigned int i = 0; i < 7; i++) { Parametr temp; - hr = pProcControl->GetRange(CameraControl_Pan+i, &temp.Min, &temp.Max, &temp.Step, &temp.Default, &temp.Flag); - if (SUCCEEDED(hr)) { temp.CurrentValue = temp.Default; - pParametr[10 + i] = temp; } } - pProcControl->Release(); } } } - return out; } - long videoDevice::resetDevice(IMFActivate *pActivate) { HRESULT hr = -1; - vd_CurrentFormats.clear(); - if(vd_pFriendlyName) CoTaskMemFree(vd_pFriendlyName); - vd_pFriendlyName = NULL; - if(pActivate) - { + { IMFMediaSource *pSource = NULL; - hr = pActivate->GetAllocatedString( MF_DEVSOURCE_ATTRIBUTE_FRIENDLY_NAME, &vd_pFriendlyName, NULL ); - hr = pActivate->ActivateObject( __uuidof(IMFMediaSource), (void**)&pSource ); - enumerateCaptureFormats(pSource); - buildLibraryofTypes(); - SafeRelease(&pSource); - - if(FAILED(hr)) - { + if(FAILED(hr)) + { vd_pFriendlyName = NULL; - DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - DPO->printOut(L"VIDEODEVICE %i: IMFMediaSource interface cannot be created \n", vd_CurrentNumber); } } - return hr; } - long videoDevice::readInfoOfDevice(IMFActivate *pActivate, unsigned int Num) { HRESULT hr = -1; - vd_CurrentNumber = Num; - hr = resetDevice(pActivate); - return hr; } - long videoDevice::checkDevice(IMFAttributes *pAttributes, IMFActivate **pDevice) { HRESULT hr = S_OK; - IMFActivate **ppDevices = NULL; - DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - UINT32 count; - wchar_t *newFriendlyName = NULL; - hr = MFEnumDeviceSources(pAttributes, &ppDevices, &count); - if (SUCCEEDED(hr)) { if(count > 0) { if(count > vd_CurrentNumber) - { + { hr = ppDevices[vd_CurrentNumber]->GetAllocatedString( MF_DEVSOURCE_ATTRIBUTE_FRIENDLY_NAME, &newFriendlyName, NULL ); - if (SUCCEEDED(hr)) { if(wcscmp(newFriendlyName, vd_pFriendlyName) != 0) { DPO->printOut(L"VIDEODEVICE %i: Chosen device cannot be found \n", vd_CurrentNumber); - hr = -1; - pDevice = NULL; } else { *pDevice = ppDevices[vd_CurrentNumber]; - (*pDevice)->AddRef(); } } @@ -2087,20 +1547,16 @@ long videoDevice::checkDevice(IMFAttributes *pAttributes, IMFActivate **pDevice) { DPO->printOut(L"VIDEODEVICE %i: Name of device cannot be gotten \n", vd_CurrentNumber); } - } else { DPO->printOut(L"VIDEODEVICE %i: Number of devices more than corrent number of the device \n", vd_CurrentNumber); - hr = -1; } - for(UINT32 i = 0; i < count; i++) { SafeRelease(&ppDevices[i]); } - SafeRelease(ppDevices); } else @@ -2110,69 +1566,50 @@ long videoDevice::checkDevice(IMFAttributes *pAttributes, IMFActivate **pDevice) { DPO->printOut(L"VIDEODEVICE %i: List of DeviceSources cannot be enumerated \n", vd_CurrentNumber); } - return hr; } - long videoDevice::initDevice() { HRESULT hr = -1; - IMFAttributes *pAttributes = NULL; - IMFActivate * vd_pActivate= NULL; - DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - CoInitialize(NULL); - hr = MFCreateAttributes(&pAttributes, 1); - if (SUCCEEDED(hr)) { hr = pAttributes->SetGUID( MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE, MF_DEVSOURCE_ATTRIBUTE_SOURCE_TYPE_VIDCAP_GUID ); - } - + } if (SUCCEEDED(hr)) { hr = checkDevice(pAttributes, &vd_pActivate); - if (SUCCEEDED(hr) && vd_pActivate) { SafeRelease(&vd_pSource); - hr = vd_pActivate->ActivateObject( __uuidof(IMFMediaSource), (void**)&vd_pSource ); - if (SUCCEEDED(hr)) { - } - SafeRelease(&vd_pActivate); } else { DPO->printOut(L"VIDEODEVICE %i: Device there is not \n", vd_CurrentNumber); } - } + } else { - DPO->printOut(L"VIDEODEVICE %i: The attribute of video cameras cannot be getting \n", vd_CurrentNumber); - } - SafeRelease(&pAttributes); - return hr; } - MediaType videoDevice::getFormat(unsigned int id) { if(id < vd_CurrentFormats.size()) @@ -2180,49 +1617,35 @@ MediaType videoDevice::getFormat(unsigned int id) return vd_CurrentFormats[id]; } else return MediaType(); - } - int videoDevice::getCountFormats() { return vd_CurrentFormats.size(); } - void videoDevice::setEmergencyStopEvent(void *userData, void(*func)(int, void *)) { vd_func = func; - vd_userData = userData; } - void videoDevice::closeDevice() -{ +{ if(vd_IsSetuped) { vd_IsSetuped = false; - vd_pSource->Stop(); - SafeRelease(&vd_pSource); - if(vd_LockOut == RawDataLock) { vd_pImGrTh->stop(); - Sleep(500); - delete vd_pImGrTh; } - vd_pImGrTh = NULL; - - vd_LockOut = OpenLock; - + vd_LockOut = OpenLock; DebugPrintOut *DPO = &DebugPrintOut::getInstance(); DPO->printOut(L"VIDEODEVICE %i: Device is stopped \n", vd_CurrentNumber); } } - unsigned int videoDevice::getWidth() { if(vd_IsSetuped) @@ -2230,60 +1653,46 @@ unsigned int videoDevice::getWidth() else return 0; } - unsigned int videoDevice::getHeight() { if(vd_IsSetuped) return vd_Height; - else + else return 0; } - IMFMediaSource *videoDevice::getMediaSource() { IMFMediaSource *out = NULL; - if(vd_LockOut == OpenLock) { - vd_LockOut = MediaSourceLock; - + vd_LockOut = MediaSourceLock; out = vd_pSource; } - return out; } - int videoDevice::findType(unsigned int size, unsigned int frameRate) -{ +{ if(vd_CaptureFormats.size() == 0) return 0; - FrameRateMap FRM = vd_CaptureFormats[size]; - if(FRM.size() == 0) return 0; - UINT64 frameRateMax = 0; SUBTYPEMap STMMax; - if(frameRate == 0) { std::map::iterator f = FRM.begin(); - for(; f != FRM.end(); f++) { if((*f).first >= frameRateMax) { frameRateMax = (*f).first; - STMMax = (*f).second; } - } - + } } else { std::map::iterator f = FRM.begin(); - for(; f != FRM.end(); f++) { if((*f).first >= frameRateMax) @@ -2291,98 +1700,68 @@ int videoDevice::findType(unsigned int size, unsigned int frameRate) if(frameRate > (*f).first) { frameRateMax = (*f).first; - STMMax = (*f).second; } } } } - if(STMMax.size() == 0) return 0; - - std::map::iterator S = STMMax.begin(); - vectorNum VN = (*S).second; - if(VN.size() == 0) return 0; - return VN[0]; - } - void videoDevice::buildLibraryofTypes() { unsigned int size; - unsigned int framerate; - std::vector::iterator i = vd_CurrentFormats.begin(); - int count = 0; - for(; i != vd_CurrentFormats.end(); i++) { size = (*i).MF_MT_FRAME_SIZE; - framerate = (*i).MF_MT_FRAME_RATE; - FrameRateMap FRM = vd_CaptureFormats[size]; - SUBTYPEMap STM = FRM[framerate]; - String subType((*i).pMF_MT_SUBTYPEName); - vectorNum VN = STM[subType]; - VN.push_back(count); - STM[subType] = VN; - FRM[framerate] = STM; - vd_CaptureFormats[size] = FRM; - count++; } } - long videoDevice::setDeviceFormat(IMFMediaSource *pSource, unsigned long dwFormatIndex) { IMFPresentationDescriptor *pPD = NULL; IMFStreamDescriptor *pSD = NULL; IMFMediaTypeHandler *pHandler = NULL; IMFMediaType *pType = NULL; - HRESULT hr = pSource->CreatePresentationDescriptor(&pPD); if (FAILED(hr)) { goto done; } - BOOL fSelected; hr = pPD->GetStreamDescriptorByIndex(0, &fSelected, &pSD); if (FAILED(hr)) { goto done; } - hr = pSD->GetMediaTypeHandler(&pHandler); if (FAILED(hr)) { goto done; } - hr = pHandler->GetMediaTypeByIndex((DWORD)dwFormatIndex, &pType); if (FAILED(hr)) { goto done; } - hr = pHandler->SetCurrentMediaType(pType); - done: SafeRelease(&pPD); SafeRelease(&pSD); @@ -2390,243 +1769,179 @@ done: SafeRelease(&pType); return hr; } - bool videoDevice::isDeviceSetup() { return vd_IsSetuped; } - RawImage * videoDevice::getRawImageOut() { if(!vd_IsSetuped) return NULL; - if(vd_pImGrTh) - return vd_pImGrTh->getImageGrabber()->getRawImage(); + return vd_pImGrTh->getImageGrabber()->getRawImage(); else { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - DPO->printOut(L"VIDEODEVICE %i: The instance of ImageGrabberThread class does not exist \n", vd_CurrentNumber); } return NULL; } - bool videoDevice::isFrameNew() { if(!vd_IsSetuped) return false; - - if(vd_LockOut == RawDataLock || vd_LockOut == OpenLock) + if(vd_LockOut == RawDataLock || vd_LockOut == OpenLock) { if(vd_LockOut == OpenLock) { vd_LockOut = RawDataLock; - HRESULT hr = ImageGrabberThread::CreateInstance(&vd_pImGrTh, vd_pSource, vd_CurrentNumber); - if(FAILED(hr)) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - DPO->printOut(L"VIDEODEVICE %i: The instance of ImageGrabberThread class cannot be created.\n", vd_CurrentNumber); - return false; } - vd_pImGrTh->setEmergencyStopEvent(vd_userData, vd_func); - vd_pImGrTh->start(); - return true; } - if(vd_pImGrTh) - return vd_pImGrTh->getImageGrabber()->getRawImage()->isNew(); - + return vd_pImGrTh->getImageGrabber()->getRawImage()->isNew(); } - return false; } - bool videoDevice::isDeviceMediaSource() { if(vd_LockOut == MediaSourceLock) return true; - return false; } - bool videoDevice::isDeviceRawDataSource() { if(vd_LockOut == RawDataLock) return true; - return false; } - bool videoDevice::setupDevice(unsigned int id) -{ +{ DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - if(!vd_IsSetuped) { HRESULT hr = -1; - hr = initDevice(); - if(SUCCEEDED(hr)) - { - vd_Width = vd_CurrentFormats[id].width; - + { + vd_Width = vd_CurrentFormats[id].width; vd_Height = vd_CurrentFormats[id].height; - hr = setDeviceFormat(vd_pSource, (DWORD) id); - vd_IsSetuped = (SUCCEEDED(hr)); - if(vd_IsSetuped) DPO->printOut(L"\n\nVIDEODEVICE %i: Device is setuped \n", vd_CurrentNumber); - vd_PrevParametrs = getParametrs(); - return vd_IsSetuped; } else { DPO->printOut(L"VIDEODEVICE %i: Interface IMFMediaSource cannot be got \n", vd_CurrentNumber); - return false; } } else { DPO->printOut(L"VIDEODEVICE %i: Device is setuped already \n", vd_CurrentNumber); - return false; - } + } } - bool videoDevice::setupDevice(unsigned int w, unsigned int h, unsigned int idealFramerate) -{ +{ unsigned int id = findType(w * h, idealFramerate); - return setupDevice(id); } - wchar_t *videoDevice::getName() { return vd_pFriendlyName; } - videoDevice::~videoDevice(void) -{ +{ closeDevice(); - SafeRelease(&vd_pSource); - if(vd_pFriendlyName) CoTaskMemFree(vd_pFriendlyName); } - long videoDevice::enumerateCaptureFormats(IMFMediaSource *pSource) { IMFPresentationDescriptor *pPD = NULL; IMFStreamDescriptor *pSD = NULL; IMFMediaTypeHandler *pHandler = NULL; IMFMediaType *pType = NULL; - HRESULT hr = pSource->CreatePresentationDescriptor(&pPD); if (FAILED(hr)) { goto done; } - BOOL fSelected; hr = pPD->GetStreamDescriptorByIndex(0, &fSelected, &pSD); if (FAILED(hr)) { goto done; } - hr = pSD->GetMediaTypeHandler(&pHandler); if (FAILED(hr)) { goto done; } - DWORD cTypes = 0; hr = pHandler->GetMediaTypeCount(&cTypes); if (FAILED(hr)) { goto done; } - for (DWORD i = 0; i < cTypes; i++) { hr = pHandler->GetMediaTypeByIndex(i, &pType); - if (FAILED(hr)) { goto done; } - MediaType MT = FormatReader::Read(pType); - vd_CurrentFormats.push_back(MT); - SafeRelease(&pType); } - done: SafeRelease(&pPD); SafeRelease(&pSD); SafeRelease(&pHandler); SafeRelease(&pType); - return hr; } - - videoDevices::videoDevices(void): count(0) {} - void videoDevices::clearDevices() { std::vector::iterator i = vds_Devices.begin(); - for(; i != vds_Devices.end(); ++i) delete (*i); - vds_Devices.clear(); } - videoDevices::~videoDevices(void) -{ +{ clearDevices(); } - videoDevice * videoDevices::getDevice(unsigned int i) { if(i >= vds_Devices.size()) { return NULL; } - if(i < 0) { return NULL; } - return vds_Devices[i]; } - long videoDevices::initDevices(IMFAttributes *pAttributes) { HRESULT hr = S_OK; - IMFActivate **ppDevices = NULL; - clearDevices(); - hr = MFEnumDeviceSources(pAttributes, &ppDevices, &count); - if (SUCCEEDED(hr)) { if(count > 0) @@ -2636,7 +1951,6 @@ long videoDevices::initDevices(IMFAttributes *pAttributes) videoDevice *vd = new videoDevice; vd->readInfoOfDevice(ppDevices[i], i); vds_Devices.push_back(vd); - SafeRelease(&ppDevices[i]); } SafeRelease(ppDevices); @@ -2647,154 +1961,99 @@ long videoDevices::initDevices(IMFAttributes *pAttributes) else { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - DPO->printOut(L"VIDEODEVICES: The instances of the videoDevice class cannot be created\n"); } - return hr; } - size_t videoDevices::getCount() { return vds_Devices.size(); } - -videoDevices& videoDevices::getInstance() +videoDevices& videoDevices::getInstance() { static videoDevices instance; - return instance; } - Parametr::Parametr() { CurrentValue = 0; - Min = 0; - Max = 0; - Step = 0; - - Default = 0; - + Default = 0; Flag = 0; } - MediaType::MediaType() { pMF_MT_AM_FORMAT_TYPEName = NULL; - pMF_MT_MAJOR_TYPEName = NULL; - pMF_MT_SUBTYPEName = NULL; - Clear(); } - MediaType::~MediaType() { Clear(); } - void MediaType::Clear() { - MF_MT_FRAME_SIZE = 0; - height = 0; - width = 0; - MF_MT_YUV_MATRIX = 0; - MF_MT_VIDEO_LIGHTING = 0; - MF_MT_DEFAULT_STRIDE = 0; - MF_MT_VIDEO_CHROMA_SITING = 0; - MF_MT_FIXED_SIZE_SAMPLES = 0; - MF_MT_VIDEO_NOMINAL_RANGE = 0; - MF_MT_FRAME_RATE = 0; - MF_MT_FRAME_RATE_low = 0; - MF_MT_PIXEL_ASPECT_RATIO = 0; - MF_MT_PIXEL_ASPECT_RATIO_low = 0; - MF_MT_ALL_SAMPLES_INDEPENDENT = 0; - MF_MT_FRAME_RATE_RANGE_MIN = 0; - MF_MT_FRAME_RATE_RANGE_MIN_low = 0; - MF_MT_SAMPLE_SIZE = 0; - MF_MT_VIDEO_PRIMARIES = 0; - MF_MT_INTERLACE_MODE = 0; - MF_MT_FRAME_RATE_RANGE_MAX = 0; - MF_MT_FRAME_RATE_RANGE_MAX_low = 0; - memset(&MF_MT_MAJOR_TYPE, 0, sizeof(GUID)); - memset(&MF_MT_AM_FORMAT_TYPE, 0, sizeof(GUID)); - memset(&MF_MT_SUBTYPE, 0, sizeof(GUID)); } - videoInput::videoInput(void): accessToDevices(false) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - DPO->printOut(L"\n***** VIDEOINPUT LIBRARY - 2013 (Author: Evgeny Pereguda) *****\n\n"); - updateListOfDevices(); if(!accessToDevices) DPO->printOut(L"INITIALIZATION: Ther is not any suitable video device\n"); } - void videoInput::updateListOfDevices() { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - Media_Foundation *MF = &Media_Foundation::getInstance(); accessToDevices = MF->buildListOfDevices(); - if(!accessToDevices) DPO->printOut(L"UPDATING: Ther is not any suitable video device\n"); } - videoInput::~videoInput(void) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - DPO->printOut(L"\n***** CLOSE VIDEOINPUT LIBRARY - 2013 *****\n\n"); } - IMFMediaSource *videoInput::getMediaSource(int deviceID) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - if(accessToDevices) { videoDevices *VDS = &videoDevices::getInstance(); - videoDevice * VD = VDS->getDevice(deviceID); - if(VD) { IMFMediaSource *out = VD->getMediaSource(); - if(!out) DPO->printOut(L"VideoDevice %i: There is not any suitable IMFMediaSource interface\n", deviceID); - return out; } } @@ -2802,33 +2061,25 @@ IMFMediaSource *videoInput::getMediaSource(int deviceID) { DPO->printOut(L"VIDEODEVICE(s): There is not any suitable video device\n"); } - return NULL; } - bool videoInput::setupDevice(int deviceID, unsigned int id) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - if (deviceID < 0 ) { DPO->printOut(L"VIDEODEVICE %i: Invalid device ID\n", deviceID); return false; } - if(accessToDevices) { videoDevices *VDS = &videoDevices::getInstance(); - videoDevice * VD = VDS->getDevice(deviceID); - if(VD) { bool out = VD->setupDevice(id); - if(!out) DPO->printOut(L"VIDEODEVICE %i: This device cannot be started\n", deviceID); - return out; } } @@ -2836,33 +2087,25 @@ bool videoInput::setupDevice(int deviceID, unsigned int id) { DPO->printOut(L"VIDEODEVICE(s): There is not any suitable video device\n"); } - return false; } - bool videoInput::setupDevice(int deviceID, unsigned int w, unsigned int h, unsigned int idealFramerate) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - if (deviceID < 0 ) { DPO->printOut(L"VIDEODEVICE %i: Invalid device ID\n", deviceID); return false; } - if(accessToDevices) { videoDevices *VDS = &videoDevices::getInstance(); - videoDevice * VD = VDS->getDevice(deviceID); - if(VD) { bool out = VD->setupDevice(w, h, idealFramerate); - if(!out) DPO->printOut(L"VIDEODEVICE %i: this device cannot be started\n", deviceID); - return out; } } @@ -2870,53 +2113,41 @@ bool videoInput::setupDevice(int deviceID, unsigned int w, unsigned int h, unsig { DPO->printOut(L"VIDEODEVICE(s): There is not any suitable video device\n", deviceID); } - return false; } - MediaType videoInput::getFormat(int deviceID, unsigned int id) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - if (deviceID < 0) { DPO->printOut(L"VIDEODEVICE %i: Invalid device ID\n", deviceID); return MediaType(); } - if(accessToDevices) { videoDevices *VDS = &videoDevices::getInstance(); - videoDevice * VD = VDS->getDevice(deviceID); - - if(VD) + if(VD) return VD->getFormat(id); } else { DPO->printOut(L"VIDEODEVICE(s): There is not any suitable video device\n"); } - return MediaType(); } - bool videoInput::isDeviceSetup(int deviceID) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - if (deviceID < 0) { DPO->printOut(L"VIDEODEVICE %i: Invalid device ID\n", deviceID); return false; } - if(accessToDevices) { videoDevices *VDS = &videoDevices::getInstance(); - videoDevice * VD = VDS->getDevice(deviceID); - if(VD) return VD->isDeviceSetup(); } @@ -2924,26 +2155,20 @@ bool videoInput::isDeviceSetup(int deviceID) { DPO->printOut(L"VIDEODEVICE(s): There is not any suitable video device\n"); } - return false; } - bool videoInput::isDeviceMediaSource(int deviceID) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - if (deviceID < 0) { DPO->printOut(L"VIDEODEVICE %i: Invalid device ID\n", deviceID); return false; } - if(accessToDevices) { videoDevices *VDS = &videoDevices::getInstance(); - videoDevice * VD = VDS->getDevice(deviceID); - if(VD) return VD->isDeviceMediaSource(); } @@ -2951,26 +2176,20 @@ bool videoInput::isDeviceMediaSource(int deviceID) { DPO->printOut(L"Device(s): There is not any suitable video device\n"); } - return false; } - bool videoInput::isDeviceRawDataSource(int deviceID) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - if (deviceID < 0) { DPO->printOut(L"VIDEODEVICE %i: Invalid device ID\n", deviceID); return false; } - if(accessToDevices) { videoDevices *VDS = &videoDevices::getInstance(); - videoDevice * VD = VDS->getDevice(deviceID); - if(VD) { bool isRaw = VD->isDeviceRawDataSource(); @@ -2981,20 +2200,16 @@ bool videoInput::isDeviceRawDataSource(int deviceID) { DPO->printOut(L"VIDEODEVICE(s): There is not any suitable video device\n"); } - return false; } - bool videoInput::isFrameNew(int deviceID) -{ +{ DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - if (deviceID < 0) { DPO->printOut(L"VIDEODEVICE %i: Invalid device ID\n", deviceID); return false; } - if(accessToDevices) { if(!isDeviceSetup(deviceID)) @@ -3002,11 +2217,8 @@ bool videoInput::isFrameNew(int deviceID) if(isDeviceMediaSource(deviceID)) return false; } - videoDevices *VDS = &videoDevices::getInstance(); - videoDevice * VD = VDS->getDevice(deviceID); - if(VD) { return VD->isFrameNew(); @@ -3016,26 +2228,20 @@ bool videoInput::isFrameNew(int deviceID) { DPO->printOut(L"VIDEODEVICE(s): There is not any suitable video device\n"); } - return false; } - unsigned int videoInput::getCountFormats(int deviceID) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - if (deviceID < 0) { DPO->printOut(L"VIDEODEVICE %i: Invalid device ID\n", deviceID); return 0; } - if(accessToDevices) { videoDevices *VDS = &videoDevices::getInstance(); - videoDevice * VD = VDS->getDevice(deviceID); - if(VD) return VD->getCountFormats(); } @@ -3043,34 +2249,26 @@ unsigned int videoInput::getCountFormats(int deviceID) { DPO->printOut(L"VIDEODEVICE(s): There is not any suitable video device\n"); } - return 0; } - void videoInput::closeAllDevices() { videoDevices *VDS = &videoDevices::getInstance(); - for(unsigned int i = 0; i < VDS->getCount(); i++) closeDevice(i); } - void videoInput::setParametrs(int deviceID, CamParametrs parametrs) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - if (deviceID < 0) { DPO->printOut(L"VIDEODEVICE %i: Invalid device ID\n", deviceID); return; } - if(accessToDevices) { videoDevices *VDS = &videoDevices::getInstance(); - videoDevice *VD = VDS->getDevice(deviceID); - if(VD) VD->setParametrs(parametrs); } @@ -3079,24 +2277,19 @@ void videoInput::setParametrs(int deviceID, CamParametrs parametrs) DPO->printOut(L"VIDEODEVICE(s): There is not any suitable video device\n"); } } - CamParametrs videoInput::getParametrs(int deviceID) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); CamParametrs out; - if (deviceID < 0) { DPO->printOut(L"VIDEODEVICE %i: Invalid device ID\n", deviceID); return out; } - if(accessToDevices) { videoDevices *VDS = &videoDevices::getInstance(); - videoDevice *VD = VDS->getDevice(deviceID); - if(VD) out = VD->getParametrs(); } @@ -3104,26 +2297,20 @@ CamParametrs videoInput::getParametrs(int deviceID) { DPO->printOut(L"VIDEODEVICE(s): There is not any suitable video device\n"); } - return out; } - void videoInput::closeDevice(int deviceID) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - if (deviceID < 0) { DPO->printOut(L"VIDEODEVICE %i: Invalid device ID\n", deviceID); return; } - if(accessToDevices) { videoDevices *VDS = &videoDevices::getInstance(); - videoDevice *VD = VDS->getDevice(deviceID); - if(VD) VD->closeDevice(); } @@ -3132,50 +2319,39 @@ void videoInput::closeDevice(int deviceID) DPO->printOut(L"VIDEODEVICE(s): There is not any suitable video device\n"); } } - unsigned int videoInput::getWidth(int deviceID) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - if (deviceID < 0) { DPO->printOut(L"VIDEODEVICE %i: Invalid device ID\n", deviceID); return 0; } - if(accessToDevices) { videoDevices *VDS = &videoDevices::getInstance(); - videoDevice * VD = VDS->getDevice(deviceID); - - if(VD) + if(VD) return VD->getWidth(); } else { DPO->printOut(L"VIDEODEVICE(s): There is not any suitable video device\n"); } - return 0; } - unsigned int videoInput::getHeight(int deviceID) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - if (deviceID < 0) { DPO->printOut(L"VIDEODEVICE %i: Invalid device ID\n", deviceID); return 0; } - if(accessToDevices) { videoDevices *VDS = &videoDevices::getInstance(); - videoDevice * VD = VDS->getDevice(deviceID); - if(VD) return VD->getHeight(); } @@ -3183,26 +2359,20 @@ unsigned int videoInput::getHeight(int deviceID) { DPO->printOut(L"VIDEODEVICE(s): There is not any suitable video device\n"); } - return 0; } - wchar_t *videoInput::getNameVideoDevice(int deviceID) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - if (deviceID < 0) { DPO->printOut(L"VIDEODEVICE %i: Invalid device ID\n", deviceID); return NULL; } - if(accessToDevices) { videoDevices *VDS = &videoDevices::getInstance(); - videoDevice * VD = VDS->getDevice(deviceID); - if(VD) return VD->getName(); } @@ -3210,81 +2380,60 @@ wchar_t *videoInput::getNameVideoDevice(int deviceID) { DPO->printOut(L"VIDEODEVICE(s): There is not any suitable video device\n"); } - return L"Empty"; } - unsigned int videoInput::listDevices(bool silent) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - int out = 0; - if(accessToDevices) { videoDevices *VDS = &videoDevices::getInstance(); - out = VDS->getCount(); - DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - if(!silent)DPO->printOut(L"\nVIDEOINPUT SPY MODE!\n\n"); - if(!silent)DPO->printOut(L"SETUP: Looking For Capture Devices\n"); - for(int i = 0; i < out; i++) { if(!silent)DPO->printOut(L"SETUP: %i) %s \n",i, getNameVideoDevice(i)); } - if(!silent)DPO->printOut(L"SETUP: %i Device(s) found\n\n", out); - } else { DPO->printOut(L"VIDEODEVICE(s): There is not any suitable video device\n"); } - return out; } - -videoInput& videoInput::getInstance() +videoInput& videoInput::getInstance() { static videoInput instance; - return instance; } - bool videoInput::isDevicesAcceable() { return accessToDevices; } - void videoInput::setVerbose(bool state) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - DPO->setVerbose(state); + DPO->setVerbose(state); } - void videoInput::setEmergencyStopEvent(int deviceID, void *userData, void(*func)(int, void *)) { DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - if (deviceID < 0) { DPO->printOut(L"VIDEODEVICE %i: Invalid device ID\n", deviceID); return; } - if(accessToDevices) { if(func) { videoDevices *VDS = &videoDevices::getInstance(); - videoDevice * VD = VDS->getDevice(deviceID); - - if(VD) + if(VD) VD->setEmergencyStopEvent(userData, func); } } @@ -3293,19 +2442,16 @@ void videoInput::setEmergencyStopEvent(int deviceID, void *userData, void(*func) DPO->printOut(L"VIDEODEVICE(s): There is not any suitable video device\n"); } } - bool videoInput::getPixels(int deviceID, unsigned char * dstBuffer, bool flipRedAndBlue, bool flipImage) { bool success = false; unsigned int bytes = 3; DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - if (deviceID < 0) { DPO->printOut(L"VIDEODEVICE %i: Invalid device ID\n", deviceID); return success; } - if(accessToDevices) { bool isRaw = isDeviceRawDataSource(deviceID); @@ -3313,18 +2459,14 @@ bool videoInput::getPixels(int deviceID, unsigned char * dstBuffer, bool flipRed { videoDevices *VDS = &videoDevices::getInstance(); DebugPrintOut *DPO = &DebugPrintOut::getInstance(); - RawImage *RIOut = VDS->getDevice(deviceID)->getRawImageOut(); - if(RIOut) { unsigned int height = VDS->getDevice(deviceID)->getHeight(); - unsigned int width = VDS->getDevice(deviceID)->getWidth(); - + unsigned int width = VDS->getDevice(deviceID)->getWidth(); unsigned int size = bytes * width * height; - if(size == RIOut->getSize()) - { + { processPixels(RIOut->getpPixels(), dstBuffer, width, height, bytes, flipRedAndBlue, flipImage); success = true; } @@ -3336,7 +2478,7 @@ bool videoInput::getPixels(int deviceID, unsigned char * dstBuffer, bool flipRed else { DPO->printOut(L"ERROR: GetPixels() - Unable to grab frame for device %i\n", deviceID); - } + } } else { @@ -3347,19 +2489,16 @@ bool videoInput::getPixels(int deviceID, unsigned char * dstBuffer, bool flipRed { DPO->printOut(L"VIDEODEVICE(s): There is not any suitable video device\n"); } - return success; } - -void videoInput::processPixels(unsigned char * src, unsigned char * dst, unsigned int width, +void videoInput::processPixels(unsigned char * src, unsigned char * dst, unsigned int width, unsigned int height, unsigned int bpp, bool bRGB, bool bFlip) -{ +{ unsigned int widthInBytes = width * bpp; unsigned int numBytes = widthInBytes * height; int *dstInt, *srcInt; - if(!bRGB) - { + { if(bFlip) { for(unsigned int y = 0; y < height; y++) @@ -3368,7 +2507,6 @@ void videoInput::processPixels(unsigned char * src, unsigned char * dst, unsigne srcInt = (int *)(src + ( (height -y -1) * widthInBytes)); memcpy(dstInt, srcInt, widthInBytes); } - } else { @@ -3379,11 +2517,9 @@ void videoInput::processPixels(unsigned char * src, unsigned char * dst, unsigne { if(bFlip) { - unsigned int x = 0; unsigned int y = (height - 1) * widthInBytes; src += y; - for(unsigned int i = 0; i < numBytes; i+=3) { if(x >= width) @@ -3391,49 +2527,38 @@ void videoInput::processPixels(unsigned char * src, unsigned char * dst, unsigne x = 0; src -= widthInBytes*2; } - *dst = *(src+2); dst++; - *dst = *(src+1); - dst++; - + dst++; *dst = *src; - dst++; - - src+=3; - x++; + dst++; + src+=3; + x++; } } else - { + { for(unsigned int i = 0; i < numBytes; i+=3) { *dst = *(src+2); dst++; - *dst = *(src+1); - dst++; - + dst++; *dst = *src; - dst++; - - src+=3; + dst++; + src+=3; } } } } - } - /******* Capturing video from camera via Microsoft Media Foundation **********/ - class CvCaptureCAM_MSMF : public CvCapture { public: CvCaptureCAM_MSMF(); virtual ~CvCaptureCAM_MSMF(); - virtual bool open( int index ); virtual void close(); virtual double getProperty(int); @@ -3441,23 +2566,18 @@ public: virtual bool grabFrame(); virtual IplImage* retrieveFrame(int); virtual int getCaptureDomain() { return CV_CAP_MSMF; } // Return the type of the capture object: CV_CAP_VFW, etc... - protected: void init(); - int index, width, height,fourcc; int widthSet, heightSet; IplImage* frame; videoInput VI; }; - struct SuppressVideoInputMessages { SuppressVideoInputMessages() { videoInput::setVerbose(true); } }; - static SuppressVideoInputMessages do_it; - CvCaptureCAM_MSMF::CvCaptureCAM_MSMF(): index(-1), width(-1), @@ -3470,13 +2590,11 @@ CvCaptureCAM_MSMF::CvCaptureCAM_MSMF(): { CoInitialize(0); } - CvCaptureCAM_MSMF::~CvCaptureCAM_MSMF() { close(); CoUninitialize(); } - void CvCaptureCAM_MSMF::close() { if( index >= 0 ) @@ -3487,13 +2605,11 @@ void CvCaptureCAM_MSMF::close() } widthSet = heightSet = width = height = -1; } - // Initialize camera input bool CvCaptureCAM_MSMF::open( int _index ) { int try_index = _index; int devices = 0; - close(); devices = VI.listDevices(true); if (devices == 0) @@ -3505,13 +2621,10 @@ bool CvCaptureCAM_MSMF::open( int _index ) index = try_index; return true; } - bool CvCaptureCAM_MSMF::grabFrame() { return true; } - - IplImage* CvCaptureCAM_MSMF::retrieveFrame(int) { if( !frame || (int)VI.getWidth(index) != frame->width || (int)VI.getHeight(index) != frame->height ) @@ -3521,12 +2634,9 @@ IplImage* CvCaptureCAM_MSMF::retrieveFrame(int) unsigned int w = VI.getWidth(index), h = VI.getHeight(index); frame = cvCreateImage( cvSize(w,h), 8, 3 ); } - VI.getPixels( index, (uchar*)frame->imageData, false, true ); - return frame; } - double CvCaptureCAM_MSMF::getProperty( int property_id ) { // image format proprrties @@ -3534,21 +2644,17 @@ double CvCaptureCAM_MSMF::getProperty( int property_id ) { case CV_CAP_PROP_FRAME_WIDTH: return VI.getWidth(index); - case CV_CAP_PROP_FRAME_HEIGHT: return VI.getHeight(index); - case CV_CAP_PROP_FOURCC: // FIXME: implement method in VideoInput back end //return VI.getFourcc(index); ; - case CV_CAP_PROP_FPS: - // FIXME: implement method in VideoInput back end + // FIXME: implement method in VideoInput back end //return VI.getFPS(index); ; } - // video filter properties switch( property_id ) { @@ -3568,7 +2674,6 @@ double CvCaptureCAM_MSMF::getProperty( int property_id ) // return (double)current_value; return 0.; } - // camera properties switch( property_id ) { @@ -3584,11 +2689,9 @@ double CvCaptureCAM_MSMF::getProperty( int property_id ) // max_value,stepping_delta,current_value,flags,defaultValue) ) return (double)current_value; return 0.; } - // unknown parameter or value not available return -1; } - bool CvCaptureCAM_MSMF::setProperty( int property_id, double value ) { // image capture properties @@ -3599,12 +2702,10 @@ bool CvCaptureCAM_MSMF::setProperty( int property_id, double value ) width = cvRound(value); handled = true; break; - case CV_CAP_PROP_FRAME_HEIGHT: height = cvRound(value); handled = true; break; - case CV_CAP_PROP_FOURCC: fourcc = (int)(unsigned long)(value); if ( fourcc == -1 ) { @@ -3613,7 +2714,6 @@ bool CvCaptureCAM_MSMF::setProperty( int property_id, double value ) } handled = true; break; - case CV_CAP_PROP_FPS: // FIXME: implement method in VideoInput back end // int fps = cvRound(value); @@ -3628,9 +2728,7 @@ bool CvCaptureCAM_MSMF::setProperty( int property_id, double value ) // } // return VI.isDeviceSetup(index); ; - } - if ( handled ) { // a stream setting if( width > 0 && height > 0 ) @@ -3643,7 +2741,6 @@ bool CvCaptureCAM_MSMF::setProperty( int property_id, double value ) // VI.setIdealFramerate(index, fps); // VI.setupDeviceFourcc(index, width, height, fourcc); } - bool success = VI.isDeviceSetup(index); if (success) { @@ -3655,14 +2752,12 @@ bool CvCaptureCAM_MSMF::setProperty( int property_id, double value ) } return true; } - // show video/camera filter dialog // FIXME: implement method in VideoInput back end // if ( property_id == CV_CAP_PROP_SETTINGS ) { // VI.showSettingsWindow(index); // return true; // } - //video Filter properties switch( property_id ) { @@ -3680,7 +2775,6 @@ bool CvCaptureCAM_MSMF::setProperty( int property_id, double value ) //return VI.setVideoSettingFilter(index,VI.getVideoPropertyFromCV(property_id),(long)value); ; } - //camera properties switch( property_id ) { @@ -3695,15 +2789,11 @@ bool CvCaptureCAM_MSMF::setProperty( int property_id, double value ) //return VI.setVideoSettingCamera(index,VI.getCameraPropertyFromCV(property_id),(long)value); ; } - return false; } - - CvCapture* cvCreateCameraCapture_MSMF( int index ) { CvCaptureCAM_MSMF* capture = new CvCaptureCAM_MSMF; - try { if( capture->open( index )) @@ -3714,9 +2804,7 @@ CvCapture* cvCreateCameraCapture_MSMF( int index ) delete capture; throw; } - delete capture; return 0; } - -#endif +#endif \ No newline at end of file diff --git a/platforms/winrt/arm.winrt.toolchain.cmake b/platforms/winrt/arm.winrt.toolchain.cmake index e8767f297..b34056cd5 100644 --- a/platforms/winrt/arm.winrt.toolchain.cmake +++ b/platforms/winrt/arm.winrt.toolchain.cmake @@ -1,5 +1,6 @@ set(CMAKE_SYSTEM_NAME Windows) set(CMAKE_SYSTEM_PROCESSOR "arm-v7a") +set(CMAKE_FIND_ROOT_PATH "${CMAKE_SOURCE_DIR}/platforms/winrt") set(CMAKE_REQUIRED_DEFINITIONS -D_ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE) add_definitions(-D_ARM_WINAPI_PARTITION_DESKTOP_SDK_AVAILABLE) \ No newline at end of file From 18536fe8f9ea344016afe4b383e50876b282beee Mon Sep 17 00:00:00 2001 From: Andrey Kamaev Date: Wed, 3 Apr 2013 16:30:33 +0400 Subject: [PATCH 44/67] Fix target platform detection for x64 MinGW --- cmake/OpenCVDetectCXXCompiler.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/OpenCVDetectCXXCompiler.cmake b/cmake/OpenCVDetectCXXCompiler.cmake index 504004bce..9ee23da55 100644 --- a/cmake/OpenCVDetectCXXCompiler.cmake +++ b/cmake/OpenCVDetectCXXCompiler.cmake @@ -93,13 +93,13 @@ elseif(CMAKE_COMPILER_IS_GNUCXX) execute_process(COMMAND ${CMAKE_CXX_COMPILER} -dumpmachine OUTPUT_VARIABLE CMAKE_OPENCV_GCC_TARGET_MACHINE OUTPUT_STRIP_TRAILING_WHITESPACE) - if(CMAKE_OPENCV_GCC_TARGET_MACHINE MATCHES "64") + if(CMAKE_OPENCV_GCC_TARGET_MACHINE MATCHES "amd64|x86_64|AMD64") set(MINGW64 1) endif() endif() endif() -if(CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*" OR CMAKE_GENERATOR MATCHES "Visual Studio.*Win64") +if(MINGW64 OR CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*" OR CMAKE_GENERATOR MATCHES "Visual Studio.*Win64") set(X86_64 1) elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "i686.*|i386.*|x86.*|amd64.*|AMD64.*") set(X86 1) From 2122627877bd8e834ec283c6114c853105a4a502 Mon Sep 17 00:00:00 2001 From: Karl-Heinz Zimmer Date: Wed, 3 Apr 2013 17:54:29 +0200 Subject: [PATCH 45/67] Set ptr to NULL, so this method can be called repeatedly. This fixes a crash after unplugging web cam and trying to re-scan the cameras. --- modules/highgui/src/cap_libv4l.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/highgui/src/cap_libv4l.cpp b/modules/highgui/src/cap_libv4l.cpp index 63a2ff96b..ec048aff7 100644 --- a/modules/highgui/src/cap_libv4l.cpp +++ b/modules/highgui/src/cap_libv4l.cpp @@ -1714,6 +1714,7 @@ static void icvCloseCAM_V4L( CvCaptureCAM_V4L* capture ){ #endif free(capture->deviceName); + capture->deviceName = NULL; //v4l2_free_ranges(capture); //cvFree((void **)capture); } From eeaa1e875198abd21bde4a04c9938e65ea9e81ca Mon Sep 17 00:00:00 2001 From: Andrey Kamaev Date: Wed, 3 Apr 2013 20:02:13 +0400 Subject: [PATCH 46/67] Fix link of fat java wrapper on OS X --- modules/java/CMakeLists.txt | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/modules/java/CMakeLists.txt b/modules/java/CMakeLists.txt index cf99226ee..a4d895a6b 100644 --- a/modules/java/CMakeLists.txt +++ b/modules/java/CMakeLists.txt @@ -281,7 +281,14 @@ if(BUILD_FAT_JAVA_LIB) if(__extradeps) list(REMOVE_ITEM __deps ${__extradeps}) endif() - target_link_libraries(${the_module} -Wl,-whole-archive ${__deps} -Wl,-no-whole-archive ${__extradeps} ${OPENCV_LINKER_LIBS}) + if(APPLE) + foreach(_dep ${__deps}) + target_link_libraries(${the_module} -Wl,-force_load "${_dep}") + endforeach() + else() + target_link_libraries(${the_module} -Wl,-whole-archive ${__deps} -Wl,-no-whole-archive) + endif() + target_link_libraries(${the_module} ${__extradeps} ${OPENCV_LINKER_LIBS}) else() target_link_libraries(${the_module} ${OPENCV_MODULE_${the_module}_DEPS} ${OPENCV_LINKER_LIBS}) endif() From bf3264b1971334d6c02e89ec406302a6e3ec6900 Mon Sep 17 00:00:00 2001 From: Andrey Kamaev Date: Thu, 4 Apr 2013 11:04:51 +0400 Subject: [PATCH 47/67] Add missed CMake include for check_include_file Issue #2820 --- cmake/OpenCVFindLibsGrfmt.cmake | 1 + 1 file changed, 1 insertion(+) diff --git a/cmake/OpenCVFindLibsGrfmt.cmake b/cmake/OpenCVFindLibsGrfmt.cmake index 08af604ae..33e428b7c 100644 --- a/cmake/OpenCVFindLibsGrfmt.cmake +++ b/cmake/OpenCVFindLibsGrfmt.cmake @@ -116,6 +116,7 @@ if(WITH_PNG AND NOT IOS) else() include(FindPNG) if(PNG_FOUND) + include(CheckIncludeFile) check_include_file("${PNG_PNG_INCLUDE_DIR}/png.h" HAVE_PNG_H) check_include_file("${PNG_PNG_INCLUDE_DIR}/libpng/png.h" HAVE_LIBPNG_PNG_H) if(HAVE_PNG_H) From eb3c9ed1ab5bd7f744e5eb76053dc6e72ba8c31d Mon Sep 17 00:00:00 2001 From: Andrey Kamaev Date: Thu, 4 Apr 2013 11:27:43 +0400 Subject: [PATCH 48/67] Fix ffmpeg wrapper compatibility with libavcodec > 53.25.0 Based on pull request #685 --- modules/highgui/src/cap_ffmpeg_impl.hpp | 56 ++++++++++++++----------- 1 file changed, 32 insertions(+), 24 deletions(-) diff --git a/modules/highgui/src/cap_ffmpeg_impl.hpp b/modules/highgui/src/cap_ffmpeg_impl.hpp index e590cd278..d494948f9 100644 --- a/modules/highgui/src/cap_ffmpeg_impl.hpp +++ b/modules/highgui/src/cap_ffmpeg_impl.hpp @@ -148,6 +148,14 @@ extern "C" { #define AVERROR_EOF (-MKTAG( 'E','O','F',' ')) #endif +#if LIBAVCODEC_BUILD >= CALC_FFMPEG_VERSION(54,25,0) +# define CV_CODEC_ID AVCodecID +# define CV_CODEC(name) AV_##name +#else +# define CV_CODEC_ID CodecID +# define CV_CODEC(name) name +#endif + static int get_number_of_cpus(void) { #if LIBAVFORMAT_BUILD < CALC_FFMPEG_VERSION(52, 111, 0) @@ -1021,7 +1029,7 @@ static const char * icvFFMPEGErrStr(int err) /* function internal to FFMPEG (libavformat/riff.c) to lookup codec id by fourcc tag*/ extern "C" { - enum CodecID codec_get_bmp_id(unsigned int tag); + enum CV_CODEC_ID codec_get_bmp_id(unsigned int tag); } void CvVideoWriter_FFMPEG::init() @@ -1073,7 +1081,7 @@ static AVFrame * icv_alloc_picture_FFMPEG(int pix_fmt, int width, int height, bo /* add a video output stream to the container */ static AVStream *icv_add_video_stream_FFMPEG(AVFormatContext *oc, - CodecID codec_id, + CV_CODEC_ID codec_id, int w, int h, int bitrate, double fps, int pixel_format) { @@ -1105,7 +1113,7 @@ static AVStream *icv_add_video_stream_FFMPEG(AVFormatContext *oc, c->codec_id = oc->oformat->video_codec; #endif - if(codec_id != CODEC_ID_NONE){ + if(codec_id != CV_CODEC(CODEC_ID_NONE)){ c->codec_id = codec_id; } @@ -1166,10 +1174,10 @@ static AVStream *icv_add_video_stream_FFMPEG(AVFormatContext *oc, c->gop_size = 12; /* emit one intra frame every twelve frames at most */ c->pix_fmt = (PixelFormat) pixel_format; - if (c->codec_id == CODEC_ID_MPEG2VIDEO) { + if (c->codec_id == CV_CODEC(CODEC_ID_MPEG2VIDEO)) { c->max_b_frames = 2; } - if (c->codec_id == CODEC_ID_MPEG1VIDEO || c->codec_id == CODEC_ID_MSMPEG4V3){ + if (c->codec_id == CV_CODEC(CODEC_ID_MPEG1VIDEO) || c->codec_id == CV_CODEC(CODEC_ID_MSMPEG4V3)){ /* needed to avoid using macroblocks in which some coeffs overflow this doesnt happen with normal video, it just happens here as the motion of the chroma plane doesnt match the luma plane */ @@ -1263,7 +1271,7 @@ bool CvVideoWriter_FFMPEG::writeFrame( const unsigned char* data, int step, int #if LIBAVFORMAT_BUILD < 5231 // It is not needed in the latest versions of the ffmpeg - if( c->codec_id == CODEC_ID_RAWVIDEO && origin != 1 ) + if( c->codec_id == CV_CODEC(CODEC_ID_RAWVIDEO) && origin != 1 ) { if( !temp_image.data ) { @@ -1450,7 +1458,7 @@ void CvVideoWriter_FFMPEG::close() bool CvVideoWriter_FFMPEG::open( const char * filename, int fourcc, double fps, int width, int height, bool is_color ) { - CodecID codec_id = CODEC_ID_NONE; + CV_CODEC_ID codec_id = CV_CODEC(CODEC_ID_NONE); int err, codec_pix_fmt; double bitrate_scale = 1; @@ -1491,11 +1499,11 @@ bool CvVideoWriter_FFMPEG::open( const char * filename, int fourcc, /* Lookup codec_id for given fourcc */ #if LIBAVCODEC_VERSION_INT<((51<<16)+(49<<8)+0) - if( (codec_id = codec_get_bmp_id( fourcc )) == CODEC_ID_NONE ) + if( (codec_id = codec_get_bmp_id( fourcc )) == CV_CODEC(CODEC_ID_NONE) ) return false; #else const struct AVCodecTag * tags[] = { codec_bmp_tags, NULL}; - if( (codec_id = av_codec_get_id(tags, fourcc)) == CODEC_ID_NONE ) + if( (codec_id = av_codec_get_id(tags, fourcc)) == CV_CODEC(CODEC_ID_NONE) ) return false; #endif @@ -1517,20 +1525,20 @@ bool CvVideoWriter_FFMPEG::open( const char * filename, int fourcc, // set a few optimal pixel formats for lossless codecs of interest.. switch (codec_id) { #if LIBAVCODEC_VERSION_INT>((50<<16)+(1<<8)+0) - case CODEC_ID_JPEGLS: + case CV_CODEC(CODEC_ID_JPEGLS): // BGR24 or GRAY8 depending on is_color... codec_pix_fmt = input_pix_fmt; break; #endif - case CODEC_ID_HUFFYUV: + case CV_CODEC(CODEC_ID_HUFFYUV): codec_pix_fmt = PIX_FMT_YUV422P; break; - case CODEC_ID_MJPEG: - case CODEC_ID_LJPEG: + case CV_CODEC(CODEC_ID_MJPEG): + case CV_CODEC(CODEC_ID_LJPEG): codec_pix_fmt = PIX_FMT_YUVJ420P; bitrate_scale = 3; break; - case CODEC_ID_RAWVIDEO: + case CV_CODEC(CODEC_ID_RAWVIDEO): codec_pix_fmt = input_pix_fmt == PIX_FMT_GRAY8 || input_pix_fmt == PIX_FMT_GRAY16LE || input_pix_fmt == PIX_FMT_GRAY16BE ? input_pix_fmt : PIX_FMT_YUV420P; @@ -1761,7 +1769,7 @@ struct OutputMediaStream_FFMPEG void write(unsigned char* data, int size, int keyFrame); // add a video output stream to the container - static AVStream* addVideoStream(AVFormatContext *oc, CodecID codec_id, int w, int h, int bitrate, double fps, PixelFormat pixel_format); + static AVStream* addVideoStream(AVFormatContext *oc, CV_CODEC_ID codec_id, int w, int h, int bitrate, double fps, PixelFormat pixel_format); AVOutputFormat* fmt_; AVFormatContext* oc_; @@ -1808,7 +1816,7 @@ void OutputMediaStream_FFMPEG::close() } } -AVStream* OutputMediaStream_FFMPEG::addVideoStream(AVFormatContext *oc, CodecID codec_id, int w, int h, int bitrate, double fps, PixelFormat pixel_format) +AVStream* OutputMediaStream_FFMPEG::addVideoStream(AVFormatContext *oc, CV_CODEC_ID codec_id, int w, int h, int bitrate, double fps, PixelFormat pixel_format) { #if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(53, 10, 0) AVStream* st = avformat_new_stream(oc, 0); @@ -1888,10 +1896,10 @@ AVStream* OutputMediaStream_FFMPEG::addVideoStream(AVFormatContext *oc, CodecID c->gop_size = 12; // emit one intra frame every twelve frames at most c->pix_fmt = pixel_format; - if (c->codec_id == CODEC_ID_MPEG2VIDEO) + if (c->codec_id == CV_CODEC(CODEC_ID_MPEG2VIDEO)) c->max_b_frames = 2; - if (c->codec_id == CODEC_ID_MPEG1VIDEO || c->codec_id == CODEC_ID_MSMPEG4V3) + if (c->codec_id == CV_CODEC(CODEC_ID_MPEG1VIDEO) || c->codec_id == CV_CODEC(CODEC_ID_MSMPEG4V3)) { // needed to avoid using macroblocks in which some coeffs overflow // this doesnt happen with normal video, it just happens here as the @@ -1928,7 +1936,7 @@ bool OutputMediaStream_FFMPEG::open(const char* fileName, int width, int height, if (!fmt_) return false; - CodecID codec_id = CODEC_ID_H264; + CV_CODEC_ID codec_id = CV_CODEC(CODEC_ID_H264); // alloc memory for context #if LIBAVFORMAT_BUILD >= CALC_FFMPEG_VERSION(53, 2, 0) @@ -2129,23 +2137,23 @@ bool InputMediaStream_FFMPEG::open(const char* fileName, int* codec, int* chroma switch (enc->codec_id) { - case CODEC_ID_MPEG1VIDEO: + case CV_CODEC(CODEC_ID_MPEG1VIDEO): *codec = ::VideoCodec_MPEG1; break; - case CODEC_ID_MPEG2VIDEO: + case CV_CODEC(CODEC_ID_MPEG2VIDEO): *codec = ::VideoCodec_MPEG2; break; - case CODEC_ID_MPEG4: + case CV_CODEC(CODEC_ID_MPEG4): *codec = ::VideoCodec_MPEG4; break; - case CODEC_ID_VC1: + case CV_CODEC(CODEC_ID_VC1): *codec = ::VideoCodec_VC1; break; - case CODEC_ID_H264: + case CV_CODEC(CODEC_ID_H264): *codec = ::VideoCodec_H264; break; From 2c57445ffe53d3bbd62b766014e1ee083c2f9daa Mon Sep 17 00:00:00 2001 From: Andrey Kamaev Date: Thu, 4 Apr 2013 11:57:00 +0400 Subject: [PATCH 49/67] Improve CMake checks for the OpenGL availability Issue #2868 --- CMakeLists.txt | 6 +++--- cmake/OpenCVFindLibsGUI.cmake | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index b9823b372..f7e948954 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -118,11 +118,11 @@ OCV_OPTION(WITH_CUFFT "Include NVidia Cuda Fast Fourier Transform (FFT) OCV_OPTION(WITH_CUBLAS "Include NVidia Cuda Basic Linear Algebra Subprograms (BLAS) library support" OFF IF (CMAKE_VERSION VERSION_GREATER "2.8" AND NOT ANDROID AND NOT IOS) ) OCV_OPTION(WITH_NVCUVID "Include NVidia Video Decoding library support" OFF IF (CMAKE_VERSION VERSION_GREATER "2.8" AND NOT ANDROID AND NOT IOS AND NOT APPLE) ) OCV_OPTION(WITH_EIGEN "Include Eigen2/Eigen3 support" ON) -OCV_OPTION(WITH_VFW "Include Video for Windows support" ON IF (WIN32)) +OCV_OPTION(WITH_VFW "Include Video for Windows support" ON IF WIN32 ) OCV_OPTION(WITH_FFMPEG "Include FFMPEG support" ON IF (NOT ANDROID AND NOT IOS)) OCV_OPTION(WITH_GSTREAMER "Include Gstreamer support" ON IF (UNIX AND NOT APPLE AND NOT ANDROID) ) OCV_OPTION(WITH_GTK "Include GTK support" ON IF (UNIX AND NOT APPLE AND NOT ANDROID) ) -OCV_OPTION(WITH_IMAGEIO "ImageIO support for OS X" OFF IF APPLE) +OCV_OPTION(WITH_IMAGEIO "ImageIO support for OS X" OFF IF APPLE ) OCV_OPTION(WITH_IPP "Include Intel IPP support" OFF IF (MSVC OR X86 OR X86_64) ) OCV_OPTION(WITH_JASPER "Include JPEG2K support" ON IF (NOT IOS) ) OCV_OPTION(WITH_JPEG "Include JPEG support" ON) @@ -133,7 +133,7 @@ OCV_OPTION(WITH_PNG "Include PNG support" ON) OCV_OPTION(WITH_PVAPI "Include Prosilica GigE support" ON IF (NOT ANDROID AND NOT IOS) ) OCV_OPTION(WITH_GIGEAPI "Include Smartek GigE support" ON IF (NOT ANDROID AND NOT IOS) ) OCV_OPTION(WITH_QT "Build with Qt Backend support" OFF IF (NOT ANDROID AND NOT IOS) ) -OCV_OPTION(WITH_WIN32UI "Build with Win32 UI Backend support" ON IF (WIN32) ) +OCV_OPTION(WITH_WIN32UI "Build with Win32 UI Backend support" ON IF WIN32 ) OCV_OPTION(WITH_QUICKTIME "Use QuickTime for Video I/O insted of QTKit" OFF IF APPLE ) OCV_OPTION(WITH_TBB "Include Intel TBB support" OFF IF (NOT IOS) ) OCV_OPTION(WITH_CSTRIPES "Include C= support" OFF IF WIN32 ) diff --git a/cmake/OpenCVFindLibsGUI.cmake b/cmake/OpenCVFindLibsGUI.cmake index 14095442d..3b42f1b0b 100644 --- a/cmake/OpenCVFindLibsGUI.cmake +++ b/cmake/OpenCVFindLibsGUI.cmake @@ -35,7 +35,7 @@ endif() # --- OpenGl --- ocv_clear_vars(HAVE_OPENGL HAVE_QT_OPENGL) if(WITH_OPENGL) - if(WIN32 OR QT_QTOPENGL_FOUND OR HAVE_GTKGLEXT) + if(WITH_WIN32UI OR (HAVE_QT AND QT_QTOPENGL_FOUND) OR HAVE_GTKGLEXT) find_package (OpenGL QUIET) if(OPENGL_FOUND) set(HAVE_OPENGL TRUE) From 36367ec027a5f2fc8b49f8a945ceb6063556318a Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Thu, 4 Apr 2013 12:18:13 +0400 Subject: [PATCH 50/67] Several fixes android related fixes Native activity build warning fixed; Logcat messages for JavaCameraView updated (Bug #2876); Some fixes for feature #2893 done. --- cmake/OpenCVDetectAndroidSDK.cmake | 1 + .../src/java/android+CameraBridgeViewBase.java | 3 +++ .../generator/src/java/android+JavaCameraView.java | 10 +++------- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cmake/OpenCVDetectAndroidSDK.cmake b/cmake/OpenCVDetectAndroidSDK.cmake index b125561d4..92d7ba327 100644 --- a/cmake/OpenCVDetectAndroidSDK.cmake +++ b/cmake/OpenCVDetectAndroidSDK.cmake @@ -278,6 +278,7 @@ macro(add_android_project target path) if (NATIVE_APP_GLUE) include_directories(${ANDROID_NDK}/sources/android/native_app_glue) list(APPEND android_proj_jni_files ${ANDROID_NDK}/sources/android/native_app_glue/android_native_app_glue.c) + ocv_warnings_disable(CMAKE_C_FLAGS -Wstrict-prototypes -Wunused-parameter -Wmissing-prototypes) set(android_proj_NATIVE_DEPS ${android_proj_NATIVE_DEPS} android) endif() diff --git a/modules/java/generator/src/java/android+CameraBridgeViewBase.java b/modules/java/generator/src/java/android+CameraBridgeViewBase.java index e76ac26c5..36417c582 100644 --- a/modules/java/generator/src/java/android+CameraBridgeViewBase.java +++ b/modules/java/generator/src/java/android+CameraBridgeViewBase.java @@ -54,6 +54,9 @@ public abstract class CameraBridgeViewBase extends SurfaceView implements Surfac public CameraBridgeViewBase(Context context, int cameraId) { super(context); mCameraIndex = cameraId; + getHolder().addCallback(this); + mMaxWidth = MAX_UNSPECIFIED; + mMaxHeight = MAX_UNSPECIFIED; } public CameraBridgeViewBase(Context context, AttributeSet attrs) { diff --git a/modules/java/generator/src/java/android+JavaCameraView.java b/modules/java/generator/src/java/android+JavaCameraView.java index 34fe6091a..f07b7d2ca 100644 --- a/modules/java/generator/src/java/android+JavaCameraView.java +++ b/modules/java/generator/src/java/android+JavaCameraView.java @@ -60,7 +60,6 @@ public class JavaCameraView extends CameraBridgeViewBase implements PreviewCallb public JavaCameraView(Context context, AttributeSet attrs) { super(context, attrs); - Log.d(TAG, "Java camera view ctor"); } protected boolean initializeCamera(int width, int height) { @@ -237,10 +236,8 @@ public class JavaCameraView extends CameraBridgeViewBase implements PreviewCallb } public void onPreviewFrame(byte[] frame, Camera arg1) { - Log.i(TAG, "Preview Frame received. Need to create MAT and deliver it to clients"); - Log.i(TAG, "Frame size is " + frame.length); - synchronized (this) - { + Log.d(TAG, "Preview Frame received. Frame size: " + frame.length); + synchronized (this) { mFrameChain[1 - mChainIdx].put(0, 0, frame); this.notify(); } @@ -248,8 +245,7 @@ public class JavaCameraView extends CameraBridgeViewBase implements PreviewCallb mCamera.addCallbackBuffer(mBuffer); } - private class JavaCameraFrame implements CvCameraViewFrame - { + private class JavaCameraFrame implements CvCameraViewFrame { public Mat gray() { return mYuvFrameData.submat(0, mHeight, 0, mWidth); } From 382695ba5a19f53dbdcfcc36e26c1c2c893b4885 Mon Sep 17 00:00:00 2001 From: Alexander Shishkov Date: Thu, 4 Apr 2013 13:44:28 +0400 Subject: [PATCH 51/67] removed links to the http://opencv.willowgarage.com/ --- doc/_themes/blue/layout.html | 2 +- doc/tutorials/introduction/linux_eclipse/linux_eclipse.rst | 2 -- modules/contrib/doc/facerec/facerec_tutorial.rst | 2 +- modules/contrib/doc/facerec/src/CMakeLists.txt | 2 +- modules/core/doc/intro.rst | 2 +- modules/features2d/doc/feature_detection_and_description.rst | 2 +- samples/cpp/Qt_sample/main.cpp | 2 +- 7 files changed, 6 insertions(+), 8 deletions(-) diff --git a/doc/_themes/blue/layout.html b/doc/_themes/blue/layout.html index 8bba49b17..a376c9759 100644 --- a/doc/_themes/blue/layout.html +++ b/doc/_themes/blue/layout.html @@ -183,7 +183,7 @@ {% if theme_lang == 'c' %} {% endif %} {% if theme_lang == 'cpp' %} -
  • Try the Cheatsheet.
  • +
  • Try the Cheatsheet.
  • {% endif %} {% if theme_lang == 'py' %}
  • Try the Cookbook.
  • diff --git a/doc/tutorials/introduction/linux_eclipse/linux_eclipse.rst b/doc/tutorials/introduction/linux_eclipse/linux_eclipse.rst index 0e3f32979..41c161ce3 100644 --- a/doc/tutorials/introduction/linux_eclipse/linux_eclipse.rst +++ b/doc/tutorials/introduction/linux_eclipse/linux_eclipse.rst @@ -201,8 +201,6 @@ Assuming that the image to use as the argument would be located in ` section of the OpenCV Wiki) - Say you have or create a new file, *helloworld.cpp* in a directory called *foo*: .. code-block:: cpp diff --git a/modules/contrib/doc/facerec/facerec_tutorial.rst b/modules/contrib/doc/facerec/facerec_tutorial.rst index cc2aa413b..61cd882da 100644 --- a/modules/contrib/doc/facerec/facerec_tutorial.rst +++ b/modules/contrib/doc/facerec/facerec_tutorial.rst @@ -7,7 +7,7 @@ Face Recognition with OpenCV Introduction ============ -`OpenCV (Open Source Computer Vision) `_ is a popular computer vision library started by `Intel `_ in 1999. The cross-platform library sets its focus on real-time image processing and includes patent-free implementations of the latest computer vision algorithms. In 2008 `Willow Garage `_ took over support and OpenCV 2.3.1 now comes with a programming interface to C, C++, `Python `_ and `Android `_. OpenCV is released under a BSD license so it is used in academic projects and commercial products alike. +`OpenCV (Open Source Computer Vision) `_ is a popular computer vision library started by `Intel `_ in 1999. The cross-platform library sets its focus on real-time image processing and includes patent-free implementations of the latest computer vision algorithms. In 2008 `Willow Garage `_ took over support and OpenCV 2.3.1 now comes with a programming interface to C, C++, `Python `_ and `Android `_. OpenCV is released under a BSD license so it is used in academic projects and commercial products alike. OpenCV 2.4 now comes with the very new :ocv:class:`FaceRecognizer` class for face recognition, so you can start experimenting with face recognition right away. This document is the guide I've wished for, when I was working myself into face recognition. It shows you how to perform face recognition with :ocv:class:`FaceRecognizer` in OpenCV (with full source code listings) and gives you an introduction into the algorithms behind. I'll also show how to create the visualizations you can find in many publications, because a lot of people asked for. diff --git a/modules/contrib/doc/facerec/src/CMakeLists.txt b/modules/contrib/doc/facerec/src/CMakeLists.txt index 10720048c..e56762ea4 100644 --- a/modules/contrib/doc/facerec/src/CMakeLists.txt +++ b/modules/contrib/doc/facerec/src/CMakeLists.txt @@ -6,7 +6,7 @@ project(facerec_cpp_samples) #SET(OpenCV_DIR /path/to/your/opencv/installation) # packages -find_package(OpenCV REQUIRED) # http://opencv.willowgarage.com +find_package(OpenCV REQUIRED) # http://opencv.org # probably you should loop through the sample files here add_executable(facerec_demo facerec_demo.cpp) diff --git a/modules/core/doc/intro.rst b/modules/core/doc/intro.rst index 106d698a1..0f8a3b0d5 100644 --- a/modules/core/doc/intro.rst +++ b/modules/core/doc/intro.rst @@ -4,7 +4,7 @@ Introduction .. highlight:: cpp -OpenCV (Open Source Computer Vision Library: http://opencv.willowgarage.com/wiki/) is an open-source BSD-licensed library that includes several hundreds of computer vision algorithms. The document describes the so-called OpenCV 2.x API, which is essentially a C++ API, as opposite to the C-based OpenCV 1.x API. The latter is described in opencv1x.pdf. +OpenCV (Open Source Computer Vision Library: http://opencv.org) is an open-source BSD-licensed library that includes several hundreds of computer vision algorithms. The document describes the so-called OpenCV 2.x API, which is essentially a C++ API, as opposite to the C-based OpenCV 1.x API. The latter is described in opencv1x.pdf. OpenCV has a modular structure, which means that the package includes several shared or static libraries. The following modules are available: diff --git a/modules/features2d/doc/feature_detection_and_description.rst b/modules/features2d/doc/feature_detection_and_description.rst index a39dc68bf..80a1de04a 100644 --- a/modules/features2d/doc/feature_detection_and_description.rst +++ b/modules/features2d/doc/feature_detection_and_description.rst @@ -49,7 +49,7 @@ Maximally stable extremal region extractor. :: }; The class encapsulates all the parameters of the MSER extraction algorithm (see -http://en.wikipedia.org/wiki/Maximally_stable_extremal_regions). Also see http://opencv.willowgarage.com/wiki/documentation/cpp/features2d/MSER for useful comments and parameters description. +http://en.wikipedia.org/wiki/Maximally_stable_extremal_regions). Also see http://code.opencv.org/projects/opencv/wiki/MSER for useful comments and parameters description. ORB diff --git a/samples/cpp/Qt_sample/main.cpp b/samples/cpp/Qt_sample/main.cpp index f987de471..105b973eb 100644 --- a/samples/cpp/Qt_sample/main.cpp +++ b/samples/cpp/Qt_sample/main.cpp @@ -33,7 +33,7 @@ static void help() "It works off of the video: cube4.avi\n" "Using OpenCV version %s\n" << CV_VERSION << "\n\n" " 1). This demo is mainly based on work from Javier Barandiaran Martirena\n" -" See this page http://opencv.willowgarage.com/wiki/Posit.\n" +" See this page http://code.opencv.org/projects/opencv/wiki/Posit.\n" " 2). This is a demo to illustrate how to use **OpenGL Callback**.\n" " 3). You need Qt binding to compile this sample with OpenGL support enabled.\n" " 4). The features' detection is very basic and could highly be improved \n" From 235a678458f721e477ede5e370c759cf7f32f927 Mon Sep 17 00:00:00 2001 From: Andrey Kamaev Date: Thu, 4 Apr 2013 13:55:36 +0400 Subject: [PATCH 52/67] SVD: always update W vector for better algorithm convergency --- modules/core/src/lapack.cpp | 36 +++++++++--------------------------- 1 file changed, 9 insertions(+), 27 deletions(-) diff --git a/modules/core/src/lapack.cpp b/modules/core/src/lapack.cpp index b20273f6e..f5fe53ae9 100644 --- a/modules/core/src/lapack.cpp +++ b/modules/core/src/lapack.cpp @@ -577,10 +577,10 @@ JacobiSVDImpl_(_Tp* At, size_t astep, _Tp* _W, _Tp* Vt, size_t vstep, continue; p *= 2; - double beta = a - b, gamma = hypot((double)p, beta), delta; + double beta = a - b, gamma = hypot((double)p, beta); if( beta < 0 ) { - delta = (gamma - beta)*0.5; + double delta = (gamma - beta)*0.5; s = (_Tp)std::sqrt(delta/gamma); c = (_Tp)(p/(gamma*s*2)); } @@ -588,36 +588,18 @@ JacobiSVDImpl_(_Tp* At, size_t astep, _Tp* _W, _Tp* Vt, size_t vstep, { c = (_Tp)std::sqrt((gamma + beta)/(gamma*2)); s = (_Tp)(p/(gamma*c*2)); - delta = p*p*0.5/(gamma + beta); } - W[i] += delta; - W[j] -= delta; - - if( iter % 2 != 0 && W[i] > 0 && W[j] > 0 ) + a = b = 0; + for( k = 0; k < m; k++ ) { - k = vblas.givens(Ai, Aj, m, c, s); + _Tp t0 = c*Ai[k] + s*Aj[k]; + _Tp t1 = -s*Ai[k] + c*Aj[k]; + Ai[k] = t0; Aj[k] = t1; - for( ; k < m; k++ ) - { - _Tp t0 = c*Ai[k] + s*Aj[k]; - _Tp t1 = -s*Ai[k] + c*Aj[k]; - Ai[k] = t0; Aj[k] = t1; - } - } - else - { - a = b = 0; - for( k = 0; k < m; k++ ) - { - _Tp t0 = c*Ai[k] + s*Aj[k]; - _Tp t1 = -s*Ai[k] + c*Aj[k]; - Ai[k] = t0; Aj[k] = t1; - - a += (double)t0*t0; b += (double)t1*t1; - } - W[i] = a; W[j] = b; + a += (double)t0*t0; b += (double)t1*t1; } + W[i] = a; W[j] = b; changed = true; From 6aa4f533fa8694fced893a561382261eef5339a9 Mon Sep 17 00:00:00 2001 From: Vadim Pisarevsky Date: Thu, 4 Apr 2013 14:27:11 +0400 Subject: [PATCH 53/67] attempt to fix stereobm failures on Mac and Linux --- modules/calib3d/test/test_stereomatching.cpp | 23 ++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/modules/calib3d/test/test_stereomatching.cpp b/modules/calib3d/test/test_stereomatching.cpp index b715d48f5..3a0d73ce5 100644 --- a/modules/calib3d/test/test_stereomatching.cpp +++ b/modules/calib3d/test/test_stereomatching.cpp @@ -459,14 +459,29 @@ void CV_StereoMatchingTest::run(int) continue; } int dispScaleFactor = datasetsParams[datasetName].dispScaleFactor; - trueLeftDisp.convertTo( trueLeftDisp, CV_32FC1, 1.f/dispScaleFactor ); + Mat tmp; + + trueLeftDisp.convertTo( tmp, CV_32FC1, 1.f/dispScaleFactor ); + trueLeftDisp = tmp; + tmp.release(); + if( !trueRightDisp.empty() ) - trueRightDisp.convertTo( trueRightDisp, CV_32FC1, 1.f/dispScaleFactor ); + { + trueRightDisp.convertTo( tmp, CV_32FC1, 1.f/dispScaleFactor ); + trueRightDisp = tmp; + tmp.release(); + } Mat leftDisp, rightDisp; int ignBorder = max(runStereoMatchingAlgorithm(leftImg, rightImg, leftDisp, rightDisp, ci), EVAL_IGNORE_BORDER); - leftDisp.convertTo( leftDisp, CV_32FC1 ); - rightDisp.convertTo( rightDisp, CV_32FC1 ); + + leftDisp.convertTo( tmp, CV_32FC1 ); + leftDisp = tmp; + tmp.release(); + + rightDisp.convertTo( tmp, CV_32FC1 ); + rightDisp = tmp; + tmp.release(); int tempCode = processStereoMatchingResults( resFS, ci, isWrite, leftImg, rightImg, trueLeftDisp, trueRightDisp, leftDisp, rightDisp, QualityEvalParams(ignBorder)); From 35f75147b04992a810419b705e1c6db11e77b655 Mon Sep 17 00:00:00 2001 From: Alexander Shishkov Date: Thu, 4 Apr 2013 14:33:00 +0400 Subject: [PATCH 54/67] removed willowgarage.com links --- samples/cpp/stereo_calib.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/cpp/stereo_calib.cpp b/samples/cpp/stereo_calib.cpp index a6d276f8b..07621cef8 100644 --- a/samples/cpp/stereo_calib.cpp +++ b/samples/cpp/stereo_calib.cpp @@ -22,7 +22,7 @@ * An active user group is at: http://tech.groups.yahoo.com/group/OpenCV/ * The minutes of weekly OpenCV development meetings are at: - http://pr.willowgarage.com/wiki/OpenCV + http://code.opencv.org/projects/opencv/wiki/Meeting_notes ************************************************** */ #include "opencv2/calib3d/calib3d.hpp" From fa64f28c6ba629292c56c44ca5dd764fbb8d11db Mon Sep 17 00:00:00 2001 From: Andrey Kamaev Date: Thu, 4 Apr 2013 15:43:49 +0400 Subject: [PATCH 55/67] Fix binary directory used in CMake try_compile commands Old paths can have problems with cross-compilation --- cmake/OpenCVFindLibsPerf.cmake | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/cmake/OpenCVFindLibsPerf.cmake b/cmake/OpenCVFindLibsPerf.cmake index b94c35769..72b4ba6f7 100644 --- a/cmake/OpenCVFindLibsPerf.cmake +++ b/cmake/OpenCVFindLibsPerf.cmake @@ -49,8 +49,9 @@ endif() # --- OpenMP --- if(NOT HAVE_TBB AND NOT HAVE_CSTRIPES) set(_fname "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/omptest.cpp") - FILE(WRITE "${_fname}" "#ifndef _OPENMP\n#error\n#endif\nint main() { return 0; }\n") - TRY_COMPILE(HAVE_OPENMP "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp" "${_fname}") + file(WRITE "${_fname}" "#ifndef _OPENMP\n#error\n#endif\nint main() { return 0; }\n") + try_compile(HAVE_OPENMP "${CMAKE_BINARY_DIR}" "${_fname}") + file(REMOVE "${_fname}") else() set(HAVE_OPENMP 0) endif() @@ -65,8 +66,9 @@ endif() # --- Concurrency --- if(MSVC AND NOT HAVE_TBB AND NOT HAVE_CSTRIPES AND NOT HAVE_OPENMP) set(_fname "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/concurrencytest.cpp") - FILE(WRITE "${_fname}" "#if _MSC_VER < 1600\n#error\n#endif\nint main() { return 0; }\n") - TRY_COMPILE(HAVE_CONCURRENCY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp" "${_fname}") + file(WRITE "${_fname}" "#if _MSC_VER < 1600\n#error\n#endif\nint main() { return 0; }\n") + try_compile(HAVE_CONCURRENCY "${CMAKE_BINARY_DIR}" "${_fname}") + file(REMOVE "${_fname}") else() set(HAVE_CONCURRENCY 0) endif() From 1e332d690fd7a89f40b07b9970323847c6964625 Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Thu, 4 Apr 2013 15:50:36 +0400 Subject: [PATCH 56/67] OpenCV verison incremented OpenCV Manager verison incremeneted; Docs and tests updated accordingly; COnstant for Manager initialization added. --- android/service/doc/JavaHelper.rst | 4 ++++ android/service/engine/AndroidManifest.xml | 7 +++---- .../engine/jni/BinderComponent/OpenCVEngine.cpp | 2 +- .../engine/jni/Tests/OpenCVEngineTest.cpp | 2 +- android/service/engine/project.properties | 2 +- android/service/readme.txt | 12 ++++++------ .../android_binary_package/O4A_SDK.rst | 14 +++++++------- .../dev_with_OCV_on_Android.rst | 16 ++++++++-------- modules/core/include/opencv2/core/version.hpp | 2 +- .../generator/src/java/android+OpenCVLoader.java | 6 ++++++ 10 files changed, 38 insertions(+), 29 deletions(-) diff --git a/android/service/doc/JavaHelper.rst b/android/service/doc/JavaHelper.rst index 34798c267..e90b016e5 100644 --- a/android/service/doc/JavaHelper.rst +++ b/android/service/doc/JavaHelper.rst @@ -51,3 +51,7 @@ OpenCV version constants .. data:: OPENCV_VERSION_2_4_4 OpenCV Library version 2.4.4 + +.. data:: OPENCV_VERSION_2_4_5 + + OpenCV Library version 2.4.5 diff --git a/android/service/engine/AndroidManifest.xml b/android/service/engine/AndroidManifest.xml index f4f0eb94f..954955678 100644 --- a/android/service/engine/AndroidManifest.xml +++ b/android/service/engine/AndroidManifest.xml @@ -1,8 +1,8 @@ + android:versionCode="27@ANDROID_PLATFORM_VERSION_CODE@" + android:versionName="2.7" > @@ -26,6 +26,5 @@ - - + \ No newline at end of file diff --git a/android/service/engine/jni/BinderComponent/OpenCVEngine.cpp b/android/service/engine/jni/BinderComponent/OpenCVEngine.cpp index c10ab5428..274e36a4b 100644 --- a/android/service/engine/jni/BinderComponent/OpenCVEngine.cpp +++ b/android/service/engine/jni/BinderComponent/OpenCVEngine.cpp @@ -15,7 +15,7 @@ using namespace android; const int OpenCVEngine::Platform = DetectKnownPlatforms(); const int OpenCVEngine::CpuID = GetCpuID(); -const int OpenCVEngine::KnownVersions[] = {2040000, 2040100, 2040200, 2040300, 2040301, 2040302, 2040400}; +const int OpenCVEngine::KnownVersions[] = {2040000, 2040100, 2040200, 2040300, 2040301, 2040302, 2040400, 2040500}; bool OpenCVEngine::ValidateVersion(int version) { diff --git a/android/service/engine/jni/Tests/OpenCVEngineTest.cpp b/android/service/engine/jni/Tests/OpenCVEngineTest.cpp index ce5159f81..7473387a0 100644 --- a/android/service/engine/jni/Tests/OpenCVEngineTest.cpp +++ b/android/service/engine/jni/Tests/OpenCVEngineTest.cpp @@ -294,7 +294,7 @@ TEST(OpenCVEngineTest, GetPathFor2_4_5) Starter.PackageManager->InstallVersion(2040500, PLATFORM_UNKNOWN, ARCH_ARMv7); EXPECT_FALSE(NULL == Engine.get()); String16 result = Engine->GetLibPathByVersion(String16("2.4.5")); - EXPECT_EQ(0, result.size()); // 2.4.5 is not published yet + EXPECT_STREQ("/data/data/org.opencv.lib_v24_armv7a/lib", String8(result).string()); } #endif diff --git a/android/service/engine/project.properties b/android/service/engine/project.properties index c6998b3d1..85aac5401 100644 --- a/android/service/engine/project.properties +++ b/android/service/engine/project.properties @@ -11,4 +11,4 @@ #proguard.config=${sdk.dir}/tools/proguard/proguard-android.txt:proguard-project.txt # Project target. -target=android-9 +target=android-8 diff --git a/android/service/readme.txt b/android/service/readme.txt index df17c1824..f4e65eb36 100644 --- a/android/service/readme.txt +++ b/android/service/readme.txt @@ -14,20 +14,20 @@ manually using adb tool: .. code-block:: sh - adb install OpenCV-2.4.4-android-sdk/apk/OpenCV_2.4.4_Manager_2.6_.apk + adb install OpenCV-2.4.5-android-sdk/apk/OpenCV_2.4.5_Manager_2.7_.apk Use the table below to determine proper OpenCV Manager package for your device: +------------------------------+--------------+---------------------------------------------------+ | Hardware Platform | Android ver. | Package name | +==============================+==============+===================================================+ -| armeabi-v7a (ARMv7-A + NEON) | >= 2.3 | OpenCV_2.4.4_Manager_2.6_armv7a-neon.apk | +| armeabi-v7a (ARMv7-A + NEON) | >= 2.3 | OpenCV_2.4.5_Manager_2.7_armv7a-neon.apk | +------------------------------+--------------+---------------------------------------------------+ -| armeabi-v7a (ARMv7-A + NEON) | = 2.2 | OpenCV_2.4.4_Manager_2.6_armv7a-neon-android8.apk | +| armeabi-v7a (ARMv7-A + NEON) | = 2.2 | OpenCV_2.4.5_Manager_2.7_armv7a-neon-android8.apk | +------------------------------+--------------+---------------------------------------------------+ -| armeabi (ARMv5, ARMv6) | >= 2.3 | OpenCV_2.4.4_Manager_2.6_armeabi.apk | +| armeabi (ARMv5, ARMv6) | >= 2.3 | OpenCV_2.4.5_Manager_2.7_armeabi.apk | +------------------------------+--------------+---------------------------------------------------+ -| Intel x86 | >= 2.3 | OpenCV_2.4.4_Manager_2.6_x86.apk | +| Intel x86 | >= 2.3 | OpenCV_2.4.5_Manager_2.7_x86.apk | +------------------------------+--------------+---------------------------------------------------+ -| MIPS | >= 2.3 | OpenCV_2.4.4_Manager_2.6_mips.apk | +| MIPS | >= 2.3 | OpenCV_2.4.5_Manager_2.7_mips.apk | +------------------------------+--------------+---------------------------------------------------+ diff --git a/doc/tutorials/introduction/android_binary_package/O4A_SDK.rst b/doc/tutorials/introduction/android_binary_package/O4A_SDK.rst index b37126f30..b6c859dc3 100644 --- a/doc/tutorials/introduction/android_binary_package/O4A_SDK.rst +++ b/doc/tutorials/introduction/android_binary_package/O4A_SDK.rst @@ -48,10 +48,10 @@ The structure of package contents looks as follows: :: - OpenCV-2.4.4-android-sdk + OpenCV-2.4.5-android-sdk |_ apk - | |_ OpenCV_2.4.4_binary_pack_armv7a.apk - | |_ OpenCV_2.4.4_Manager_2.6_XXX.apk + | |_ OpenCV_2.4.5_binary_pack_armv7a.apk + | |_ OpenCV_2.4.5_Manager_2.7_XXX.apk | |_ doc |_ samples @@ -157,10 +157,10 @@ Get the OpenCV4Android SDK .. code-block:: bash - unzip ~/Downloads/OpenCV-2.4.4-android-sdk.zip + unzip ~/Downloads/OpenCV-2.4.5-android-sdk.zip -.. |opencv_android_bin_pack| replace:: :file:`OpenCV-2.4.4-android-sdk.zip` -.. _opencv_android_bin_pack_url: http://sourceforge.net/projects/opencvlibrary/files/opencv-android/2.4.4/OpenCV-2.4.4-android-sdk.zip/download +.. |opencv_android_bin_pack| replace:: :file:`OpenCV-2.4.5-android-sdk.zip` +.. _opencv_android_bin_pack_url: http://sourceforge.net/projects/opencvlibrary/files/opencv-android/2.4.5/OpenCV-2.4.5-android-sdk.zip/download .. |opencv_android_bin_pack_url| replace:: |opencv_android_bin_pack| .. |seven_zip| replace:: 7-Zip .. _seven_zip: http://www.7-zip.org/ @@ -295,7 +295,7 @@ Well, running samples from Eclipse is very simple: .. code-block:: sh :linenos: - /platform-tools/adb install /apk/OpenCV_2.4.4_Manager_2.6_armv7a-neon.apk + /platform-tools/adb install /apk/OpenCV_2.4.5_Manager_2.7_armv7a-neon.apk .. note:: ``armeabi``, ``armv7a-neon``, ``arm7a-neon-android8``, ``mips`` and ``x86`` stand for platform targets: diff --git a/doc/tutorials/introduction/android_binary_package/dev_with_OCV_on_Android.rst b/doc/tutorials/introduction/android_binary_package/dev_with_OCV_on_Android.rst index c9635aae6..5709b64b3 100644 --- a/doc/tutorials/introduction/android_binary_package/dev_with_OCV_on_Android.rst +++ b/doc/tutorials/introduction/android_binary_package/dev_with_OCV_on_Android.rst @@ -55,14 +55,14 @@ Manager to access OpenCV libraries externally installed in the target system. :guilabel:`File -> Import -> Existing project in your workspace`. Press :guilabel:`Browse` button and locate OpenCV4Android SDK - (:file:`OpenCV-2.4.4-android-sdk/sdk`). + (:file:`OpenCV-2.4.5-android-sdk/sdk`). .. image:: images/eclipse_opencv_dependency0.png :alt: Add dependency from OpenCV library :align: center #. In application project add a reference to the OpenCV Java SDK in - :guilabel:`Project -> Properties -> Android -> Library -> Add` select ``OpenCV Library - 2.4.4``. + :guilabel:`Project -> Properties -> Android -> Library -> Add` select ``OpenCV Library - 2.4.5``. .. image:: images/eclipse_opencv_dependency1.png :alt: Add dependency from OpenCV library @@ -101,7 +101,7 @@ See the "15-puzzle" OpenCV sample for details. public void onResume() { super.onResume(); - OpenCVLoader.initAsync(OpenCVLoader.OPENCV_VERSION_2_4_3, this, mLoaderCallback); + OpenCVLoader.initAsync(OpenCVLoader.OPENCV_VERSION_2_4_5, this, mLoaderCallback); } ... @@ -128,27 +128,27 @@ described above. #. Add the OpenCV library project to your workspace the same way as for the async initialization above. Use menu :guilabel:`File -> Import -> Existing project in your workspace`, press :guilabel:`Browse` button and select OpenCV SDK path - (:file:`OpenCV-2.4.4-android-sdk/sdk`). + (:file:`OpenCV-2.4.5-android-sdk/sdk`). .. image:: images/eclipse_opencv_dependency0.png :alt: Add dependency from OpenCV library :align: center #. In the application project add a reference to the OpenCV4Android SDK in - :guilabel:`Project -> Properties -> Android -> Library -> Add` select ``OpenCV Library - 2.4.4``; + :guilabel:`Project -> Properties -> Android -> Library -> Add` select ``OpenCV Library - 2.4.5``; .. image:: images/eclipse_opencv_dependency1.png :alt: Add dependency from OpenCV library :align: center #. If your application project **doesn't have a JNI part**, just copy the corresponding OpenCV - native libs from :file:`/sdk/native/libs/` to your + native libs from :file:`/sdk/native/libs/` to your project directory to folder :file:`libs/`. In case of the application project **with a JNI part**, instead of manual libraries copying you need to modify your ``Android.mk`` file: add the following two code lines after the ``"include $(CLEAR_VARS)"`` and before - ``"include path_to_OpenCV-2.4.4-android-sdk/sdk/native/jni/OpenCV.mk"`` + ``"include path_to_OpenCV-2.4.5-android-sdk/sdk/native/jni/OpenCV.mk"`` .. code-block:: make :linenos: @@ -221,7 +221,7 @@ taken: .. code-block:: make - include C:\Work\OpenCV4Android\OpenCV-2.4.4-android-sdk\sdk\native\jni\OpenCV.mk + include C:\Work\OpenCV4Android\OpenCV-2.4.5-android-sdk\sdk\native\jni\OpenCV.mk Should be inserted into the :file:`jni/Android.mk` file **after** this line: diff --git a/modules/core/include/opencv2/core/version.hpp b/modules/core/include/opencv2/core/version.hpp index 9b2c02c30..5676a1795 100644 --- a/modules/core/include/opencv2/core/version.hpp +++ b/modules/core/include/opencv2/core/version.hpp @@ -49,7 +49,7 @@ #define CV_VERSION_EPOCH 2 #define CV_VERSION_MAJOR 4 -#define CV_VERSION_MINOR 4 +#define CV_VERSION_MINOR 5 #define CV_VERSION_REVISION 0 #define CVAUX_STR_EXP(__A) #__A diff --git a/modules/java/generator/src/java/android+OpenCVLoader.java b/modules/java/generator/src/java/android+OpenCVLoader.java index 70e94944d..fb05b826c 100644 --- a/modules/java/generator/src/java/android+OpenCVLoader.java +++ b/modules/java/generator/src/java/android+OpenCVLoader.java @@ -22,6 +22,12 @@ public class OpenCVLoader */ public static final String OPENCV_VERSION_2_4_4 = "2.4.4"; + /** + * OpenCV Library version 2.4.5. + */ + public static final String OPENCV_VERSION_2_4_5 = "2.4.5"; + + /** * Loads and initializes OpenCV library from current application package. Roughly, it's an analog of system.loadLibrary("opencv_java"). * @return Returns true is initialization of OpenCV was successful. From a914088f29f03f962274e9d0a0cee7fa4bcfcd0d Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Wed, 3 Apr 2013 23:11:33 -0700 Subject: [PATCH 57/67] Build warning fixes. --- modules/core/include/opencv2/core/types_c.h | 4 +++- modules/imgproc/src/imgwarp.cpp | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/modules/core/include/opencv2/core/types_c.h b/modules/core/include/opencv2/core/types_c.h index 3a0830463..8db2fe766 100644 --- a/modules/core/include/opencv2/core/types_c.h +++ b/modules/core/include/opencv2/core/types_c.h @@ -315,6 +315,8 @@ CV_INLINE int cvRound( double value ) fistp t; } return t; +#elif defined _MSC_VER && defined _M_ARM && defined HAVE_TEGRA_OPTIMIZATION + TEGRA_ROUND(value); #elif defined HAVE_LRINT || defined CV_ICC || defined __GNUC__ # ifdef HAVE_TEGRA_OPTIMIZATION TEGRA_ROUND(value); @@ -324,7 +326,7 @@ CV_INLINE int cvRound( double value ) #else double intpart, fractpart; fractpart = modf(value, &intpart); - if ((abs(fractpart) != 0.5) || ((((int)intpart) % 2) != 0)) + if ((fabs(fractpart) != 0.5) || ((((int)intpart) % 2) != 0)) return (int)(value + (value >= 0 ? 0.5 : -0.5)); else return (int)intpart; diff --git a/modules/imgproc/src/imgwarp.cpp b/modules/imgproc/src/imgwarp.cpp index 2fe80616a..848f6e980 100644 --- a/modules/imgproc/src/imgwarp.cpp +++ b/modules/imgproc/src/imgwarp.cpp @@ -1740,7 +1740,7 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize, #ifdef HAVE_TEGRA_OPTIMIZATION - if (tegra::resize(src, dst, inv_scale_x, inv_scale_y, interpolation)) + if (tegra::resize(src, dst, (float)inv_scale_x, (float)inv_scale_y, interpolation)) return; #endif From 2b1e13f8f45bc7175c61a07580daa61ff192b3ee Mon Sep 17 00:00:00 2001 From: Gurpinder Singh Sandhu Date: Thu, 4 Apr 2013 22:52:07 +0530 Subject: [PATCH 58/67] changed surfFeaturesFinder::find() to allow CV_8UC1 type images http://code.opencv.org/issues/2926 --- modules/stitching/src/matchers.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/modules/stitching/src/matchers.cpp b/modules/stitching/src/matchers.cpp index 2231d1368..d05afff24 100644 --- a/modules/stitching/src/matchers.cpp +++ b/modules/stitching/src/matchers.cpp @@ -348,7 +348,15 @@ SurfFeaturesFinder::SurfFeaturesFinder(double hess_thresh, int num_octaves, int void SurfFeaturesFinder::find(const Mat &image, ImageFeatures &features) { Mat gray_image; - CV_Assert(image.type() == CV_8UC3); + CV_Assert((image.type() == CV_8UC3) || (image.type() == CV_8UC1)); + if(image.type() == CV_8UC3) + { + cvtColor(image, gray_image, CV_BGR2GRAY); + } + else + { + gray_image = image; + } cvtColor(image, gray_image, CV_BGR2GRAY); if (surf == 0) { From 69127e4105b3798d7540f40d34e768d946038e7c Mon Sep 17 00:00:00 2001 From: Gurpinder Singh Sandhu Date: Thu, 4 Apr 2013 22:54:27 +0530 Subject: [PATCH 59/67] some typo --- modules/stitching/src/matchers.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/stitching/src/matchers.cpp b/modules/stitching/src/matchers.cpp index d05afff24..9bab58c52 100644 --- a/modules/stitching/src/matchers.cpp +++ b/modules/stitching/src/matchers.cpp @@ -357,7 +357,6 @@ void SurfFeaturesFinder::find(const Mat &image, ImageFeatures &features) { gray_image = image; } - cvtColor(image, gray_image, CV_BGR2GRAY); if (surf == 0) { detector_->detect(gray_image, features.keypoints); From d51d05fc448b456c6191257b90055f93071ec88e Mon Sep 17 00:00:00 2001 From: Gurpinder Singh Sandhu Date: Thu, 4 Apr 2013 23:12:18 +0530 Subject: [PATCH 60/67] Feature description tutorials made in sync with sample code sample code : https://github.com/Itseez/opencv/blob/master/samples/cpp/tutorial_code/features2D/SURF_descriptor.cpp Bug #2888 --- .../features2d/feature_description/feature_description.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/tutorials/features2d/feature_description/feature_description.rst b/doc/tutorials/features2d/feature_description/feature_description.rst index fe9b7cdbf..6f364e03e 100644 --- a/doc/tutorials/features2d/feature_description/feature_description.rst +++ b/doc/tutorials/features2d/feature_description/feature_description.rst @@ -69,7 +69,7 @@ This tutorial code's is shown lines below. You can also download it from `here < extractor.compute( img_2, keypoints_2, descriptors_2 ); //-- Step 3: Matching descriptor vectors with a brute force matcher - BruteForceMatcher< L2 > matcher; + BFMatcher matcher(NORM_L2); std::vector< DMatch > matches; matcher.match( descriptors_1, descriptors_2, matches ); From 87b84a41971b2bb1d33ef65a4d1bf8f4e7163860 Mon Sep 17 00:00:00 2001 From: Gurpinder Singh Sandhu Date: Thu, 4 Apr 2013 23:13:03 +0530 Subject: [PATCH 61/67] another update --- .../features2d/feature_description/feature_description.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/tutorials/features2d/feature_description/feature_description.rst b/doc/tutorials/features2d/feature_description/feature_description.rst index 6f364e03e..0329cc464 100644 --- a/doc/tutorials/features2d/feature_description/feature_description.rst +++ b/doc/tutorials/features2d/feature_description/feature_description.rst @@ -13,7 +13,7 @@ In this tutorial you will learn how to: * Use the :descriptor_extractor:`DescriptorExtractor<>` interface in order to find the feature vector correspondent to the keypoints. Specifically: * Use :surf_descriptor_extractor:`SurfDescriptorExtractor<>` and its function :descriptor_extractor:`compute<>` to perform the required calculations. - * Use a :brute_force_matcher:`BruteForceMatcher<>` to match the features vector + * Use a :brute_force_matcher:`BFMatcher<>` to match the features vector * Use the function :draw_matches:`drawMatches<>` to draw the detected matches. From bcc086baa997ebc64a237666283489d4f4040104 Mon Sep 17 00:00:00 2001 From: yao Date: Fri, 5 Apr 2013 08:15:05 +0800 Subject: [PATCH 62/67] fix all redefine build errors on some Intel OCL --- modules/ocl/src/opencl/arithm_absdiff.cl | 122 +++-- modules/ocl/src/opencl/arithm_add.cl | 96 ++-- modules/ocl/src/opencl/arithm_addWeighted.cl | 238 +++++----- modules/ocl/src/opencl/arithm_add_scalar.cl | 95 ++-- .../ocl/src/opencl/arithm_add_scalar_mask.cl | 95 ++-- modules/ocl/src/opencl/arithm_bitwise_and.cl | 194 ++++---- .../ocl/src/opencl/arithm_bitwise_and_mask.cl | 435 ++++++++++-------- .../src/opencl/arithm_bitwise_and_scalar.cl | 327 +++++++------ .../opencl/arithm_bitwise_and_scalar_mask.cl | 374 ++++++++------- modules/ocl/src/opencl/arithm_bitwise_not.cl | 65 ++- modules/ocl/src/opencl/arithm_bitwise_or.cl | 102 ++-- .../ocl/src/opencl/arithm_bitwise_or_mask.cl | 433 +++++++++-------- .../src/opencl/arithm_bitwise_or_scalar.cl | 318 +++++++------ .../opencl/arithm_bitwise_or_scalar_mask.cl | 376 ++++++++------- modules/ocl/src/opencl/arithm_bitwise_xor.cl | 188 ++++---- .../ocl/src/opencl/arithm_bitwise_xor_mask.cl | 433 +++++++++-------- .../src/opencl/arithm_bitwise_xor_scalar.cl | 329 +++++++------ .../opencl/arithm_bitwise_xor_scalar_mask.cl | 376 ++++++++------- modules/ocl/src/opencl/arithm_compare_eq.cl | 239 ++++++---- modules/ocl/src/opencl/arithm_compare_ne.cl | 231 ++++++---- modules/ocl/src/opencl/arithm_div.cl | 125 ++--- modules/ocl/src/opencl/arithm_flip.cl | 32 +- modules/ocl/src/opencl/arithm_mul.cl | 41 +- 23 files changed, 3080 insertions(+), 2184 deletions(-) diff --git a/modules/ocl/src/opencl/arithm_absdiff.cl b/modules/ocl/src/opencl/arithm_absdiff.cl index 37f154216..6ae869d61 100644 --- a/modules/ocl/src/opencl/arithm_absdiff.cl +++ b/modules/ocl/src/opencl/arithm_absdiff.cl @@ -44,7 +44,11 @@ //M*/ #if defined (DOUBLE_SUPPORT) +#ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64:enable +#elif defined (cl_amd_fp64) +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#endif #endif ////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -62,7 +66,10 @@ __kernel void arithm_absdiff_D0 (__global uchar *src1, int src1_step, int src1_o if (x < cols && y < rows) { x = x << 2; - + +#ifdef dst_align +#undef dst_align +#endif #define dst_align (dst_offset & 3) int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); int src2_index = mad24(y, src2_step, x + src2_offset - dst_align); @@ -110,8 +117,11 @@ __kernel void arithm_absdiff_D2 (__global ushort *src1, int src1_step, int src1_ if (x < cols && y < rows) { x = x << 2; - - #define dst_align ((dst_offset >> 1) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 3) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); @@ -144,8 +154,11 @@ __kernel void arithm_absdiff_D3 (__global short *src1, int src1_step, int src1_o if (x < cols && y < rows) { x = x << 2; - - #define dst_align ((dst_offset >> 1) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 3) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); @@ -248,8 +261,11 @@ __kernel void arithm_s_absdiff_C1_D0 (__global uchar *src1, int src1_step, int if (x < cols && y < rows) { x = x << 2; - - #define dst_align (dst_offset & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (dst_offset & 3) int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); int dst_start = mad24(y, dst_step, dst_offset); @@ -287,8 +303,11 @@ __kernel void arithm_s_absdiff_C1_D2 (__global ushort *src1, int src1_step, in if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int dst_start = mad24(y, dst_step, dst_offset); @@ -318,8 +337,11 @@ __kernel void arithm_s_absdiff_C1_D3 (__global short *src1, int src1_step, int if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int dst_start = mad24(y, dst_step, dst_offset); @@ -387,8 +409,8 @@ __kernel void arithm_s_absdiff_C1_D5 (__global float *src1, int src1_step, int #if defined (DOUBLE_SUPPORT) __kernel void arithm_s_absdiff_C1_D6 (__global double *src1, int src1_step, int src1_offset, - __global double *dst, int dst_step, int dst_offset, - double4 src2, int rows, int cols, int dst_step1) + __global double *dst, int dst_step, int dst_offset, + double4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -421,8 +443,11 @@ __kernel void arithm_s_absdiff_C2_D0 (__global uchar *src1, int src1_step, int if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int dst_start = mad24(y, dst_step, dst_offset); @@ -465,7 +490,7 @@ __kernel void arithm_s_absdiff_C2_D2 (__global ushort *src1, int src1_step, in } __kernel void arithm_s_absdiff_C2_D3 (__global short *src1, int src1_step, int src1_offset, __global short *dst, int dst_step, int dst_offset, - int4 src2, int rows, int cols, int dst_step1) + int4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -509,7 +534,7 @@ __kernel void arithm_s_absdiff_C2_D4 (__global int *src1, int src1_step, int s } __kernel void arithm_s_absdiff_C2_D5 (__global float *src1, int src1_step, int src1_offset, __global float *dst, int dst_step, int dst_offset, - float4 src2, int rows, int cols, int dst_step1) + float4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -563,8 +588,11 @@ __kernel void arithm_s_absdiff_C3_D0 (__global uchar *src1, int src1_step, int if (x < cols && y < rows) { x = x << 2; - - #define dst_align (((dst_offset % dst_step) / 3 ) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 3 ) & 3) int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3)); int dst_start = mad24(y, dst_step, dst_offset); @@ -617,8 +645,11 @@ __kernel void arithm_s_absdiff_C3_D2 (__global ushort *src1, int src1_step, in if (x < cols && y < rows) { x = x << 1; - - #define dst_align (((dst_offset % dst_step) / 6 ) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 6 ) & 1) int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6)); int dst_start = mad24(y, dst_step, dst_offset); @@ -644,16 +675,16 @@ __kernel void arithm_s_absdiff_C3_D2 (__global ushort *src1, int src1_step, in data_0.xy = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy; data_1.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) - ? tmp_data_1.x : data_1.x; + ? tmp_data_1.x : data_1.x; data_1.y = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_1.y : data_1.y; + ? tmp_data_1.y : data_1.y; data_2.xy = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_2.xy : data_2.xy; + ? tmp_data_2.xy : data_2.xy; - *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0; - *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1; - *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2; + *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0; + *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1; + *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2; } } __kernel void arithm_s_absdiff_C3_D3 (__global short *src1, int src1_step, int src1_offset, @@ -667,8 +698,11 @@ __kernel void arithm_s_absdiff_C3_D3 (__global short *src1, int src1_step, int if (x < cols && y < rows) { x = x << 1; - - #define dst_align (((dst_offset % dst_step) / 6 ) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 6 ) & 1) int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6)); int dst_start = mad24(y, dst_step, dst_offset); @@ -694,16 +728,16 @@ __kernel void arithm_s_absdiff_C3_D3 (__global short *src1, int src1_step, int data_0.xy = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy; data_1.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) - ? tmp_data_1.x : data_1.x; + ? tmp_data_1.x : data_1.x; data_1.y = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_1.y : data_1.y; + ? tmp_data_1.y : data_1.y; data_2.xy = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_2.xy : data_2.xy; + ? tmp_data_2.xy : data_2.xy; - *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0; - *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1; - *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2; + *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0; + *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1; + *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2; } } __kernel void arithm_s_absdiff_C3_D4 (__global int *src1, int src1_step, int src1_offset, @@ -735,9 +769,9 @@ __kernel void arithm_s_absdiff_C3_D4 (__global int *src1, int src1_step, int s int tmp_data_1 = convert_int_sat(abs_diff(src1_data_1, src2_data_1)); int tmp_data_2 = convert_int_sat(abs_diff(src1_data_2, src2_data_2)); - *((__global int *)((__global char *)dst + dst_index + 0))= tmp_data_0; - *((__global int *)((__global char *)dst + dst_index + 4))= tmp_data_1; - *((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2; + *((__global int *)((__global char *)dst + dst_index + 0))= tmp_data_0; + *((__global int *)((__global char *)dst + dst_index + 4))= tmp_data_1; + *((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2; } } __kernel void arithm_s_absdiff_C3_D5 (__global float *src1, int src1_step, int src1_offset, @@ -769,9 +803,9 @@ __kernel void arithm_s_absdiff_C3_D5 (__global float *src1, int src1_step, int float tmp_data_1 = fabs(src1_data_1 - src2_data_1); float tmp_data_2 = fabs(src1_data_2 - src2_data_2); - *((__global float *)((__global char *)dst + dst_index + 0))= tmp_data_0; - *((__global float *)((__global char *)dst + dst_index + 4))= tmp_data_1; - *((__global float *)((__global char *)dst + dst_index + 8))= tmp_data_2; + *((__global float *)((__global char *)dst + dst_index + 0))= tmp_data_0; + *((__global float *)((__global char *)dst + dst_index + 4))= tmp_data_1; + *((__global float *)((__global char *)dst + dst_index + 8))= tmp_data_2; } } @@ -805,9 +839,9 @@ __kernel void arithm_s_absdiff_C3_D6 (__global double *src1, int src1_step, in double tmp_data_1 = fabs(src1_data_1 - src2_data_1); double tmp_data_2 = fabs(src1_data_2 - src2_data_2); - *((__global double *)((__global char *)dst + dst_index + 0 ))= tmp_data_0; - *((__global double *)((__global char *)dst + dst_index + 8 ))= tmp_data_1; - *((__global double *)((__global char *)dst + dst_index + 16))= tmp_data_2; + *((__global double *)((__global char *)dst + dst_index + 0 ))= tmp_data_0; + *((__global double *)((__global char *)dst + dst_index + 8 ))= tmp_data_1; + *((__global double *)((__global char *)dst + dst_index + 16))= tmp_data_2; } } #endif diff --git a/modules/ocl/src/opencl/arithm_add.cl b/modules/ocl/src/opencl/arithm_add.cl index 789a42444..647171578 100644 --- a/modules/ocl/src/opencl/arithm_add.cl +++ b/modules/ocl/src/opencl/arithm_add.cl @@ -45,7 +45,11 @@ //M*/ #if defined (DOUBLE_SUPPORT) +#ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64:enable +#elif defined (cl_amd_fp64) +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#endif #endif ////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -64,7 +68,10 @@ __kernel void arithm_add_D0 (__global uchar *src1, int src1_step, int src1_offse { x = x << 2; - #define dst_align (dst_offset & 3) +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (dst_offset & 3) int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); int src2_index = mad24(y, src2_step, x + src2_offset - dst_align); @@ -112,7 +119,10 @@ __kernel void arithm_add_D2 (__global ushort *src1, int src1_step, int src1_offs { x = x << 2; - #define dst_align ((dst_offset >> 1) & 3) +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 3) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); @@ -147,7 +157,10 @@ __kernel void arithm_add_D3 (__global short *src1, int src1_step, int src1_offse { x = x << 2; - #define dst_align ((dst_offset >> 1) & 3) +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 3) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); @@ -252,7 +265,10 @@ __kernel void arithm_add_with_mask_C1_D0 (__global uchar *src1, int src1_step, i { x = x << 2; - #define dst_align (dst_offset & 3) +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (dst_offset & 3) int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); int src2_index = mad24(y, src2_step, x + src2_offset - dst_align); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -311,7 +327,10 @@ __kernel void arithm_add_with_mask_C1_D2 (__global ushort *src1, int src1_step, { x = x << 1; - #define dst_align ((dst_offset >> 1) & 1) +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -348,7 +367,10 @@ __kernel void arithm_add_with_mask_C1_D3 (__global short *src1, int src1_step, i { x = x << 1; - #define dst_align ((dst_offset >> 1) & 1) +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -477,7 +499,10 @@ __kernel void arithm_add_with_mask_C2_D0 (__global uchar *src1, int src1_step, i { x = x << 1; - #define dst_align ((dst_offset >> 1) & 1) +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -664,7 +689,10 @@ __kernel void arithm_add_with_mask_C3_D0 (__global uchar *src1, int src1_step, i { x = x << 2; - #define dst_align (((dst_offset % dst_step) / 3 ) & 3) +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 3 ) & 3) int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3)); int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -724,7 +752,10 @@ __kernel void arithm_add_with_mask_C3_D2 (__global ushort *src1, int src1_step, { x = x << 1; - #define dst_align (((dst_offset % dst_step) / 6 ) & 1) +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 6 ) & 1) int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6)); int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -754,16 +785,16 @@ __kernel void arithm_add_with_mask_C3_D2 (__global ushort *src1, int src1_step, data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy; data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) - ? tmp_data_1.x : data_1.x; + ? tmp_data_1.x : data_1.x; data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_1.y : data_1.y; + ? tmp_data_1.y : data_1.y; data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_2.xy : data_2.xy; + ? tmp_data_2.xy : data_2.xy; - *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0; - *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1; - *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2; + *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0; + *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1; + *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2; } } __kernel void arithm_add_with_mask_C3_D3 (__global short *src1, int src1_step, int src1_offset, @@ -780,7 +811,10 @@ __kernel void arithm_add_with_mask_C3_D3 (__global short *src1, int src1_step, i { x = x << 1; - #define dst_align (((dst_offset % dst_step) / 6 ) & 1) +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 6 ) & 1) int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6)); int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -810,16 +844,16 @@ __kernel void arithm_add_with_mask_C3_D3 (__global short *src1, int src1_step, i data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy; data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) - ? tmp_data_1.x : data_1.x; + ? tmp_data_1.x : data_1.x; data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_1.y : data_1.y; + ? tmp_data_1.y : data_1.y; data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_2.xy : data_2.xy; + ? tmp_data_2.xy : data_2.xy; - *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0; - *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1; - *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2; + *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0; + *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1; + *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2; } } __kernel void arithm_add_with_mask_C3_D4 (__global int *src1, int src1_step, int src1_offset, @@ -861,9 +895,9 @@ __kernel void arithm_add_with_mask_C3_D4 (__global int *src1, int src1_step, i data_1 = mask_data ? tmp_data_1 : data_1; data_2 = mask_data ? tmp_data_2 : data_2; - *((__global int *)((__global char *)dst + dst_index + 0))= data_0; - *((__global int *)((__global char *)dst + dst_index + 4))= data_1; - *((__global int *)((__global char *)dst + dst_index + 8))= data_2; + *((__global int *)((__global char *)dst + dst_index + 0))= data_0; + *((__global int *)((__global char *)dst + dst_index + 4))= data_1; + *((__global int *)((__global char *)dst + dst_index + 8))= data_2; } } __kernel void arithm_add_with_mask_C3_D5 (__global float *src1, int src1_step, int src1_offset, @@ -905,9 +939,9 @@ __kernel void arithm_add_with_mask_C3_D5 (__global float *src1, int src1_step, i data_1 = mask_data ? tmp_data_1 : data_1; data_2 = mask_data ? tmp_data_2 : data_2; - *((__global float *)((__global char *)dst + dst_index + 0))= data_0; - *((__global float *)((__global char *)dst + dst_index + 4))= data_1; - *((__global float *)((__global char *)dst + dst_index + 8))= data_2; + *((__global float *)((__global char *)dst + dst_index + 0))= data_0; + *((__global float *)((__global char *)dst + dst_index + 4))= data_1; + *((__global float *)((__global char *)dst + dst_index + 8))= data_2; } } @@ -951,9 +985,9 @@ __kernel void arithm_add_with_mask_C3_D6 (__global double *src1, int src1_step, data_1 = mask_data ? tmp_data_1 : data_1; data_2 = mask_data ? tmp_data_2 : data_2; - *((__global double *)((__global char *)dst + dst_index + 0 ))= data_0; - *((__global double *)((__global char *)dst + dst_index + 8 ))= data_1; - *((__global double *)((__global char *)dst + dst_index + 16))= data_2; + *((__global double *)((__global char *)dst + dst_index + 0 ))= data_0; + *((__global double *)((__global char *)dst + dst_index + 8 ))= data_1; + *((__global double *)((__global char *)dst + dst_index + 16))= data_2; } } #endif diff --git a/modules/ocl/src/opencl/arithm_addWeighted.cl b/modules/ocl/src/opencl/arithm_addWeighted.cl index d76f994aa..d3a002625 100644 --- a/modules/ocl/src/opencl/arithm_addWeighted.cl +++ b/modules/ocl/src/opencl/arithm_addWeighted.cl @@ -42,8 +42,12 @@ // the use of this software, even if advised of the possibility of such damage. // //M*/ -#if defined DOUBLE_SUPPORT +#if defined (DOUBLE_SUPPORT) +#ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64:enable +#elif defined (cl_amd_fp64) +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#endif typedef double F; #else typedef float F; @@ -52,10 +56,10 @@ typedef float F; /////////////////////////////////////////////addWeighted////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////// __kernel void addWeighted_D0 (__global uchar *src1,int src1_step,int src1_offset, - __global uchar *src2, int src2_step,int src2_offset, - F alpha,F beta,F gama, - __global uchar *dst, int dst_step,int dst_offset, - int rows, int cols,int dst_step1) + __global uchar *src2, int src2_step,int src2_offset, + F alpha,F beta,F gama, + __global uchar *dst, int dst_step,int dst_offset, + int rows, int cols,int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -65,7 +69,10 @@ __kernel void addWeighted_D0 (__global uchar *src1,int src1_step,int src1_offset { x = x << 2; - #define dst_align (dst_offset & 3) +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (dst_offset & 3) int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); int src2_index = mad24(y, src2_step, x + src2_offset - dst_align); @@ -87,7 +94,7 @@ __kernel void addWeighted_D0 (__global uchar *src1,int src1_step,int src1_offset uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); // short4 tmp = convert_short4_sat(src1_data) * alpha + convert_short4_sat(src2_data) * beta + gama; - short4 tmp; + short4 tmp; tmp.x = src1_data.x * alpha + src2_data.x * beta + gama; tmp.y = src1_data.y * alpha + src2_data.y * beta + gama; tmp.z = src1_data.z * alpha + src2_data.z * beta + gama; @@ -100,7 +107,7 @@ __kernel void addWeighted_D0 (__global uchar *src1,int src1_step,int src1_offset dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w; *((__global uchar4 *)(dst + dst_index)) = dst_data; - // dst[x + y * dst_step] = src1[x + y * src1_step] * alpha + src2[x + y * src2_step] * beta + gama; + // dst[x + y * dst_step] = src1[x + y * src1_step] * alpha + src2[x + y * src2_step] * beta + gama; } } @@ -108,10 +115,10 @@ __kernel void addWeighted_D0 (__global uchar *src1,int src1_step,int src1_offset __kernel void addWeighted_D2 (__global ushort *src1, int src1_step,int src1_offset, - __global ushort *src2, int src2_step,int src2_offset, - F alpha,F beta,F gama, - __global ushort *dst, int dst_step,int dst_offset, - int rows, int cols,int dst_step1) + __global ushort *src2, int src2_step,int src2_offset, + F alpha,F beta,F gama, + __global ushort *dst, int dst_step,int dst_offset, + int rows, int cols,int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -121,35 +128,38 @@ __kernel void addWeighted_D2 (__global ushort *src1, int src1_step,int src1_offs { x = x << 2; - - #define dst_align ((dst_offset >> 1) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 3) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset -( dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset -( dst_align << 1)); int dst_start = mad24(y, dst_step, dst_offset); int dst_end = mad24(y, dst_step, dst_offset + dst_step1); int dst_index = mad24(y, dst_step, dst_offset +( x<< 1) & (int)0xfffffff8); - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; + int src1_index_fix = src1_index < 0 ? 0 : src1_index; + int src2_index_fix = src2_index < 0 ? 0 : src2_index; ushort4 src1_data = vload4(0, (__global ushort *)((__global char *)src1 + src1_index_fix)); ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index_fix)); - if(src1_index < 0) - { - ushort4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - ushort4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } + if(src1_index < 0) + { + ushort4 tmp; + tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; + src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; + } + if(src2_index < 0) + { + ushort4 tmp; + tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; + src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; + } ushort4 dst_data = *((__global ushort4 *)((__global char *)dst + dst_index)); - // int4 tmp = convert_int4_sat(src1_data) * alpha + convert_int4_sat(src2_data) * beta + gama; - int4 tmp; + // int4 tmp = convert_int4_sat(src1_data) * alpha + convert_int4_sat(src2_data) * beta + gama; + int4 tmp; tmp.x = src1_data.x * alpha + src2_data.x * beta + gama; tmp.y = src1_data.y * alpha + src2_data.y * beta + gama; tmp.z = src1_data.z * alpha + src2_data.z * beta + gama; @@ -181,8 +191,11 @@ __kernel void addWeighted_D3 (__global short *src1, int src1_step,int src1_offse { x = x << 2; - - #define dst_align ((dst_offset >> 1) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 3) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset -( dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset -( dst_align << 1)); @@ -190,26 +203,26 @@ __kernel void addWeighted_D3 (__global short *src1, int src1_step,int src1_offse int dst_end = mad24(y, dst_step, dst_offset + dst_step1); int dst_index = mad24(y, dst_step, dst_offset +( x<< 1) - (dst_align << 1 )); - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; + int src1_index_fix = src1_index < 0 ? 0 : src1_index; + int src2_index_fix = src2_index < 0 ? 0 : src2_index; short4 src1_data = vload4(0, (__global short *)((__global char *)src1 + src1_index_fix)); short4 src2_data = vload4(0, (__global short *)((__global char *)src2 + src2_index_fix)); - if(src1_index < 0) - { - short4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - short4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } + if(src1_index < 0) + { + short4 tmp; + tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; + src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; + } + if(src2_index < 0) + { + short4 tmp; + tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; + src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; + } short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index)); - // int4 tmp = convert_int4_sat(src1_data) * alpha + convert_int4_sat(src2_data) * beta + gama; - int4 tmp; + // int4 tmp = convert_int4_sat(src1_data) * alpha + convert_int4_sat(src2_data) * beta + gama; + int4 tmp; tmp.x = src1_data.x * alpha + src2_data.x * beta + gama; tmp.y = src1_data.y * alpha + src2_data.y * beta + gama; tmp.z = src1_data.z * alpha + src2_data.z * beta + gama; @@ -228,7 +241,7 @@ __kernel void addWeighted_D3 (__global short *src1, int src1_step,int src1_offse __kernel void addWeighted_D4 (__global int *src1, int src1_step,int src1_offset, __global int *src2, int src2_step,int src2_offset, - F alpha,F beta, F gama, + F alpha,F beta, F gama, __global int *dst, int dst_step,int dst_offset, int rows, int cols,int dst_step1) { @@ -241,9 +254,12 @@ __kernel void addWeighted_D4 (__global int *src1, int src1_step,int src1_offset, x = x << 2; - #define bitOfInt (sizeof(int)== 4 ? 2: 3) - - #define dst_align ((dst_offset >> bitOfInt) & 3) +#define bitOfInt (sizeof(int)== 4 ? 2: 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> bitOfInt) & 3) int src1_index = mad24(y, src1_step, (x << bitOfInt) + src1_offset - (dst_align << bitOfInt)); int src2_index = mad24(y, src2_step, (x << bitOfInt) + src2_offset - (dst_align << bitOfInt)); @@ -252,26 +268,26 @@ __kernel void addWeighted_D4 (__global int *src1, int src1_step,int src1_offset, int dst_end = mad24(y, dst_step, dst_offset + dst_step1); int dst_index = mad24(y, dst_step, dst_offset + (x << bitOfInt) -(dst_align << bitOfInt)); - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; + int src1_index_fix = src1_index < 0 ? 0 : src1_index; + int src2_index_fix = src2_index < 0 ? 0 : src2_index; int4 src1_data = vload4(0, (__global int *)((__global char *)src1 + src1_index_fix)); int4 src2_data = vload4(0, (__global int *)((__global char *)src2 + src2_index_fix)); - if(src1_index < 0) - { - int4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - int4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } + if(src1_index < 0) + { + int4 tmp; + tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; + src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; + } + if(src2_index < 0) + { + int4 tmp; + tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; + src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; + } int4 dst_data = *((__global int4 *)((__global char *)dst + dst_index)); - // double4 tmp = convert_double4(src1_data) * alpha + convert_double4(src2_data) * beta + gama ; - float4 tmp; + // double4 tmp = convert_double4(src1_data) * alpha + convert_double4(src2_data) * beta + gama ; + float4 tmp; tmp.x = src1_data.x * alpha + src2_data.x * beta + gama; tmp.y = src1_data.y * alpha + src2_data.y * beta + gama; tmp.z = src1_data.z * alpha + src2_data.z * beta + gama; @@ -291,7 +307,7 @@ __kernel void addWeighted_D4 (__global int *src1, int src1_step,int src1_offset, __kernel void addWeighted_D5 (__global float *src1,int src1_step,int src1_offset, __global float *src2, int src2_step,int src2_offset, - F alpha,F beta, F gama, + F alpha,F beta, F gama, __global float *dst, int dst_step,int dst_offset, int rows, int cols,int dst_step1) { @@ -303,8 +319,11 @@ __kernel void addWeighted_D5 (__global float *src1,int src1_step,int src1_offset { x = x << 2; - - #define dst_align ((dst_offset >> 2) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 2) & 3) int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2)); int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2)); @@ -313,32 +332,32 @@ __kernel void addWeighted_D5 (__global float *src1,int src1_step,int src1_offset int dst_end = mad24(y, dst_step, dst_offset + dst_step1); int dst_index = mad24(y, dst_step, dst_offset + (x << 2) -(dst_align << 2)); - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; + int src1_index_fix = src1_index < 0 ? 0 : src1_index; + int src2_index_fix = src2_index < 0 ? 0 : src2_index; float4 src1_data = vload4(0, (__global float *)((__global char *)src1 + src1_index_fix)); float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index_fix)); float4 dst_data = *((__global float4 *)((__global char *)dst + dst_index)); - if(src1_index < 0) - { - float4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - float4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } - // double4 tmp = convert_double4(src1_data) * alpha + convert_double4(src2_data) * beta + gama ; + if(src1_index < 0) + { + float4 tmp; + tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; + src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; + } + if(src2_index < 0) + { + float4 tmp; + tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; + src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; + } + // double4 tmp = convert_double4(src1_data) * alpha + convert_double4(src2_data) * beta + gama ; - // float4 tmp_data =(src1_data) * alpha + (src2_data) * beta + gama ; - float4 tmp_data; + // float4 tmp_data =(src1_data) * alpha + (src2_data) * beta + gama ; + float4 tmp_data; tmp_data.x = src1_data.x * alpha + src2_data.x * beta + gama; tmp_data.y = src1_data.y * alpha + src2_data.y * beta + gama; tmp_data.z = src1_data.z * alpha + src2_data.z * beta + gama; tmp_data.w = src1_data.w * alpha + src2_data.w * beta + gama; - // float4 tmp_data = convert_float4(tmp); + // float4 tmp_data = convert_float4(tmp); dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; dst_data.y = ((dst_index + 4 >= dst_start) && (dst_index + 4 < dst_end)) ? tmp_data.y : dst_data.y; @@ -353,7 +372,7 @@ __kernel void addWeighted_D5 (__global float *src1,int src1_step,int src1_offset #if defined (DOUBLE_SUPPORT) __kernel void addWeighted_D6 (__global double *src1, int src1_step,int src1_offset, __global double *src2, int src2_step,int src2_offset, - F alpha,F beta, F gama, + F alpha,F beta, F gama, __global double *dst, int dst_step,int dst_offset, int rows, int cols,int dst_step1) { @@ -365,8 +384,11 @@ __kernel void addWeighted_D6 (__global double *src1, int src1_step,int src1_offs { x = x << 2; - - #define dst_align ((dst_offset >> 3) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 3) & 3) int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3)); int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3)); @@ -375,25 +397,25 @@ __kernel void addWeighted_D6 (__global double *src1, int src1_step,int src1_offs int dst_end = mad24(y, dst_step, dst_offset + dst_step1); int dst_index = mad24(y, dst_step, dst_offset + (x << 3) -(dst_align << 3)); - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; + int src1_index_fix = src1_index < 0 ? 0 : src1_index; + int src2_index_fix = src2_index < 0 ? 0 : src2_index; double4 src1_data = vload4(0, (__global double *)((__global char *)src1 + src1_index_fix)); double4 src2_data = vload4(0, (__global double *)((__global char *)src2 + src2_index_fix)); double4 dst_data = *((__global double4 *)((__global char *)dst + dst_index)); - if(src1_index < 0) - { - double4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - double4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } - // double4 tmp_data = (src1_data) * alpha + (src2_data) * beta + gama ; - double4 tmp_data; + if(src1_index < 0) + { + double4 tmp; + tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; + src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; + } + if(src2_index < 0) + { + double4 tmp; + tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; + src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; + } + // double4 tmp_data = (src1_data) * alpha + (src2_data) * beta + gama ; + double4 tmp_data; tmp_data.x = src1_data.x * alpha + src2_data.x * beta + gama; tmp_data.y = src1_data.y * alpha + src2_data.y * beta + gama; tmp_data.z = src1_data.z * alpha + src2_data.z * beta + gama; diff --git a/modules/ocl/src/opencl/arithm_add_scalar.cl b/modules/ocl/src/opencl/arithm_add_scalar.cl index 05b813dc8..15ae95df2 100644 --- a/modules/ocl/src/opencl/arithm_add_scalar.cl +++ b/modules/ocl/src/opencl/arithm_add_scalar.cl @@ -44,9 +44,13 @@ //M*/ #if defined (DOUBLE_SUPPORT) +#ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64:enable +#elif defined (cl_amd_fp64) +#pragma OPENCL EXTENSION cl_amd_fp64:enable #endif +#endif /**************************************add with scalar without mask**************************************/ __kernel void arithm_s_add_C1_D0 (__global uchar *src1, int src1_step, int src1_offset, __global uchar *dst, int dst_step, int dst_offset, @@ -58,8 +62,11 @@ __kernel void arithm_s_add_C1_D0 (__global uchar *src1, int src1_step, int src if (x < cols && y < rows) { x = x << 2; - - #define dst_align (dst_offset & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (dst_offset & 3) int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); int dst_start = mad24(y, dst_step, dst_offset); @@ -98,8 +105,11 @@ __kernel void arithm_s_add_C1_D2 (__global ushort *src1, int src1_step, int sr if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int dst_start = mad24(y, dst_step, dst_offset); @@ -130,8 +140,11 @@ __kernel void arithm_s_add_C1_D3 (__global short *src1, int src1_step, int src if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int dst_start = mad24(y, dst_step, dst_offset); @@ -232,8 +245,11 @@ __kernel void arithm_s_add_C2_D0 (__global uchar *src1, int src1_step, int src if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int dst_start = mad24(y, dst_step, dst_offset); @@ -377,8 +393,11 @@ __kernel void arithm_s_add_C3_D0 (__global uchar *src1, int src1_step, int src if (x < cols && y < rows) { x = x << 2; - - #define dst_align (((dst_offset % dst_step) / 3 ) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 3 ) & 3) int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3)); int dst_start = mad24(y, dst_step, dst_offset); @@ -431,8 +450,11 @@ __kernel void arithm_s_add_C3_D2 (__global ushort *src1, int src1_step, int sr if (x < cols && y < rows) { x = x << 1; - - #define dst_align (((dst_offset % dst_step) / 6 ) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 6 ) & 1) int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6)); int dst_start = mad24(y, dst_step, dst_offset); @@ -458,16 +480,16 @@ __kernel void arithm_s_add_C3_D2 (__global ushort *src1, int src1_step, int sr data_0.xy = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy; data_1.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) - ? tmp_data_1.x : data_1.x; + ? tmp_data_1.x : data_1.x; data_1.y = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_1.y : data_1.y; + ? tmp_data_1.y : data_1.y; data_2.xy = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_2.xy : data_2.xy; + ? tmp_data_2.xy : data_2.xy; - *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0; - *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1; - *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2; + *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0; + *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1; + *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2; } } __kernel void arithm_s_add_C3_D3 (__global short *src1, int src1_step, int src1_offset, @@ -481,8 +503,11 @@ __kernel void arithm_s_add_C3_D3 (__global short *src1, int src1_step, int src if (x < cols && y < rows) { x = x << 1; - - #define dst_align (((dst_offset % dst_step) / 6 ) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 6 ) & 1) int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6)); int dst_start = mad24(y, dst_step, dst_offset); @@ -508,16 +533,16 @@ __kernel void arithm_s_add_C3_D3 (__global short *src1, int src1_step, int src data_0.xy = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy; data_1.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) - ? tmp_data_1.x : data_1.x; + ? tmp_data_1.x : data_1.x; data_1.y = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_1.y : data_1.y; + ? tmp_data_1.y : data_1.y; data_2.xy = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_2.xy : data_2.xy; + ? tmp_data_2.xy : data_2.xy; - *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0; - *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1; - *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2; + *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0; + *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1; + *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2; } } __kernel void arithm_s_add_C3_D4 (__global int *src1, int src1_step, int src1_offset, @@ -549,9 +574,9 @@ __kernel void arithm_s_add_C3_D4 (__global int *src1, int src1_step, int src1_ int tmp_data_1 = convert_int_sat((long)src1_data_1 + (long)src2_data_1); int tmp_data_2 = convert_int_sat((long)src1_data_2 + (long)src2_data_2); - *((__global int *)((__global char *)dst + dst_index + 0))= tmp_data_0; - *((__global int *)((__global char *)dst + dst_index + 4))= tmp_data_1; - *((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2; + *((__global int *)((__global char *)dst + dst_index + 0))= tmp_data_0; + *((__global int *)((__global char *)dst + dst_index + 4))= tmp_data_1; + *((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2; } } __kernel void arithm_s_add_C3_D5 (__global float *src1, int src1_step, int src1_offset, @@ -583,9 +608,9 @@ __kernel void arithm_s_add_C3_D5 (__global float *src1, int src1_step, int src float tmp_data_1 = src1_data_1 + src2_data_1; float tmp_data_2 = src1_data_2 + src2_data_2; - *((__global float *)((__global char *)dst + dst_index + 0))= tmp_data_0; - *((__global float *)((__global char *)dst + dst_index + 4))= tmp_data_1; - *((__global float *)((__global char *)dst + dst_index + 8))= tmp_data_2; + *((__global float *)((__global char *)dst + dst_index + 0))= tmp_data_0; + *((__global float *)((__global char *)dst + dst_index + 4))= tmp_data_1; + *((__global float *)((__global char *)dst + dst_index + 8))= tmp_data_2; } } @@ -619,9 +644,9 @@ __kernel void arithm_s_add_C3_D6 (__global double *src1, int src1_step, int sr double tmp_data_1 = src1_data_1 + src2_data_1; double tmp_data_2 = src1_data_2 + src2_data_2; - *((__global double *)((__global char *)dst + dst_index + 0 ))= tmp_data_0; - *((__global double *)((__global char *)dst + dst_index + 8 ))= tmp_data_1; - *((__global double *)((__global char *)dst + dst_index + 16))= tmp_data_2; + *((__global double *)((__global char *)dst + dst_index + 0 ))= tmp_data_0; + *((__global double *)((__global char *)dst + dst_index + 8 ))= tmp_data_1; + *((__global double *)((__global char *)dst + dst_index + 16))= tmp_data_2; } } #endif diff --git a/modules/ocl/src/opencl/arithm_add_scalar_mask.cl b/modules/ocl/src/opencl/arithm_add_scalar_mask.cl index 4acb5be6a..1e2ae71af 100644 --- a/modules/ocl/src/opencl/arithm_add_scalar_mask.cl +++ b/modules/ocl/src/opencl/arithm_add_scalar_mask.cl @@ -44,7 +44,11 @@ //M*/ #if defined (DOUBLE_SUPPORT) +#ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64:enable +#elif defined (cl_amd_fp64) +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#endif #endif /**************************************add with scalar with mask**************************************/ @@ -60,8 +64,11 @@ __kernel void arithm_s_add_with_mask_C1_D0 (__global uchar *src1, int src1_ste if (x < cols && y < rows) { x = x << 2; - - #define dst_align (dst_offset & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (dst_offset & 3) int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -110,8 +117,11 @@ __kernel void arithm_s_add_with_mask_C1_D2 (__global ushort *src1, int src1_st if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -145,8 +155,11 @@ __kernel void arithm_s_add_with_mask_C1_D3 (__global short *src1, int src1_ste if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -266,8 +279,11 @@ __kernel void arithm_s_add_with_mask_C2_D0 (__global uchar *src1, int src1_ste if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -442,8 +458,11 @@ __kernel void arithm_s_add_with_mask_C3_D0 (__global uchar *src1, int src1_ste if (x < cols && y < rows) { x = x << 2; - - #define dst_align (((dst_offset % dst_step) / 3 ) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 3 ) & 3) int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -500,8 +519,11 @@ __kernel void arithm_s_add_with_mask_C3_D2 (__global ushort *src1, int src1_st if (x < cols && y < rows) { x = x << 1; - - #define dst_align (((dst_offset % dst_step) / 6 ) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 6 ) & 1) int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -530,16 +552,16 @@ __kernel void arithm_s_add_with_mask_C3_D2 (__global ushort *src1, int src1_st data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy; data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) - ? tmp_data_1.x : data_1.x; + ? tmp_data_1.x : data_1.x; data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_1.y : data_1.y; + ? tmp_data_1.y : data_1.y; data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_2.xy : data_2.xy; + ? tmp_data_2.xy : data_2.xy; - *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0; - *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1; - *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2; + *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0; + *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1; + *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2; } } __kernel void arithm_s_add_with_mask_C3_D3 (__global short *src1, int src1_step, int src1_offset, @@ -554,8 +576,11 @@ __kernel void arithm_s_add_with_mask_C3_D3 (__global short *src1, int src1_ste if (x < cols && y < rows) { x = x << 1; - - #define dst_align (((dst_offset % dst_step) / 6 ) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 6 ) & 1) int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -584,16 +609,16 @@ __kernel void arithm_s_add_with_mask_C3_D3 (__global short *src1, int src1_ste data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy; data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) - ? tmp_data_1.x : data_1.x; + ? tmp_data_1.x : data_1.x; data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_1.y : data_1.y; + ? tmp_data_1.y : data_1.y; data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_2.xy : data_2.xy; + ? tmp_data_2.xy : data_2.xy; - *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0; - *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1; - *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2; + *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0; + *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1; + *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2; } } __kernel void arithm_s_add_with_mask_C3_D4 (__global int *src1, int src1_step, int src1_offset, @@ -633,9 +658,9 @@ __kernel void arithm_s_add_with_mask_C3_D4 (__global int *src1, int src1_step, data_1 = mask_data ? tmp_data_1 : data_1; data_2 = mask_data ? tmp_data_2 : data_2; - *((__global int *)((__global char *)dst + dst_index + 0))= data_0; - *((__global int *)((__global char *)dst + dst_index + 4))= data_1; - *((__global int *)((__global char *)dst + dst_index + 8))= data_2; + *((__global int *)((__global char *)dst + dst_index + 0))= data_0; + *((__global int *)((__global char *)dst + dst_index + 4))= data_1; + *((__global int *)((__global char *)dst + dst_index + 8))= data_2; } } __kernel void arithm_s_add_with_mask_C3_D5 (__global float *src1, int src1_step, int src1_offset, @@ -675,9 +700,9 @@ __kernel void arithm_s_add_with_mask_C3_D5 (__global float *src1, int src1_ste data_1 = mask_data ? tmp_data_1 : data_1; data_2 = mask_data ? tmp_data_2 : data_2; - *((__global float *)((__global char *)dst + dst_index + 0))= data_0; - *((__global float *)((__global char *)dst + dst_index + 4))= data_1; - *((__global float *)((__global char *)dst + dst_index + 8))= data_2; + *((__global float *)((__global char *)dst + dst_index + 0))= data_0; + *((__global float *)((__global char *)dst + dst_index + 4))= data_1; + *((__global float *)((__global char *)dst + dst_index + 8))= data_2; } } @@ -719,9 +744,9 @@ __kernel void arithm_s_add_with_mask_C3_D6 (__global double *src1, int src1_st data_1 = mask_data ? tmp_data_1 : data_1; data_2 = mask_data ? tmp_data_2 : data_2; - *((__global double *)((__global char *)dst + dst_index + 0 ))= data_0; - *((__global double *)((__global char *)dst + dst_index + 8 ))= data_1; - *((__global double *)((__global char *)dst + dst_index + 16))= data_2; + *((__global double *)((__global char *)dst + dst_index + 0 ))= data_0; + *((__global double *)((__global char *)dst + dst_index + 8 ))= data_1; + *((__global double *)((__global char *)dst + dst_index + 16))= data_2; } } #endif diff --git a/modules/ocl/src/opencl/arithm_bitwise_and.cl b/modules/ocl/src/opencl/arithm_bitwise_and.cl index 8adc56de5..a369d8743 100644 --- a/modules/ocl/src/opencl/arithm_bitwise_and.cl +++ b/modules/ocl/src/opencl/arithm_bitwise_and.cl @@ -43,7 +43,11 @@ // //M*/ #if defined (DOUBLE_SUPPORT) +#ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64:enable +#elif defined (cl_amd_fp64) +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#endif #endif ////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -51,9 +55,9 @@ /////////////////////////////////////////////////////////////////////////////////////////////////////// /**************************************bitwise_and without mask**************************************/ __kernel void arithm_bitwise_and_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global uchar *src2, int src2_step, int src2_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -61,31 +65,34 @@ __kernel void arithm_bitwise_and_D0 (__global uchar *src1, int src1_step, int sr if (x < cols && y < rows) { x = x << 2; - - #define dst_align (dst_offset & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (dst_offset & 3) int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); int src2_index = mad24(y, src2_step, x + src2_offset - dst_align); int dst_start = mad24(y, dst_step, dst_offset); int dst_end = mad24(y, dst_step, dst_offset + dst_step1); int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; - uchar4 src1_data = vload4(0, src1 + src1_index_fix); - uchar4 src2_data = vload4(0, src2 + src2_index_fix); + int src1_index_fix = src1_index < 0 ? 0 : src1_index; + int src2_index_fix = src2_index < 0 ? 0 : src2_index; + uchar4 src1_data = vload4(0, src1 + src1_index_fix); + uchar4 src2_data = vload4(0, src2 + src2_index_fix); - if(src1_index < 0) - { - uchar4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - uchar4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } + if(src1_index < 0) + { + uchar4 tmp; + tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; + src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; + } + if(src2_index < 0) + { + uchar4 tmp; + tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; + src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; + } uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); uchar4 tmp_data = src1_data & src2_data; @@ -101,9 +108,9 @@ __kernel void arithm_bitwise_and_D0 (__global uchar *src1, int src1_step, int sr __kernel void arithm_bitwise_and_D1 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global char *src2, int src2_step, int src2_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -111,8 +118,11 @@ __kernel void arithm_bitwise_and_D1 (__global char *src1, int src1_step, int src if (x < cols && y < rows) { x = x << 2; - - #define dst_align (dst_offset & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (dst_offset & 3) int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); int src2_index = mad24(y, src2_step, x + src2_offset - dst_align); @@ -120,23 +130,23 @@ __kernel void arithm_bitwise_and_D1 (__global char *src1, int src1_step, int src int dst_end = mad24(y, dst_step, dst_offset + dst_step1); int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; - char4 src1_data = vload4(0, src1 + src1_index_fix); - char4 src2_data = vload4(0, src2 + src2_index_fix); + int src1_index_fix = src1_index < 0 ? 0 : src1_index; + int src2_index_fix = src2_index < 0 ? 0 : src2_index; + char4 src1_data = vload4(0, src1 + src1_index_fix); + char4 src2_data = vload4(0, src2 + src2_index_fix); - if(src1_index < 0) - { - char4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - char4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } + if(src1_index < 0) + { + char4 tmp; + tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; + src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; + } + if(src2_index < 0) + { + char4 tmp; + tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; + src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; + } char4 dst_data = *((__global char4 *)(dst + dst_index)); char4 tmp_data = src1_data & src2_data; @@ -151,9 +161,9 @@ __kernel void arithm_bitwise_and_D1 (__global char *src1, int src1_step, int src __kernel void arithm_bitwise_and_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *src2, int src2_step, int src2_offset, - __global ushort *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global ushort *src2, int src2_step, int src2_offset, + __global ushort *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -162,8 +172,11 @@ __kernel void arithm_bitwise_and_D2 (__global ushort *src1, int src1_step, int s if (x < cols && y < rows) { x = x << 2; - - #define dst_align ((dst_offset >> 1) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 3) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); @@ -171,23 +184,23 @@ __kernel void arithm_bitwise_and_D2 (__global ushort *src1, int src1_step, int s int dst_end = mad24(y, dst_step, dst_offset + dst_step1); int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffff8); - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; + int src1_index_fix = src1_index < 0 ? 0 : src1_index; + int src2_index_fix = src2_index < 0 ? 0 : src2_index; ushort4 src1_data = vload4(0, (__global ushort *)((__global char *)src1 + src1_index_fix)); ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index_fix)); - if(src1_index < 0) - { - ushort4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - ushort4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } + if(src1_index < 0) + { + ushort4 tmp; + tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; + src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; + } + if(src2_index < 0) + { + ushort4 tmp; + tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; + src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; + } ushort4 dst_data = *((__global ushort4 *)((__global char *)dst + dst_index)); ushort4 tmp_data = src1_data & src2_data; @@ -203,9 +216,9 @@ __kernel void arithm_bitwise_and_D2 (__global ushort *src1, int src1_step, int s __kernel void arithm_bitwise_and_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *src2, int src2_step, int src2_offset, - __global short *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global short *src2, int src2_step, int src2_offset, + __global short *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -214,8 +227,11 @@ __kernel void arithm_bitwise_and_D3 (__global short *src1, int src1_step, int sr if (x < cols && y < rows) { x = x << 2; - - #define dst_align ((dst_offset >> 1) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 3) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); @@ -223,23 +239,23 @@ __kernel void arithm_bitwise_and_D3 (__global short *src1, int src1_step, int sr int dst_end = mad24(y, dst_step, dst_offset + dst_step1); int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffff8); - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; + int src1_index_fix = src1_index < 0 ? 0 : src1_index; + int src2_index_fix = src2_index < 0 ? 0 : src2_index; short4 src1_data = vload4(0, (__global short *)((__global char *)src1 + src1_index_fix)); short4 src2_data = vload4(0, (__global short *)((__global char *)src2 + src2_index_fix)); - if(src1_index < 0) - { - short4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - short4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } + if(src1_index < 0) + { + short4 tmp; + tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; + src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; + } + if(src2_index < 0) + { + short4 tmp; + tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; + src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; + } short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index)); short4 tmp_data = src1_data & src2_data; @@ -255,9 +271,9 @@ __kernel void arithm_bitwise_and_D3 (__global short *src1, int src1_step, int sr __kernel void arithm_bitwise_and_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *src2, int src2_step, int src2_offset, - __global int *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global int *src2, int src2_step, int src2_offset, + __global int *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -277,9 +293,9 @@ __kernel void arithm_bitwise_and_D4 (__global int *src1, int src1_step, int src1 } __kernel void arithm_bitwise_and_D5 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global char *src2, int src2_step, int src2_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -300,9 +316,9 @@ __kernel void arithm_bitwise_and_D5 (__global char *src1, int src1_step, int src #if defined (DOUBLE_SUPPORT) __kernel void arithm_bitwise_and_D6 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global char *src2, int src2_step, int src2_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); diff --git a/modules/ocl/src/opencl/arithm_bitwise_and_mask.cl b/modules/ocl/src/opencl/arithm_bitwise_and_mask.cl index 595fb2ceb..fbc42364a 100644 --- a/modules/ocl/src/opencl/arithm_bitwise_and_mask.cl +++ b/modules/ocl/src/opencl/arithm_bitwise_and_mask.cl @@ -43,18 +43,22 @@ // //M*/ #if defined (DOUBLE_SUPPORT) +#ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64:enable +#elif defined (cl_amd_fp64) +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#endif #endif - ////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////BITWISE_AND//////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////// /**************************************bitwise_and with mask**************************************/ -__kernel void arithm_bitwise_and_with_mask_C1_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_and_with_mask_C1_D0 ( + __global uchar *src1, int src1_step, int src1_offset, + __global uchar *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -63,8 +67,11 @@ __kernel void arithm_bitwise_and_with_mask_C1_D0 (__global uchar *src1, int src1 if (x < cols && y < rows) { x = x << 2; - - #define dst_align (dst_offset & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (dst_offset & 3) int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); int src2_index = mad24(y, src2_step, x + src2_offset - dst_align); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -91,11 +98,12 @@ __kernel void arithm_bitwise_and_with_mask_C1_D0 (__global uchar *src1, int src1 -__kernel void arithm_bitwise_and_with_mask_C1_D1 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_and_with_mask_C1_D1 ( + __global char *src1, int src1_step, int src1_offset, + __global char *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -104,8 +112,11 @@ __kernel void arithm_bitwise_and_with_mask_C1_D1 (__global char *src1, int src1_ if (x < cols && y < rows) { x = x << 2; - - #define dst_align (dst_offset & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (dst_offset & 3) int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); int src2_index = mad24(y, src2_step, x + src2_offset - dst_align); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -132,11 +143,12 @@ __kernel void arithm_bitwise_and_with_mask_C1_D1 (__global char *src1, int src1_ -__kernel void arithm_bitwise_and_with_mask_C1_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global ushort *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_and_with_mask_C1_D2 ( + __global ushort *src1, int src1_step, int src1_offset, + __global ushort *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global ushort *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -145,8 +157,11 @@ __kernel void arithm_bitwise_and_with_mask_C1_D2 (__global ushort *src1, int src if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -171,11 +186,12 @@ __kernel void arithm_bitwise_and_with_mask_C1_D2 (__global ushort *src1, int src -__kernel void arithm_bitwise_and_with_mask_C1_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global short *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_and_with_mask_C1_D3 ( + __global short *src1, int src1_step, int src1_offset, + __global short *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global short *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -184,8 +200,11 @@ __kernel void arithm_bitwise_and_with_mask_C1_D3 (__global short *src1, int src1 if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -198,8 +217,8 @@ __kernel void arithm_bitwise_and_with_mask_C1_D3 (__global short *src1, int src1 short2 src2_data = vload2(0, (__global short *)((__global char *)src2 + src2_index)); uchar2 mask_data = vload2(0, mask + mask_index); - short2 data = *((__global short2 *)((__global uchar *)dst + dst_index)); - short2 tmp_data = src1_data & src2_data; + short2 data = *((__global short2 *)((__global uchar *)dst + dst_index)); + short2 tmp_data = src1_data & src2_data; data.x = convert_short((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : data.x; data.y = convert_short((mask_data.y) && (dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.y : data.y; @@ -210,11 +229,12 @@ __kernel void arithm_bitwise_and_with_mask_C1_D3 (__global short *src1, int src1 -__kernel void arithm_bitwise_and_with_mask_C1_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global int *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_and_with_mask_C1_D4 ( + __global int *src1, int src1_step, int src1_offset, + __global int *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global int *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -242,11 +262,12 @@ __kernel void arithm_bitwise_and_with_mask_C1_D4 (__global int *src1, int src1 -__kernel void arithm_bitwise_and_with_mask_C1_D5 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_and_with_mask_C1_D5 ( + __global char *src1, int src1_step, int src1_offset, + __global char *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -274,12 +295,12 @@ __kernel void arithm_bitwise_and_with_mask_C1_D5 (__global char *src1, int src1_ -#if defined (DOUBLE_SUPPORT) -__kernel void arithm_bitwise_and_with_mask_C1_D6 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_and_with_mask_C1_D6 ( + __global char *src1, int src1_step, int src1_offset, + __global char *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -305,15 +326,15 @@ __kernel void arithm_bitwise_and_with_mask_C1_D6 (__global char *src1, int src1_ } } -#endif -__kernel void arithm_bitwise_and_with_mask_C2_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_and_with_mask_C2_D0 ( + __global uchar *src1, int src1_step, int src1_offset, + __global uchar *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -322,8 +343,11 @@ __kernel void arithm_bitwise_and_with_mask_C2_D0 (__global uchar *src1, int src1 if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -347,11 +371,12 @@ __kernel void arithm_bitwise_and_with_mask_C2_D0 (__global uchar *src1, int src1 } -__kernel void arithm_bitwise_and_with_mask_C2_D1 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_and_with_mask_C2_D1 ( + __global char *src1, int src1_step, int src1_offset, + __global char *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -360,8 +385,11 @@ __kernel void arithm_bitwise_and_with_mask_C2_D1 (__global char *src1, int src1_ if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -384,11 +412,12 @@ __kernel void arithm_bitwise_and_with_mask_C2_D1 (__global char *src1, int src1_ } } -__kernel void arithm_bitwise_and_with_mask_C2_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global ushort *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_and_with_mask_C2_D2 ( + __global ushort *src1, int src1_step, int src1_offset, + __global ushort *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global ushort *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -413,11 +442,12 @@ __kernel void arithm_bitwise_and_with_mask_C2_D2 (__global ushort *src1, int src *((__global ushort2 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_bitwise_and_with_mask_C2_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global short *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_and_with_mask_C2_D3 ( + __global short *src1, int src1_step, int src1_offset, + __global short *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global short *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -442,11 +472,12 @@ __kernel void arithm_bitwise_and_with_mask_C2_D3 (__global short *src1, int src1 *((__global short2 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_bitwise_and_with_mask_C2_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global int *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_and_with_mask_C2_D4 ( + __global int *src1, int src1_step, int src1_offset, + __global int *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global int *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -471,11 +502,12 @@ __kernel void arithm_bitwise_and_with_mask_C2_D4 (__global int *src1, int src1 *((__global int2 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_bitwise_and_with_mask_C2_D5 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_and_with_mask_C2_D5 ( + __global char *src1, int src1_step, int src1_offset, + __global char *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -500,12 +532,13 @@ __kernel void arithm_bitwise_and_with_mask_C2_D5 (__global char *src1, int src1_ *((__global char8 *)((__global char *)dst + dst_index)) = data; } } -#if defined (DOUBLE_SUPPORT) -__kernel void arithm_bitwise_and_with_mask_C2_D6 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + +__kernel void arithm_bitwise_and_with_mask_C2_D6 ( + __global char *src1, int src1_step, int src1_offset, + __global char *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -530,15 +563,15 @@ __kernel void arithm_bitwise_and_with_mask_C2_D6 (__global char *src1, int src1_ *((__global char16 *)((__global char *)dst + dst_index)) = data; } } -#endif -__kernel void arithm_bitwise_and_with_mask_C3_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_and_with_mask_C3_D0 ( + __global uchar *src1, int src1_step, int src1_offset, + __global uchar *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -547,8 +580,11 @@ __kernel void arithm_bitwise_and_with_mask_C3_D0 (__global uchar *src1, int src1 if (x < cols && y < rows) { x = x << 2; - - #define dst_align (((dst_offset % dst_step) / 3 ) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 3 ) & 3) int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3)); int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -596,11 +632,12 @@ __kernel void arithm_bitwise_and_with_mask_C3_D0 (__global uchar *src1, int src1 } -__kernel void arithm_bitwise_and_with_mask_C3_D1 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_and_with_mask_C3_D1 ( + __global char *src1, int src1_step, int src1_offset, + __global char *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -609,8 +646,11 @@ __kernel void arithm_bitwise_and_with_mask_C3_D1 (__global char *src1, int src1_ if (x < cols && y < rows) { x = x << 2; - - #define dst_align (((dst_offset % dst_step) / 3 ) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 3 ) & 3) int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3)); int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -657,11 +697,12 @@ __kernel void arithm_bitwise_and_with_mask_C3_D1 (__global char *src1, int src1_ } } -__kernel void arithm_bitwise_and_with_mask_C3_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global ushort *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_and_with_mask_C3_D2 ( + __global ushort *src1, int src1_step, int src1_offset, + __global ushort *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global ushort *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -670,8 +711,11 @@ __kernel void arithm_bitwise_and_with_mask_C3_D2 (__global ushort *src1, int src if (x < cols && y < rows) { x = x << 1; - - #define dst_align (((dst_offset % dst_step) / 6 ) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 6 ) & 1) int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6)); int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -701,23 +745,24 @@ __kernel void arithm_bitwise_and_with_mask_C3_D2 (__global ushort *src1, int src data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy; data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) - ? tmp_data_1.x : data_1.x; + ? tmp_data_1.x : data_1.x; data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_1.y : data_1.y; + ? tmp_data_1.y : data_1.y; data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_2.xy : data_2.xy; + ? tmp_data_2.xy : data_2.xy; - *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0; - *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1; - *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2; + *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0; + *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1; + *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2; } } -__kernel void arithm_bitwise_and_with_mask_C3_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global short *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_and_with_mask_C3_D3 ( + __global short *src1, int src1_step, int src1_offset, + __global short *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global short *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -726,8 +771,11 @@ __kernel void arithm_bitwise_and_with_mask_C3_D3 (__global short *src1, int src1 if (x < cols && y < rows) { x = x << 1; - - #define dst_align (((dst_offset % dst_step) / 6 ) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 6 ) & 1) int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6)); int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -757,23 +805,24 @@ __kernel void arithm_bitwise_and_with_mask_C3_D3 (__global short *src1, int src1 data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy; data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) - ? tmp_data_1.x : data_1.x; + ? tmp_data_1.x : data_1.x; data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_1.y : data_1.y; + ? tmp_data_1.y : data_1.y; data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_2.xy : data_2.xy; + ? tmp_data_2.xy : data_2.xy; - *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0; - *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1; - *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2; + *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0; + *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1; + *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2; } } -__kernel void arithm_bitwise_and_with_mask_C3_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global int *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_and_with_mask_C3_D4 ( + __global int *src1, int src1_step, int src1_offset, + __global int *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global int *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -808,16 +857,17 @@ __kernel void arithm_bitwise_and_with_mask_C3_D4 (__global int *src1, int src1 data_1 = mask_data ? tmp_data_1 : data_1; data_2 = mask_data ? tmp_data_2 : data_2; - *((__global int *)((__global char *)dst + dst_index + 0))= data_0; - *((__global int *)((__global char *)dst + dst_index + 4))= data_1; - *((__global int *)((__global char *)dst + dst_index + 8))= data_2; + *((__global int *)((__global char *)dst + dst_index + 0))= data_0; + *((__global int *)((__global char *)dst + dst_index + 4))= data_1; + *((__global int *)((__global char *)dst + dst_index + 8))= data_2; } } -__kernel void arithm_bitwise_and_with_mask_C3_D5 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_and_with_mask_C3_D5 ( + __global char *src1, int src1_step, int src1_offset, + __global char *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -852,17 +902,18 @@ __kernel void arithm_bitwise_and_with_mask_C3_D5 (__global char *src1, int src1_ data_1 = mask_data ? tmp_data_1 : data_1; data_2 = mask_data ? tmp_data_2 : data_2; - *((__global char4 *)((__global char *)dst + dst_index + 0))= data_0; - *((__global char4 *)((__global char *)dst + dst_index + 4))= data_1; - *((__global char4 *)((__global char *)dst + dst_index + 8))= data_2; + *((__global char4 *)((__global char *)dst + dst_index + 0))= data_0; + *((__global char4 *)((__global char *)dst + dst_index + 4))= data_1; + *((__global char4 *)((__global char *)dst + dst_index + 8))= data_2; } } #if defined (DOUBLE_SUPPORT) -__kernel void arithm_bitwise_and_with_mask_C3_D6 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_and_with_mask_C3_D6 ( + __global char *src1, int src1_step, int src1_offset, + __global char *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -897,20 +948,20 @@ __kernel void arithm_bitwise_and_with_mask_C3_D6 (__global char *src1, int src1_ data_1 = mask_data ? tmp_data_1 : data_1; data_2 = mask_data ? tmp_data_2 : data_2; - *((__global char8 *)((__global char *)dst + dst_index + 0 ))= data_0; - *((__global char8 *)((__global char *)dst + dst_index + 8 ))= data_1; - *((__global char8 *)((__global char *)dst + dst_index + 16))= data_2; + *((__global char8 *)((__global char *)dst + dst_index + 0 ))= data_0; + *((__global char8 *)((__global char *)dst + dst_index + 8 ))= data_1; + *((__global char8 *)((__global char *)dst + dst_index + 16))= data_2; } } #endif - -__kernel void arithm_bitwise_and_with_mask_C4_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_and_with_mask_C4_D0 ( + __global uchar *src1, int src1_step, int src1_offset, + __global uchar *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -937,11 +988,12 @@ __kernel void arithm_bitwise_and_with_mask_C4_D0 (__global uchar *src1, int src1 } -__kernel void arithm_bitwise_and_with_mask_C4_D1 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_and_with_mask_C4_D1 ( + __global char *src1, int src1_step, int src1_offset, + __global char *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -967,11 +1019,12 @@ __kernel void arithm_bitwise_and_with_mask_C4_D1 (__global char *src1, int src1_ } } -__kernel void arithm_bitwise_and_with_mask_C4_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global ushort *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_and_with_mask_C4_D2 ( + __global ushort *src1, int src1_step, int src1_offset, + __global ushort *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global ushort *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -996,11 +1049,12 @@ __kernel void arithm_bitwise_and_with_mask_C4_D2 (__global ushort *src1, int src *((__global ushort4 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_bitwise_and_with_mask_C4_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global short *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_and_with_mask_C4_D3 ( + __global short *src1, int src1_step, int src1_offset, + __global short *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global short *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -1025,11 +1079,12 @@ __kernel void arithm_bitwise_and_with_mask_C4_D3 (__global short *src1, int src1 *((__global short4 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_bitwise_and_with_mask_C4_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global int *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_and_with_mask_C4_D4 ( + __global int *src1, int src1_step, int src1_offset, + __global int *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global int *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -1054,11 +1109,12 @@ __kernel void arithm_bitwise_and_with_mask_C4_D4 (__global int *src1, int src1 *((__global int4 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_bitwise_and_with_mask_C4_D5 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_and_with_mask_C4_D5 ( + __global char *src1, int src1_step, int src1_offset, + __global char *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -1084,11 +1140,12 @@ __kernel void arithm_bitwise_and_with_mask_C4_D5 (__global char *src1, int src1_ } } #if defined (DOUBLE_SUPPORT) -__kernel void arithm_bitwise_and_with_mask_C4_D6 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_and_with_mask_C4_D6 ( + __global char *src1, int src1_step, int src1_offset, + __global char *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); diff --git a/modules/ocl/src/opencl/arithm_bitwise_and_scalar.cl b/modules/ocl/src/opencl/arithm_bitwise_and_scalar.cl index a5152ce0b..5058d318e 100644 --- a/modules/ocl/src/opencl/arithm_bitwise_and_scalar.cl +++ b/modules/ocl/src/opencl/arithm_bitwise_and_scalar.cl @@ -42,19 +42,22 @@ // the use of this software, even if advised of the possibility of such damage. // // -#if defined (__ATI__) -#pragma OPENCL EXTENSION cl_amd_fp64:enable -#elif defined (__NVIDIA__) +#if defined (DOUBLE_SUPPORT) +#ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64:enable +#elif defined (cl_amd_fp64) +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#endif #endif ////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////BITWISE_AND//////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////// /**************************************and with scalar without mask**************************************/ -__kernel void arithm_s_bitwise_and_C1_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *dst, int dst_step, int dst_offset, - uchar4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_C1_D0 ( + __global uchar *src1, int src1_step, int src1_offset, + __global uchar *dst, int dst_step, int dst_offset, + uchar4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -62,8 +65,11 @@ __kernel void arithm_s_bitwise_and_C1_D0 (__global uchar *src1, int src1_step, if (x < cols && y < rows) { x = x << 2; - - #define dst_align (dst_offset & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (dst_offset & 3) int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); int dst_start = mad24(y, dst_step, dst_offset); @@ -86,9 +92,10 @@ __kernel void arithm_s_bitwise_and_C1_D0 (__global uchar *src1, int src1_step, } -__kernel void arithm_s_bitwise_and_C1_D1 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - char4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_C1_D1 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + char4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -96,8 +103,11 @@ __kernel void arithm_s_bitwise_and_C1_D1 (__global char *src1, int src1_step, if (x < cols && y < rows) { x = x << 2; - - #define dst_align (dst_offset & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (dst_offset & 3) int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); int dst_start = mad24(y, dst_step, dst_offset); @@ -119,9 +129,10 @@ __kernel void arithm_s_bitwise_and_C1_D1 (__global char *src1, int src1_step, } } -__kernel void arithm_s_bitwise_and_C1_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *dst, int dst_step, int dst_offset, - ushort4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_C1_D2 ( + __global ushort *src1, int src1_step, int src1_offset, + __global ushort *dst, int dst_step, int dst_offset, + ushort4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -130,8 +141,11 @@ __kernel void arithm_s_bitwise_and_C1_D2 (__global ushort *src1, int src1_step if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int dst_start = mad24(y, dst_step, dst_offset); @@ -150,9 +164,10 @@ __kernel void arithm_s_bitwise_and_C1_D2 (__global ushort *src1, int src1_step *((__global ushort2 *)((__global uchar *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_and_C1_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - short4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_C1_D3 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + short4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -161,8 +176,11 @@ __kernel void arithm_s_bitwise_and_C1_D3 (__global short *src1, int src1_step, if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int dst_start = mad24(y, dst_step, dst_offset); @@ -181,9 +199,10 @@ __kernel void arithm_s_bitwise_and_C1_D3 (__global short *src1, int src1_step, *((__global short2 *)((__global uchar *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_and_C1_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *dst, int dst_step, int dst_offset, - int4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_C1_D4 ( + __global int *src1, int src1_step, int src1_offset, + __global int *dst, int dst_step, int dst_offset, + int4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -202,9 +221,10 @@ __kernel void arithm_s_bitwise_and_C1_D4 (__global int *src1, int src1_step, i *((__global int *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_and_C1_D5 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - char16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_C1_D5 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + char16 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -232,11 +252,11 @@ __kernel void arithm_s_bitwise_and_C1_D5 (__global char *src1, int src1_step, *((__global char4 *)((__global char *)dst + dst_index)) = data; } } - #if defined (DOUBLE_SUPPORT) -__kernel void arithm_s_bitwise_and_C1_D6 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - short16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_C1_D6 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + short16 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -256,9 +276,10 @@ __kernel void arithm_s_bitwise_and_C1_D6 (__global short *src1, int src1_step, i } } #endif -__kernel void arithm_s_bitwise_and_C2_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *dst, int dst_step, int dst_offset, - uchar4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_C2_D0 ( + __global uchar *src1, int src1_step, int src1_offset, + __global uchar *dst, int dst_step, int dst_offset, + uchar4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -267,8 +288,11 @@ __kernel void arithm_s_bitwise_and_C2_D0 (__global uchar *src1, int src1_step, if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int dst_start = mad24(y, dst_step, dst_offset); @@ -290,9 +314,10 @@ __kernel void arithm_s_bitwise_and_C2_D0 (__global uchar *src1, int src1_step, } -__kernel void arithm_s_bitwise_and_C2_D1 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - char4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_C2_D1 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + char4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -301,8 +326,11 @@ __kernel void arithm_s_bitwise_and_C2_D1 (__global char *src1, int src1_step, if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int dst_start = mad24(y, dst_step, dst_offset); @@ -322,9 +350,10 @@ __kernel void arithm_s_bitwise_and_C2_D1 (__global char *src1, int src1_step, } } -__kernel void arithm_s_bitwise_and_C2_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *dst, int dst_step, int dst_offset, - ushort4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_C2_D2 ( + __global ushort *src1, int src1_step, int src1_offset, + __global ushort *dst, int dst_step, int dst_offset, + ushort4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -343,9 +372,10 @@ __kernel void arithm_s_bitwise_and_C2_D2 (__global ushort *src1, int src1_step *((__global ushort2 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_and_C2_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - short4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_C2_D3 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + short4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -364,9 +394,10 @@ __kernel void arithm_s_bitwise_and_C2_D3 (__global short *src1, int src1_step, *((__global short2 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_and_C2_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *dst, int dst_step, int dst_offset, - int4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_C2_D4 ( + __global int *src1, int src1_step, int src1_offset, + __global int *dst, int dst_step, int dst_offset, + int4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -384,9 +415,10 @@ __kernel void arithm_s_bitwise_and_C2_D4 (__global int *src1, int src1_step, i *((__global int2 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_and_C2_D5 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - char16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_C2_D5 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + char16 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -403,12 +435,13 @@ __kernel void arithm_s_bitwise_and_C2_D5 (__global char *src1, int src1_step, char8 tmp_data = src1_data & src2_data; *((__global char8 *)((__global char *)dst + dst_index)) = tmp_data; - } + } } #if defined (DOUBLE_SUPPORT) -__kernel void arithm_s_bitwise_and_C2_D6 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - short16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_C2_D6 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + short16 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -428,9 +461,10 @@ __kernel void arithm_s_bitwise_and_C2_D6 (__global short *src1, int src1_step, i } } #endif -__kernel void arithm_s_bitwise_and_C3_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *dst, int dst_step, int dst_offset, - uchar4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_C3_D0 ( + __global uchar *src1, int src1_step, int src1_offset, + __global uchar *dst, int dst_step, int dst_offset, + uchar4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -439,8 +473,11 @@ __kernel void arithm_s_bitwise_and_C3_D0 (__global uchar *src1, int src1_step, if (x < cols && y < rows) { x = x << 2; - - #define dst_align (((dst_offset % dst_step) / 3 ) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 3 ) & 3) int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3)); int dst_start = mad24(y, dst_step, dst_offset); @@ -484,9 +521,10 @@ __kernel void arithm_s_bitwise_and_C3_D0 (__global uchar *src1, int src1_step, } -__kernel void arithm_s_bitwise_and_C3_D1 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - char4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_C3_D1 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + char4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -495,8 +533,11 @@ __kernel void arithm_s_bitwise_and_C3_D1 (__global char *src1, int src1_step, if (x < cols && y < rows) { x = x << 2; - - #define dst_align (((dst_offset % dst_step) / 3 ) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 3 ) & 3) int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3)); int dst_start = mad24(y, dst_step, dst_offset); @@ -539,9 +580,10 @@ __kernel void arithm_s_bitwise_and_C3_D1 (__global char *src1, int src1_step, } } -__kernel void arithm_s_bitwise_and_C3_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *dst, int dst_step, int dst_offset, - ushort4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_C3_D2 ( + __global ushort *src1, int src1_step, int src1_offset, + __global ushort *dst, int dst_step, int dst_offset, + ushort4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -550,8 +592,11 @@ __kernel void arithm_s_bitwise_and_C3_D2 (__global ushort *src1, int src1_step if (x < cols && y < rows) { x = x << 1; - - #define dst_align (((dst_offset % dst_step) / 6 ) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 6 ) & 1) int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6)); int dst_start = mad24(y, dst_step, dst_offset); @@ -577,21 +622,22 @@ __kernel void arithm_s_bitwise_and_C3_D2 (__global ushort *src1, int src1_step data_0.xy = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy; data_1.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) - ? tmp_data_1.x : data_1.x; + ? tmp_data_1.x : data_1.x; data_1.y = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_1.y : data_1.y; + ? tmp_data_1.y : data_1.y; data_2.xy = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_2.xy : data_2.xy; + ? tmp_data_2.xy : data_2.xy; - *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0; - *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1; - *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2; + *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0; + *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1; + *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2; } } -__kernel void arithm_s_bitwise_and_C3_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - short4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_C3_D3 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + short4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -600,8 +646,11 @@ __kernel void arithm_s_bitwise_and_C3_D3 (__global short *src1, int src1_step, if (x < cols && y < rows) { x = x << 1; - - #define dst_align (((dst_offset % dst_step) / 6 ) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 6 ) & 1) int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6)); int dst_start = mad24(y, dst_step, dst_offset); @@ -627,21 +676,22 @@ __kernel void arithm_s_bitwise_and_C3_D3 (__global short *src1, int src1_step, data_0.xy = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy; data_1.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) - ? tmp_data_1.x : data_1.x; + ? tmp_data_1.x : data_1.x; data_1.y = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_1.y : data_1.y; + ? tmp_data_1.y : data_1.y; data_2.xy = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_2.xy : data_2.xy; + ? tmp_data_2.xy : data_2.xy; - *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0; - *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1; - *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2; + *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0; + *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1; + *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2; } } -__kernel void arithm_s_bitwise_and_C3_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *dst, int dst_step, int dst_offset, - int4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_C3_D4 ( + __global int *src1, int src1_step, int src1_offset, + __global int *dst, int dst_step, int dst_offset, + int4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -668,14 +718,15 @@ __kernel void arithm_s_bitwise_and_C3_D4 (__global int *src1, int src1_step, i int tmp_data_1 = src1_data_1 & src2_data_1; int tmp_data_2 = src1_data_2 & src2_data_2; - *((__global int *)((__global char *)dst + dst_index + 0))= tmp_data_0; - *((__global int *)((__global char *)dst + dst_index + 4))= tmp_data_1; - *((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2; + *((__global int *)((__global char *)dst + dst_index + 0))= tmp_data_0; + *((__global int *)((__global char *)dst + dst_index + 4))= tmp_data_1; + *((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2; } } -__kernel void arithm_s_bitwise_and_C3_D5 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - char16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_C3_D5 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + char16 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -702,15 +753,16 @@ __kernel void arithm_s_bitwise_and_C3_D5 (__global char *src1, int src1_step, char4 tmp_data_1 = src1_data_1 & src2_data_1; char4 tmp_data_2 = src1_data_2 & src2_data_2; - *((__global char4 *)((__global char *)dst + dst_index + 0))= tmp_data_0; - *((__global char4 *)((__global char *)dst + dst_index + 4))= tmp_data_1; - *((__global char4 *)((__global char *)dst + dst_index + 8))= tmp_data_2; + *((__global char4 *)((__global char *)dst + dst_index + 0))= tmp_data_0; + *((__global char4 *)((__global char *)dst + dst_index + 4))= tmp_data_1; + *((__global char4 *)((__global char *)dst + dst_index + 8))= tmp_data_2; } } #if defined (DOUBLE_SUPPORT) -__kernel void arithm_s_bitwise_and_C3_D6 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - short16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_C3_D6 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + short16 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -737,15 +789,16 @@ __kernel void arithm_s_bitwise_and_C3_D6 (__global short *src1, int src1_step, i short4 tmp_data_1 = src1_data_1 & src2_data_1; short4 tmp_data_2 = src1_data_2 & src2_data_2; - *((__global short4 *)((__global char *)dst + dst_index + 0 ))= tmp_data_0; - *((__global short4 *)((__global char *)dst + dst_index + 8 ))= tmp_data_1; - *((__global short4 *)((__global char *)dst + dst_index + 16))= tmp_data_2; + *((__global short4 *)((__global char *)dst + dst_index + 0 ))= tmp_data_0; + *((__global short4 *)((__global char *)dst + dst_index + 8 ))= tmp_data_1; + *((__global short4 *)((__global char *)dst + dst_index + 16))= tmp_data_2; } } #endif -__kernel void arithm_s_bitwise_and_C4_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *dst, int dst_step, int dst_offset, - uchar4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_C4_D0 ( + __global uchar *src1, int src1_step, int src1_offset, + __global uchar *dst, int dst_step, int dst_offset, + uchar4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -765,9 +818,10 @@ __kernel void arithm_s_bitwise_and_C4_D0 (__global uchar *src1, int src1_step, } -__kernel void arithm_s_bitwise_and_C4_D1 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - char4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_C4_D1 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + char4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -786,9 +840,10 @@ __kernel void arithm_s_bitwise_and_C4_D1 (__global char *src1, int src1_step, } } -__kernel void arithm_s_bitwise_and_C4_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *dst, int dst_step, int dst_offset, - ushort4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_C4_D2 ( + __global ushort *src1, int src1_step, int src1_offset, + __global ushort *dst, int dst_step, int dst_offset, + ushort4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -806,9 +861,10 @@ __kernel void arithm_s_bitwise_and_C4_D2 (__global ushort *src1, int src1_step *((__global ushort4 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_and_C4_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - short4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_C4_D3 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + short4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -826,9 +882,10 @@ __kernel void arithm_s_bitwise_and_C4_D3 (__global short *src1, int src1_step, *((__global short4 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_and_C4_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *dst, int dst_step, int dst_offset, - int4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_C4_D4 ( + __global int *src1, int src1_step, int src1_offset, + __global int *dst, int dst_step, int dst_offset, + int4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -846,9 +903,10 @@ __kernel void arithm_s_bitwise_and_C4_D4 (__global int *src1, int src1_step, i *((__global int4 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_and_C4_D5 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - char16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_C4_D5 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + char16 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -869,9 +927,10 @@ __kernel void arithm_s_bitwise_and_C4_D5 (__global char *src1, int src1_step, } } #if defined (DOUBLE_SUPPORT) -__kernel void arithm_s_bitwise_and_C4_D6 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - short16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_C4_D6 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + short16 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -897,10 +956,10 @@ __kernel void arithm_s_bitwise_and_C4_D6 (__global short *src1, int src1_step, i short4 tmp_data_2 = src1_data_2 & src2_data_2; short4 tmp_data_3 = src1_data_3 & src2_data_3; - *((__global short4 *)((__global char *)dst + dst_index + 0 ))= tmp_data_0; - *((__global short4 *)((__global char *)dst + dst_index + 8 ))= tmp_data_1; - *((__global short4 *)((__global char *)dst + dst_index + 16))= tmp_data_2; - *((__global short4 *)((__global char *)dst + dst_index + 24))= tmp_data_3; + *((__global short4 *)((__global char *)dst + dst_index + 0 ))= tmp_data_0; + *((__global short4 *)((__global char *)dst + dst_index + 8 ))= tmp_data_1; + *((__global short4 *)((__global char *)dst + dst_index + 16))= tmp_data_2; + *((__global short4 *)((__global char *)dst + dst_index + 24))= tmp_data_3; } } diff --git a/modules/ocl/src/opencl/arithm_bitwise_and_scalar_mask.cl b/modules/ocl/src/opencl/arithm_bitwise_and_scalar_mask.cl index beafd7e0a..71371737d 100644 --- a/modules/ocl/src/opencl/arithm_bitwise_and_scalar_mask.cl +++ b/modules/ocl/src/opencl/arithm_bitwise_and_scalar_mask.cl @@ -42,20 +42,22 @@ // the use of this software, even if advised of the possibility of such damage. // //M*/ -#if defined (__ATI__) -#pragma OPENCL EXTENSION cl_amd_fp64:enable -#elif defined (__NVIDIA__) +#if defined (DOUBLE_SUPPORT) +#ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64:enable +#elif defined (cl_amd_fp64) +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#endif #endif - ////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////BITWISE_AND//////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////// /**************************************bitwise_and with scalar with mask**************************************/ -__kernel void arithm_s_bitwise_and_with_mask_C1_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - uchar4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_with_mask_C1_D0 ( + __global uchar *src1, int src1_step, int src1_offset, + __global uchar *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + uchar4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -64,8 +66,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D0 (__global uchar *src1, int if (x < cols && y < rows) { x = x << 2; - - #define dst_align (dst_offset & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (dst_offset & 3) int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -90,10 +95,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D0 (__global uchar *src1, int } -__kernel void arithm_s_bitwise_and_with_mask_C1_D1 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - char4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_with_mask_C1_D1 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + char4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -102,8 +108,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D1 (__global char *src1, int s if (x < cols && y < rows) { x = x << 2; - - #define dst_align (dst_offset & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (dst_offset & 3) int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -127,10 +136,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D1 (__global char *src1, int s } } -__kernel void arithm_s_bitwise_and_with_mask_C1_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - ushort4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_with_mask_C1_D2 ( + __global ushort *src1, int src1_step, int src1_offset, + __global ushort *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + ushort4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -139,8 +149,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D2 (__global ushort *src1, int if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -161,10 +174,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D2 (__global ushort *src1, int *((__global ushort2 *)((__global uchar *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_and_with_mask_C1_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - short4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_with_mask_C1_D3 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + short4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -173,8 +187,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D3 (__global short *src1, int if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -195,10 +212,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D3 (__global short *src1, int *((__global short2 *)((__global uchar *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_and_with_mask_C1_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - int4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_with_mask_C1_D4 ( + __global int *src1, int src1_step, int src1_offset, + __global int *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + int4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -223,10 +241,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D4 (__global int *src1, int } } -__kernel void arithm_s_bitwise_and_with_mask_C1_D5 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - char16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_with_mask_C1_D5 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + char16 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -252,10 +271,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D5 (__global char *src1, int src } #if defined (DOUBLE_SUPPORT) -__kernel void arithm_s_bitwise_and_with_mask_C1_D6 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - short16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_with_mask_C1_D6 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + short16 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -280,10 +300,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C1_D6 (__global short *src1, int sr } } #endif -__kernel void arithm_s_bitwise_and_with_mask_C2_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - uchar4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_with_mask_C2_D0 ( + __global uchar *src1, int src1_step, int src1_offset, + __global uchar *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + uchar4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -292,8 +313,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D0 (__global uchar *src1, int if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -316,10 +340,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D0 (__global uchar *src1, int } -__kernel void arithm_s_bitwise_and_with_mask_C2_D1 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - char4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_with_mask_C2_D1 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + char4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -328,8 +353,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D1 (__global char *src1, int s if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -351,10 +379,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D1 (__global char *src1, int s } } -__kernel void arithm_s_bitwise_and_with_mask_C2_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - ushort4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_with_mask_C2_D2 ( + __global ushort *src1, int src1_step, int src1_offset, + __global ushort *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + ushort4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -378,10 +407,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D2 (__global ushort *src1, int *((__global ushort2 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_and_with_mask_C2_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - short4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_with_mask_C2_D3 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + short4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -405,10 +435,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D3 (__global short *src1, int *((__global short2 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_and_with_mask_C2_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - int4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_with_mask_C2_D4 ( + __global int *src1, int src1_step, int src1_offset, + __global int *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + int4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -432,10 +463,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D4 (__global int *src1, int sr *((__global int2 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_and_with_mask_C2_D5 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - char16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_with_mask_C2_D5 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + char16 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -461,10 +493,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D5 (__global char *src1, int s } } #if defined (DOUBLE_SUPPORT) -__kernel void arithm_s_bitwise_and_with_mask_C2_D6 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - short16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_with_mask_C2_D6 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + short16 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -489,10 +522,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C2_D6 (__global short *src1, int sr } } #endif -__kernel void arithm_s_bitwise_and_with_mask_C3_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - uchar4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_with_mask_C3_D0 ( + __global uchar *src1, int src1_step, int src1_offset, + __global uchar *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + uchar4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -501,8 +535,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D0 (__global uchar *src1, int if (x < cols && y < rows) { x = x << 2; - - #define dst_align (((dst_offset % dst_step) / 3 ) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 3 ) & 3) int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -549,10 +586,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D0 (__global uchar *src1, int } -__kernel void arithm_s_bitwise_and_with_mask_C3_D1 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - char4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_with_mask_C3_D1 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + char4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -561,8 +599,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D1 (__global char *src1, int s if (x < cols && y < rows) { x = x << 2; - - #define dst_align (((dst_offset % dst_step) / 3 ) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 3 ) & 3) int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -608,10 +649,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D1 (__global char *src1, int s } } -__kernel void arithm_s_bitwise_and_with_mask_C3_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - ushort4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_with_mask_C3_D2 ( + __global ushort *src1, int src1_step, int src1_offset, + __global ushort *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + ushort4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -620,8 +662,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D2 (__global ushort *src1, int if (x < cols && y < rows) { x = x << 1; - - #define dst_align (((dst_offset % dst_step) / 6 ) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 6 ) & 1) int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -650,22 +695,23 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D2 (__global ushort *src1, int data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy; data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) - ? tmp_data_1.x : data_1.x; + ? tmp_data_1.x : data_1.x; data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_1.y : data_1.y; + ? tmp_data_1.y : data_1.y; data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_2.xy : data_2.xy; + ? tmp_data_2.xy : data_2.xy; - *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0; - *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1; - *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2; + *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0; + *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1; + *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2; } } -__kernel void arithm_s_bitwise_and_with_mask_C3_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - short4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_with_mask_C3_D3 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + short4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -674,8 +720,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D3 (__global short *src1, int if (x < cols && y < rows) { x = x << 1; - - #define dst_align (((dst_offset % dst_step) / 6 ) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 6 ) & 1) int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -704,22 +753,23 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D3 (__global short *src1, int data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy; data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) - ? tmp_data_1.x : data_1.x; + ? tmp_data_1.x : data_1.x; data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_1.y : data_1.y; + ? tmp_data_1.y : data_1.y; data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_2.xy : data_2.xy; + ? tmp_data_2.xy : data_2.xy; - *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0; - *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1; - *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2; + *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0; + *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1; + *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2; } } -__kernel void arithm_s_bitwise_and_with_mask_C3_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - int4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_with_mask_C3_D4 ( + __global int *src1, int src1_step, int src1_offset, + __global int *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + int4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -753,15 +803,16 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D4 (__global int *src1, int sr data_1 = mask_data ? tmp_data_1 : data_1; data_2 = mask_data ? tmp_data_2 : data_2; - *((__global int *)((__global char *)dst + dst_index + 0))= data_0; - *((__global int *)((__global char *)dst + dst_index + 4))= data_1; - *((__global int *)((__global char *)dst + dst_index + 8))= data_2; + *((__global int *)((__global char *)dst + dst_index + 0))= data_0; + *((__global int *)((__global char *)dst + dst_index + 4))= data_1; + *((__global int *)((__global char *)dst + dst_index + 8))= data_2; } } -__kernel void arithm_s_bitwise_and_with_mask_C3_D5 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - char16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_with_mask_C3_D5 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + char16 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -795,16 +846,17 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D5 (__global char *src1, int s data_1 = mask_data ? tmp_data_1 : data_1; data_2 = mask_data ? tmp_data_2 : data_2; - *((__global char4 *)((__global char *)dst + dst_index + 0))= data_0; - *((__global char4 *)((__global char *)dst + dst_index + 4))= data_1; - *((__global char4 *)((__global char *)dst + dst_index + 8))= data_2; + *((__global char4 *)((__global char *)dst + dst_index + 0))= data_0; + *((__global char4 *)((__global char *)dst + dst_index + 4))= data_1; + *((__global char4 *)((__global char *)dst + dst_index + 8))= data_2; } } #if defined (DOUBLE_SUPPORT) -__kernel void arithm_s_bitwise_and_with_mask_C3_D6 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - short16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_with_mask_C3_D6 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + short16 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -838,16 +890,17 @@ __kernel void arithm_s_bitwise_and_with_mask_C3_D6 (__global short *src1, int sr data_1 = mask_data ? tmp_data_1 : data_1; data_2 = mask_data ? tmp_data_2 : data_2; - *((__global short4 *)((__global char *)dst + dst_index + 0 ))= data_0; - *((__global short4 *)((__global char *)dst + dst_index + 8 ))= data_1; - *((__global short4 *)((__global char *)dst + dst_index + 16))= data_2; + *((__global short4 *)((__global char *)dst + dst_index + 0 ))= data_0; + *((__global short4 *)((__global char *)dst + dst_index + 8 ))= data_1; + *((__global short4 *)((__global char *)dst + dst_index + 16))= data_2; } } #endif -__kernel void arithm_s_bitwise_and_with_mask_C4_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - uchar4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_with_mask_C4_D0 ( + __global uchar *src1, int src1_step, int src1_offset, + __global uchar *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + uchar4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -872,10 +925,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C4_D0 (__global uchar *src1, int } -__kernel void arithm_s_bitwise_and_with_mask_C4_D1 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - char4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_with_mask_C4_D1 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + char4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -899,10 +953,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C4_D1 (__global char *src1, int s } } -__kernel void arithm_s_bitwise_and_with_mask_C4_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - ushort4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_with_mask_C4_D2 ( + __global ushort *src1, int src1_step, int src1_offset, + __global ushort *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + ushort4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -925,10 +980,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C4_D2 (__global ushort *src1, int *((__global ushort4 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_and_with_mask_C4_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - short4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_with_mask_C4_D3 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + short4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -951,10 +1007,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C4_D3 (__global short *src1, int *((__global short4 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_and_with_mask_C4_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - int4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_with_mask_C4_D4 ( + __global int *src1, int src1_step, int src1_offset, + __global int *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + int4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -977,10 +1034,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C4_D4 (__global int *src1, int sr *((__global int4 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_and_with_mask_C4_D5 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - char16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_with_mask_C4_D5 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + char16 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -1006,10 +1064,11 @@ __kernel void arithm_s_bitwise_and_with_mask_C4_D5 (__global char *src1, int s } } #if defined (DOUBLE_SUPPORT) -__kernel void arithm_s_bitwise_and_with_mask_C4_D6 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - short16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_and_with_mask_C4_D6 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + short16 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -1055,3 +1114,4 @@ __kernel void arithm_s_bitwise_and_with_mask_C4_D6 (__global short *src1, int sr } } #endif + diff --git a/modules/ocl/src/opencl/arithm_bitwise_not.cl b/modules/ocl/src/opencl/arithm_bitwise_not.cl index fd9d2ccf9..8eb9ece75 100644 --- a/modules/ocl/src/opencl/arithm_bitwise_not.cl +++ b/modules/ocl/src/opencl/arithm_bitwise_not.cl @@ -43,9 +43,12 @@ // //M*/ #if defined (DOUBLE_SUPPORT) +#ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64:enable +#elif defined (cl_amd_fp64) +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#endif #endif - ////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////BITWISE_NOT//////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -60,26 +63,29 @@ __kernel void arithm_bitwise_not_D0 (__global uchar *src1, int src1_step, int sr if (x < cols && y < rows) { x = x << 2; - - #define dst_align (dst_offset & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (dst_offset & 3) int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); int dst_start = mad24(y, dst_step, dst_offset); int dst_end = mad24(y, dst_step, dst_offset + dst_step1); int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - int src1_index_fix = src1_index < 0 ? 0 : src1_index; + int src1_index_fix = src1_index < 0 ? 0 : src1_index; uchar4 src1_data = vload4(0, src1 + src1_index_fix); uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); uchar4 tmp_data = ~ src1_data; - /* if(src1_index < 0) - { - uchar4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - */ + /* if(src1_index < 0) + { + uchar4 tmp; + tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; + src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; + } + */ dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y; dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z; @@ -91,8 +97,8 @@ __kernel void arithm_bitwise_not_D0 (__global uchar *src1, int src1_step, int sr __kernel void arithm_bitwise_not_D1 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -100,8 +106,11 @@ __kernel void arithm_bitwise_not_D1 (__global char *src1, int src1_step, int src if (x < cols && y < rows) { x = x << 2; - - #define dst_align (dst_offset & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (dst_offset & 3) int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); int dst_start = mad24(y, dst_step, dst_offset); @@ -124,8 +133,8 @@ __kernel void arithm_bitwise_not_D1 (__global char *src1, int src1_step, int src __kernel void arithm_bitwise_not_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global ushort *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -134,8 +143,11 @@ __kernel void arithm_bitwise_not_D2 (__global ushort *src1, int src1_step, int s if (x < cols && y < rows) { x = x << 2; - - #define dst_align ((dst_offset >> 1) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 3) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int dst_start = mad24(y, dst_step, dst_offset); @@ -159,8 +171,8 @@ __kernel void arithm_bitwise_not_D2 (__global ushort *src1, int src1_step, int s __kernel void arithm_bitwise_not_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global short *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -169,8 +181,11 @@ __kernel void arithm_bitwise_not_D3 (__global short *src1, int src1_step, int sr if (x < cols && y < rows) { x = x << 2; - - #define dst_align ((dst_offset >> 1) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 3) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int dst_start = mad24(y, dst_step, dst_offset); @@ -194,8 +209,8 @@ __kernel void arithm_bitwise_not_D3 (__global short *src1, int src1_step, int sr __kernel void arithm_bitwise_not_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global int *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); diff --git a/modules/ocl/src/opencl/arithm_bitwise_or.cl b/modules/ocl/src/opencl/arithm_bitwise_or.cl index a95e59e0c..4d47b2127 100644 --- a/modules/ocl/src/opencl/arithm_bitwise_or.cl +++ b/modules/ocl/src/opencl/arithm_bitwise_or.cl @@ -43,7 +43,11 @@ // //M*/ #if defined (DOUBLE_SUPPORT) +#ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64:enable +#elif defined (cl_amd_fp64) +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#endif #endif ////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -51,9 +55,9 @@ /////////////////////////////////////////////////////////////////////////////////////////////////////// /**************************************bitwise_or without mask**************************************/ __kernel void arithm_bitwise_or_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global uchar *src2, int src2_step, int src2_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -61,30 +65,33 @@ __kernel void arithm_bitwise_or_D0 (__global uchar *src1, int src1_step, int src if (x < cols && y < rows) { x = x << 2; - - #define dst_align (dst_offset & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (dst_offset & 3) int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); int src2_index = mad24(y, src2_step, x + src2_offset - dst_align); int dst_start = mad24(y, dst_step, dst_offset); int dst_end = mad24(y, dst_step, dst_offset + dst_step1); int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; + int src1_index_fix = src1_index < 0 ? 0 : src1_index; + int src2_index_fix = src2_index < 0 ? 0 : src2_index; uchar4 src1_data = vload4(0, src1 + src1_index_fix); uchar4 src2_data = vload4(0, src2 + src2_index_fix); - if(src1_index < 0) - { - uchar4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - uchar4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } + if(src1_index < 0) + { + uchar4 tmp; + tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; + src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; + } + if(src2_index < 0) + { + uchar4 tmp; + tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; + src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; + } uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); uchar4 tmp_data = src1_data | src2_data; @@ -99,9 +106,9 @@ __kernel void arithm_bitwise_or_D0 (__global uchar *src1, int src1_step, int src __kernel void arithm_bitwise_or_D1 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global char *src2, int src2_step, int src2_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -109,8 +116,11 @@ __kernel void arithm_bitwise_or_D1 (__global char *src1, int src1_step, int src1 if (x < cols && y < rows) { x = x << 2; - - #define dst_align (dst_offset & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (dst_offset & 3) int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); int src2_index = mad24(y, src2_step, x + src2_offset - dst_align); @@ -135,9 +145,9 @@ __kernel void arithm_bitwise_or_D1 (__global char *src1, int src1_step, int src1 __kernel void arithm_bitwise_or_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *src2, int src2_step, int src2_offset, - __global ushort *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global ushort *src2, int src2_step, int src2_offset, + __global ushort *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -146,8 +156,11 @@ __kernel void arithm_bitwise_or_D2 (__global ushort *src1, int src1_step, int sr if (x < cols && y < rows) { x = x << 2; - - #define dst_align ((dst_offset >> 1) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 3) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); @@ -173,9 +186,9 @@ __kernel void arithm_bitwise_or_D2 (__global ushort *src1, int src1_step, int sr __kernel void arithm_bitwise_or_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *src2, int src2_step, int src2_offset, - __global short *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global short *src2, int src2_step, int src2_offset, + __global short *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -184,8 +197,11 @@ __kernel void arithm_bitwise_or_D3 (__global short *src1, int src1_step, int src if (x < cols && y < rows) { x = x << 2; - - #define dst_align ((dst_offset >> 1) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 3) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); @@ -211,9 +227,9 @@ __kernel void arithm_bitwise_or_D3 (__global short *src1, int src1_step, int src __kernel void arithm_bitwise_or_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *src2, int src2_step, int src2_offset, - __global int *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global int *src2, int src2_step, int src2_offset, + __global int *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -233,9 +249,9 @@ __kernel void arithm_bitwise_or_D4 (__global int *src1, int src1_step, int src1_ } __kernel void arithm_bitwise_or_D5 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global char *src2, int src2_step, int src2_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -256,9 +272,9 @@ __kernel void arithm_bitwise_or_D5 (__global char *src1, int src1_step, int src1 #if defined (DOUBLE_SUPPORT) __kernel void arithm_bitwise_or_D6 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global char *src2, int src2_step, int src2_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); diff --git a/modules/ocl/src/opencl/arithm_bitwise_or_mask.cl b/modules/ocl/src/opencl/arithm_bitwise_or_mask.cl index aedb68c47..2523eddcd 100644 --- a/modules/ocl/src/opencl/arithm_bitwise_or_mask.cl +++ b/modules/ocl/src/opencl/arithm_bitwise_or_mask.cl @@ -43,18 +43,22 @@ // //M*/ #if defined (DOUBLE_SUPPORT) +#ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64:enable +#elif defined (cl_amd_fp64) +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#endif #endif - ////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////BITWISE_OR//////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////// /**************************************bitwise_or with mask**************************************/ -__kernel void arithm_bitwise_or_with_mask_C1_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_or_with_mask_C1_D0 ( + __global uchar *src1, int src1_step, int src1_offset, + __global uchar *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -63,8 +67,11 @@ __kernel void arithm_bitwise_or_with_mask_C1_D0 (__global uchar *src1, int src1_ if (x < cols && y < rows) { x = x << 2; - - #define dst_align (dst_offset & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (dst_offset & 3) int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); int src2_index = mad24(y, src2_step, x + src2_offset - dst_align); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -91,11 +98,12 @@ __kernel void arithm_bitwise_or_with_mask_C1_D0 (__global uchar *src1, int src1_ -__kernel void arithm_bitwise_or_with_mask_C1_D1 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_or_with_mask_C1_D1 ( + __global char *src1, int src1_step, int src1_offset, + __global char *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -104,8 +112,11 @@ __kernel void arithm_bitwise_or_with_mask_C1_D1 (__global char *src1, int src1_s if (x < cols && y < rows) { x = x << 2; - - #define dst_align (dst_offset & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (dst_offset & 3) int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); int src2_index = mad24(y, src2_step, x + src2_offset - dst_align); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -132,11 +143,12 @@ __kernel void arithm_bitwise_or_with_mask_C1_D1 (__global char *src1, int src1_s -__kernel void arithm_bitwise_or_with_mask_C1_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global ushort *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_or_with_mask_C1_D2 ( + __global ushort *src1, int src1_step, int src1_offset, + __global ushort *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global ushort *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -145,8 +157,11 @@ __kernel void arithm_bitwise_or_with_mask_C1_D2 (__global ushort *src1, int src1 if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -171,11 +186,12 @@ __kernel void arithm_bitwise_or_with_mask_C1_D2 (__global ushort *src1, int src1 -__kernel void arithm_bitwise_or_with_mask_C1_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global short *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_or_with_mask_C1_D3 ( + __global short *src1, int src1_step, int src1_offset, + __global short *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global short *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -184,8 +200,11 @@ __kernel void arithm_bitwise_or_with_mask_C1_D3 (__global short *src1, int src1_ if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -198,8 +217,8 @@ __kernel void arithm_bitwise_or_with_mask_C1_D3 (__global short *src1, int src1_ short2 src2_data = vload2(0, (__global short *)((__global char *)src2 + src2_index)); uchar2 mask_data = vload2(0, mask + mask_index); - short2 data = *((__global short2 *)((__global uchar *)dst + dst_index)); - short2 tmp_data = src1_data | src2_data; + short2 data = *((__global short2 *)((__global uchar *)dst + dst_index)); + short2 tmp_data = src1_data | src2_data; data.x = convert_short((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : data.x; data.y = convert_short((mask_data.y) && (dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.y : data.y; @@ -210,11 +229,12 @@ __kernel void arithm_bitwise_or_with_mask_C1_D3 (__global short *src1, int src1_ -__kernel void arithm_bitwise_or_with_mask_C1_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global int *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_or_with_mask_C1_D4 ( + __global int *src1, int src1_step, int src1_offset, + __global int *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global int *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -242,11 +262,12 @@ __kernel void arithm_bitwise_or_with_mask_C1_D4 (__global int *src1, int src1_ -__kernel void arithm_bitwise_or_with_mask_C1_D5 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_or_with_mask_C1_D5 ( + __global char *src1, int src1_step, int src1_offset, + __global char *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -273,13 +294,13 @@ __kernel void arithm_bitwise_or_with_mask_C1_D5 (__global char *src1, int src1_s } - #if defined (DOUBLE_SUPPORT) -__kernel void arithm_bitwise_or_with_mask_C1_D6 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_or_with_mask_C1_D6 ( + __global char *src1, int src1_step, int src1_offset, + __global char *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -308,12 +329,12 @@ __kernel void arithm_bitwise_or_with_mask_C1_D6 (__global char *src1, int src1_s #endif - -__kernel void arithm_bitwise_or_with_mask_C2_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_or_with_mask_C2_D0 ( + __global uchar *src1, int src1_step, int src1_offset, + __global uchar *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -322,8 +343,11 @@ __kernel void arithm_bitwise_or_with_mask_C2_D0 (__global uchar *src1, int src1_ if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -347,11 +371,12 @@ __kernel void arithm_bitwise_or_with_mask_C2_D0 (__global uchar *src1, int src1_ } -__kernel void arithm_bitwise_or_with_mask_C2_D1 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_or_with_mask_C2_D1 ( + __global char *src1, int src1_step, int src1_offset, + __global char *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -360,8 +385,11 @@ __kernel void arithm_bitwise_or_with_mask_C2_D1 (__global char *src1, int src1_s if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -384,11 +412,12 @@ __kernel void arithm_bitwise_or_with_mask_C2_D1 (__global char *src1, int src1_s } } -__kernel void arithm_bitwise_or_with_mask_C2_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global ushort *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_or_with_mask_C2_D2 ( + __global ushort *src1, int src1_step, int src1_offset, + __global ushort *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global ushort *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -413,11 +442,12 @@ __kernel void arithm_bitwise_or_with_mask_C2_D2 (__global ushort *src1, int src1 *((__global ushort2 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_bitwise_or_with_mask_C2_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global short *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_or_with_mask_C2_D3 ( + __global short *src1, int src1_step, int src1_offset, + __global short *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global short *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -442,11 +472,12 @@ __kernel void arithm_bitwise_or_with_mask_C2_D3 (__global short *src1, int src1_ *((__global short2 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_bitwise_or_with_mask_C2_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global int *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_or_with_mask_C2_D4 ( + __global int *src1, int src1_step, int src1_offset, + __global int *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global int *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -471,11 +502,12 @@ __kernel void arithm_bitwise_or_with_mask_C2_D4 (__global int *src1, int src1_ *((__global int2 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_bitwise_or_with_mask_C2_D5 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_or_with_mask_C2_D5 ( + __global char *src1, int src1_step, int src1_offset, + __global char *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -501,11 +533,12 @@ __kernel void arithm_bitwise_or_with_mask_C2_D5 (__global char *src1, int src1_s } } #if defined (DOUBLE_SUPPORT) -__kernel void arithm_bitwise_or_with_mask_C2_D6 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_or_with_mask_C2_D6 ( + __global char *src1, int src1_step, int src1_offset, + __global char *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -533,12 +566,12 @@ __kernel void arithm_bitwise_or_with_mask_C2_D6 (__global char *src1, int src1_s #endif - -__kernel void arithm_bitwise_or_with_mask_C3_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_or_with_mask_C3_D0 ( + __global uchar *src1, int src1_step, int src1_offset, + __global uchar *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -547,8 +580,11 @@ __kernel void arithm_bitwise_or_with_mask_C3_D0 (__global uchar *src1, int src1_ if (x < cols && y < rows) { x = x << 2; - - #define dst_align (((dst_offset % dst_step) / 3 ) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 3 ) & 3) int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3)); int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -596,11 +632,12 @@ __kernel void arithm_bitwise_or_with_mask_C3_D0 (__global uchar *src1, int src1_ } -__kernel void arithm_bitwise_or_with_mask_C3_D1 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_or_with_mask_C3_D1 ( + __global char *src1, int src1_step, int src1_offset, + __global char *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -609,8 +646,11 @@ __kernel void arithm_bitwise_or_with_mask_C3_D1 (__global char *src1, int src1_s if (x < cols && y < rows) { x = x << 2; - - #define dst_align (((dst_offset % dst_step) / 3 ) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 3 ) & 3) int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3)); int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -657,11 +697,12 @@ __kernel void arithm_bitwise_or_with_mask_C3_D1 (__global char *src1, int src1_s } } -__kernel void arithm_bitwise_or_with_mask_C3_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global ushort *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_or_with_mask_C3_D2 ( + __global ushort *src1, int src1_step, int src1_offset, + __global ushort *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global ushort *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -670,8 +711,11 @@ __kernel void arithm_bitwise_or_with_mask_C3_D2 (__global ushort *src1, int src1 if (x < cols && y < rows) { x = x << 1; - - #define dst_align (((dst_offset % dst_step) / 6 ) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 6 ) & 1) int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6)); int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -701,23 +745,24 @@ __kernel void arithm_bitwise_or_with_mask_C3_D2 (__global ushort *src1, int src1 data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy; data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) - ? tmp_data_1.x : data_1.x; + ? tmp_data_1.x : data_1.x; data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_1.y : data_1.y; + ? tmp_data_1.y : data_1.y; data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_2.xy : data_2.xy; + ? tmp_data_2.xy : data_2.xy; - *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0; - *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1; - *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2; + *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0; + *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1; + *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2; } } -__kernel void arithm_bitwise_or_with_mask_C3_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global short *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_or_with_mask_C3_D3 ( + __global short *src1, int src1_step, int src1_offset, + __global short *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global short *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -726,8 +771,11 @@ __kernel void arithm_bitwise_or_with_mask_C3_D3 (__global short *src1, int src1_ if (x < cols && y < rows) { x = x << 1; - - #define dst_align (((dst_offset % dst_step) / 6 ) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 6 ) & 1) int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6)); int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -757,23 +805,24 @@ __kernel void arithm_bitwise_or_with_mask_C3_D3 (__global short *src1, int src1_ data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy; data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) - ? tmp_data_1.x : data_1.x; + ? tmp_data_1.x : data_1.x; data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_1.y : data_1.y; + ? tmp_data_1.y : data_1.y; data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_2.xy : data_2.xy; + ? tmp_data_2.xy : data_2.xy; - *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0; - *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1; - *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2; + *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0; + *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1; + *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2; } } -__kernel void arithm_bitwise_or_with_mask_C3_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global int *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_or_with_mask_C3_D4 ( + __global int *src1, int src1_step, int src1_offset, + __global int *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global int *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -808,16 +857,17 @@ __kernel void arithm_bitwise_or_with_mask_C3_D4 (__global int *src1, int src1_ data_1 = mask_data ? tmp_data_1 : data_1; data_2 = mask_data ? tmp_data_2 : data_2; - *((__global int *)((__global char *)dst + dst_index + 0))= data_0; - *((__global int *)((__global char *)dst + dst_index + 4))= data_1; - *((__global int *)((__global char *)dst + dst_index + 8))= data_2; + *((__global int *)((__global char *)dst + dst_index + 0))= data_0; + *((__global int *)((__global char *)dst + dst_index + 4))= data_1; + *((__global int *)((__global char *)dst + dst_index + 8))= data_2; } } -__kernel void arithm_bitwise_or_with_mask_C3_D5 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_or_with_mask_C3_D5 ( + __global char *src1, int src1_step, int src1_offset, + __global char *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -852,17 +902,18 @@ __kernel void arithm_bitwise_or_with_mask_C3_D5 (__global char *src1, int src1_s data_1 = mask_data ? tmp_data_1 : data_1; data_2 = mask_data ? tmp_data_2 : data_2; - *((__global char4 *)((__global char *)dst + dst_index + 0))= data_0; - *((__global char4 *)((__global char *)dst + dst_index + 4))= data_1; - *((__global char4 *)((__global char *)dst + dst_index + 8))= data_2; + *((__global char4 *)((__global char *)dst + dst_index + 0))= data_0; + *((__global char4 *)((__global char *)dst + dst_index + 4))= data_1; + *((__global char4 *)((__global char *)dst + dst_index + 8))= data_2; } } #if defined (DOUBLE_SUPPORT) -__kernel void arithm_bitwise_or_with_mask_C3_D6 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_or_with_mask_C3_D6 ( + __global char *src1, int src1_step, int src1_offset, + __global char *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -897,20 +948,20 @@ __kernel void arithm_bitwise_or_with_mask_C3_D6 (__global char *src1, int src1_s data_1 = mask_data ? tmp_data_1 : data_1; data_2 = mask_data ? tmp_data_2 : data_2; - *((__global char8 *)((__global char *)dst + dst_index + 0 ))= data_0; - *((__global char8 *)((__global char *)dst + dst_index + 8 ))= data_1; - *((__global char8 *)((__global char *)dst + dst_index + 16))= data_2; + *((__global char8 *)((__global char *)dst + dst_index + 0 ))= data_0; + *((__global char8 *)((__global char *)dst + dst_index + 8 ))= data_1; + *((__global char8 *)((__global char *)dst + dst_index + 16))= data_2; } } #endif - -__kernel void arithm_bitwise_or_with_mask_C4_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_or_with_mask_C4_D0 ( + __global uchar *src1, int src1_step, int src1_offset, + __global uchar *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -937,11 +988,12 @@ __kernel void arithm_bitwise_or_with_mask_C4_D0 (__global uchar *src1, int src1_ } -__kernel void arithm_bitwise_or_with_mask_C4_D1 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_or_with_mask_C4_D1 ( + __global char *src1, int src1_step, int src1_offset, + __global char *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -967,11 +1019,12 @@ __kernel void arithm_bitwise_or_with_mask_C4_D1 (__global char *src1, int src1_s } } -__kernel void arithm_bitwise_or_with_mask_C4_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global ushort *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_or_with_mask_C4_D2 ( + __global ushort *src1, int src1_step, int src1_offset, + __global ushort *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global ushort *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -996,11 +1049,12 @@ __kernel void arithm_bitwise_or_with_mask_C4_D2 (__global ushort *src1, int src1 *((__global ushort4 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_bitwise_or_with_mask_C4_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global short *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_or_with_mask_C4_D3 ( + __global short *src1, int src1_step, int src1_offset, + __global short *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global short *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -1025,11 +1079,12 @@ __kernel void arithm_bitwise_or_with_mask_C4_D3 (__global short *src1, int src1_ *((__global short4 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_bitwise_or_with_mask_C4_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global int *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_or_with_mask_C4_D4 ( + __global int *src1, int src1_step, int src1_offset, + __global int *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global int *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -1054,11 +1109,12 @@ __kernel void arithm_bitwise_or_with_mask_C4_D4 (__global int *src1, int src1_ *((__global int4 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_bitwise_or_with_mask_C4_D5 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_or_with_mask_C4_D5 ( + __global char *src1, int src1_step, int src1_offset, + __global char *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -1084,11 +1140,12 @@ __kernel void arithm_bitwise_or_with_mask_C4_D5 (__global char *src1, int src1_s } } #if defined (DOUBLE_SUPPORT) -__kernel void arithm_bitwise_or_with_mask_C4_D6 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_or_with_mask_C4_D6 ( + __global char *src1, int src1_step, int src1_offset, + __global char *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); diff --git a/modules/ocl/src/opencl/arithm_bitwise_or_scalar.cl b/modules/ocl/src/opencl/arithm_bitwise_or_scalar.cl index 5b94591a3..fdcc00c4e 100644 --- a/modules/ocl/src/opencl/arithm_bitwise_or_scalar.cl +++ b/modules/ocl/src/opencl/arithm_bitwise_or_scalar.cl @@ -43,16 +43,21 @@ // //M*/ #if defined (DOUBLE_SUPPORT) +#ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64:enable +#elif defined (cl_amd_fp64) +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#endif #endif ////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////BITWISE_OR//////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////// /**************************************and with scalar without mask**************************************/ -__kernel void arithm_s_bitwise_or_C1_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *dst, int dst_step, int dst_offset, - uchar4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_C1_D0 ( + __global uchar *src1, int src1_step, int src1_offset, + __global uchar *dst, int dst_step, int dst_offset, + uchar4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -60,8 +65,11 @@ __kernel void arithm_s_bitwise_or_C1_D0 (__global uchar *src1, int src1_step, if (x < cols && y < rows) { x = x << 2; - - #define dst_align (dst_offset & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (dst_offset & 3) int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); int dst_start = mad24(y, dst_step, dst_offset); @@ -84,9 +92,10 @@ __kernel void arithm_s_bitwise_or_C1_D0 (__global uchar *src1, int src1_step, } -__kernel void arithm_s_bitwise_or_C1_D1 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - char4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_C1_D1 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + char4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -94,8 +103,11 @@ __kernel void arithm_s_bitwise_or_C1_D1 (__global char *src1, int src1_step, i if (x < cols && y < rows) { x = x << 2; - - #define dst_align (dst_offset & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (dst_offset & 3) int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); int dst_start = mad24(y, dst_step, dst_offset); @@ -117,9 +129,10 @@ __kernel void arithm_s_bitwise_or_C1_D1 (__global char *src1, int src1_step, i } } -__kernel void arithm_s_bitwise_or_C1_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *dst, int dst_step, int dst_offset, - ushort4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_C1_D2 ( + __global ushort *src1, int src1_step, int src1_offset, + __global ushort *dst, int dst_step, int dst_offset, + ushort4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -128,8 +141,11 @@ __kernel void arithm_s_bitwise_or_C1_D2 (__global ushort *src1, int src1_step, if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int dst_start = mad24(y, dst_step, dst_offset); @@ -148,9 +164,10 @@ __kernel void arithm_s_bitwise_or_C1_D2 (__global ushort *src1, int src1_step, *((__global ushort2 *)((__global uchar *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_or_C1_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - short4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_C1_D3 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + short4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -159,8 +176,11 @@ __kernel void arithm_s_bitwise_or_C1_D3 (__global short *src1, int src1_step, if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int dst_start = mad24(y, dst_step, dst_offset); @@ -179,9 +199,10 @@ __kernel void arithm_s_bitwise_or_C1_D3 (__global short *src1, int src1_step, *((__global short2 *)((__global uchar *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_or_C1_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *dst, int dst_step, int dst_offset, - int4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_C1_D4 ( + __global int *src1, int src1_step, int src1_offset, + __global int *dst, int dst_step, int dst_offset, + int4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -200,9 +221,10 @@ __kernel void arithm_s_bitwise_or_C1_D4 (__global int *src1, int src1_step, in *((__global int *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_or_C1_D5 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - char16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_C1_D5 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + char16 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -222,9 +244,10 @@ __kernel void arithm_s_bitwise_or_C1_D5 (__global char *src1, int src1_step, i } } #if defined (DOUBLE_SUPPORT) -__kernel void arithm_s_bitwise_or_C1_D6 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - short16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_C1_D6 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + short16 src2, int rows, int cols, int dst_step1) { @@ -245,10 +268,10 @@ __kernel void arithm_s_bitwise_or_C1_D6 (__global short *src1, int src1_step, in } } #endif - -__kernel void arithm_s_bitwise_or_C2_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *dst, int dst_step, int dst_offset, - uchar4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_C2_D0 ( + __global uchar *src1, int src1_step, int src1_offset, + __global uchar *dst, int dst_step, int dst_offset, + uchar4 src2, int rows, int cols, int dst_step1) { @@ -258,8 +281,11 @@ __kernel void arithm_s_bitwise_or_C2_D0 (__global uchar *src1, int src1_step, if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int dst_start = mad24(y, dst_step, dst_offset); @@ -280,9 +306,10 @@ __kernel void arithm_s_bitwise_or_C2_D0 (__global uchar *src1, int src1_step, } -__kernel void arithm_s_bitwise_or_C2_D1 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - char4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_C2_D1 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + char4 src2, int rows, int cols, int dst_step1) { @@ -292,8 +319,11 @@ __kernel void arithm_s_bitwise_or_C2_D1 (__global char *src1, int src1_step, i if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int dst_start = mad24(y, dst_step, dst_offset); @@ -313,9 +343,10 @@ __kernel void arithm_s_bitwise_or_C2_D1 (__global char *src1, int src1_step, i } } -__kernel void arithm_s_bitwise_or_C2_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *dst, int dst_step, int dst_offset, - ushort4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_C2_D2 ( + __global ushort *src1, int src1_step, int src1_offset, + __global ushort *dst, int dst_step, int dst_offset, + ushort4 src2, int rows, int cols, int dst_step1) { @@ -335,9 +366,10 @@ __kernel void arithm_s_bitwise_or_C2_D2 (__global ushort *src1, int src1_step, *((__global ushort2 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_or_C2_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - short4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_C2_D3 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + short4 src2, int rows, int cols, int dst_step1) { @@ -358,8 +390,8 @@ __kernel void arithm_s_bitwise_or_C2_D3 (__global short *src1, int src1_step, } } __kernel void arithm_s_bitwise_or_C2_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *dst, int dst_step, int dst_offset, - int4 src2, int rows, int cols, int dst_step1) + __global int *dst, int dst_step, int dst_offset, + int4 src2, int rows, int cols, int dst_step1) { @@ -378,9 +410,10 @@ __kernel void arithm_s_bitwise_or_C2_D4 (__global int *src1, int src1_step, in *((__global int2 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_or_C2_D5 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - char16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_C2_D5 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + char16 src2, int rows, int cols, int dst_step1) { @@ -400,9 +433,10 @@ __kernel void arithm_s_bitwise_or_C2_D5 (__global char *src1, int src1_step, i } } #if defined (DOUBLE_SUPPORT) -__kernel void arithm_s_bitwise_or_C2_D6 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - short16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_C2_D6 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + short16 src2, int rows, int cols, int dst_step1) { @@ -423,9 +457,10 @@ __kernel void arithm_s_bitwise_or_C2_D6 (__global short *src1, int src1_step, in } } #endif -__kernel void arithm_s_bitwise_or_C3_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *dst, int dst_step, int dst_offset, - uchar4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_C3_D0 ( + __global uchar *src1, int src1_step, int src1_offset, + __global uchar *dst, int dst_step, int dst_offset, + uchar4 src2, int rows, int cols, int dst_step1) { @@ -435,8 +470,11 @@ __kernel void arithm_s_bitwise_or_C3_D0 (__global uchar *src1, int src1_step, if (x < cols && y < rows) { x = x << 2; - - #define dst_align (((dst_offset % dst_step) / 3 ) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 3 ) & 3) int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3)); int dst_start = mad24(y, dst_step, dst_offset); @@ -480,9 +518,10 @@ __kernel void arithm_s_bitwise_or_C3_D0 (__global uchar *src1, int src1_step, } -__kernel void arithm_s_bitwise_or_C3_D1 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - char4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_C3_D1 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + char4 src2, int rows, int cols, int dst_step1) { @@ -492,8 +531,11 @@ __kernel void arithm_s_bitwise_or_C3_D1 (__global char *src1, int src1_step, i if (x < cols && y < rows) { x = x << 2; - - #define dst_align (((dst_offset % dst_step) / 3 ) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 3 ) & 3) int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3)); int dst_start = mad24(y, dst_step, dst_offset); @@ -536,9 +578,10 @@ __kernel void arithm_s_bitwise_or_C3_D1 (__global char *src1, int src1_step, i } } -__kernel void arithm_s_bitwise_or_C3_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *dst, int dst_step, int dst_offset, - ushort4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_C3_D2 ( + __global ushort *src1, int src1_step, int src1_offset, + __global ushort *dst, int dst_step, int dst_offset, + ushort4 src2, int rows, int cols, int dst_step1) { @@ -548,8 +591,11 @@ __kernel void arithm_s_bitwise_or_C3_D2 (__global ushort *src1, int src1_step, if (x < cols && y < rows) { x = x << 1; - - #define dst_align (((dst_offset % dst_step) / 6 ) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 6 ) & 1) int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6)); int dst_start = mad24(y, dst_step, dst_offset); @@ -575,21 +621,22 @@ __kernel void arithm_s_bitwise_or_C3_D2 (__global ushort *src1, int src1_step, data_0.xy = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy; data_1.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) - ? tmp_data_1.x : data_1.x; + ? tmp_data_1.x : data_1.x; data_1.y = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_1.y : data_1.y; + ? tmp_data_1.y : data_1.y; data_2.xy = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_2.xy : data_2.xy; + ? tmp_data_2.xy : data_2.xy; - *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0; - *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1; - *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2; + *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0; + *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1; + *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2; } } -__kernel void arithm_s_bitwise_or_C3_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - short4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_C3_D3 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + short4 src2, int rows, int cols, int dst_step1) { @@ -599,8 +646,11 @@ __kernel void arithm_s_bitwise_or_C3_D3 (__global short *src1, int src1_step, if (x < cols && y < rows) { x = x << 1; - - #define dst_align (((dst_offset % dst_step) / 6 ) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 6 ) & 1) int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6)); int dst_start = mad24(y, dst_step, dst_offset); @@ -626,21 +676,22 @@ __kernel void arithm_s_bitwise_or_C3_D3 (__global short *src1, int src1_step, data_0.xy = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy; data_1.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) - ? tmp_data_1.x : data_1.x; + ? tmp_data_1.x : data_1.x; data_1.y = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_1.y : data_1.y; + ? tmp_data_1.y : data_1.y; data_2.xy = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_2.xy : data_2.xy; + ? tmp_data_2.xy : data_2.xy; - *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0; - *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1; - *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2; + *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0; + *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1; + *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2; } } -__kernel void arithm_s_bitwise_or_C3_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *dst, int dst_step, int dst_offset, - int4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_C3_D4 ( + __global int *src1, int src1_step, int src1_offset, + __global int *dst, int dst_step, int dst_offset, + int4 src2, int rows, int cols, int dst_step1) { @@ -668,14 +719,15 @@ __kernel void arithm_s_bitwise_or_C3_D4 (__global int *src1, int src1_step, in int tmp_data_1 = src1_data_1 | src2_data_1; int tmp_data_2 = src1_data_2 | src2_data_2; - *((__global int *)((__global char *)dst + dst_index + 0))= tmp_data_0; - *((__global int *)((__global char *)dst + dst_index + 4))= tmp_data_1; - *((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2; + *((__global int *)((__global char *)dst + dst_index + 0))= tmp_data_0; + *((__global int *)((__global char *)dst + dst_index + 4))= tmp_data_1; + *((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2; } } -__kernel void arithm_s_bitwise_or_C3_D5 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - char16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_C3_D5 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + char16 src2, int rows, int cols, int dst_step1) { @@ -700,15 +752,16 @@ __kernel void arithm_s_bitwise_or_C3_D5 (__global char *src1, int src1_step, i char4 tmp_data_1 = src1_data_1 | src2_data_1; char4 tmp_data_2 = src1_data_2 | src2_data_2; - *((__global char4 *)((__global char *)dst + dst_index + 0))= tmp_data_0; - *((__global char4 *)((__global char *)dst + dst_index + 4))= tmp_data_1; - *((__global char4 *)((__global char *)dst + dst_index + 8))= tmp_data_2; + *((__global char4 *)((__global char *)dst + dst_index + 0))= tmp_data_0; + *((__global char4 *)((__global char *)dst + dst_index + 4))= tmp_data_1; + *((__global char4 *)((__global char *)dst + dst_index + 8))= tmp_data_2; } } #if defined (DOUBLE_SUPPORT) -__kernel void arithm_s_bitwise_or_C3_D6 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - short16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_C3_D6 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + short16 src2, int rows, int cols, int dst_step1) { @@ -736,15 +789,16 @@ __kernel void arithm_s_bitwise_or_C3_D6 (__global short *src1, int src1_step, in short4 tmp_data_1 = src1_data_1 | src2_data_1; short4 tmp_data_2 = src1_data_2 | src2_data_2; - *((__global short4 *)((__global char *)dst + dst_index + 0 ))= tmp_data_0; - *((__global short4 *)((__global char *)dst + dst_index + 8 ))= tmp_data_1; - *((__global short4 *)((__global char *)dst + dst_index + 16))= tmp_data_2; + *((__global short4 *)((__global char *)dst + dst_index + 0 ))= tmp_data_0; + *((__global short4 *)((__global char *)dst + dst_index + 8 ))= tmp_data_1; + *((__global short4 *)((__global char *)dst + dst_index + 16))= tmp_data_2; } } #endif -__kernel void arithm_s_bitwise_or_C4_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *dst, int dst_step, int dst_offset, - uchar4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_C4_D0 ( + __global uchar *src1, int src1_step, int src1_offset, + __global uchar *dst, int dst_step, int dst_offset, + uchar4 src2, int rows, int cols, int dst_step1) { @@ -765,9 +819,10 @@ __kernel void arithm_s_bitwise_or_C4_D0 (__global uchar *src1, int src1_step, } -__kernel void arithm_s_bitwise_or_C4_D1 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - char4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_C4_D1 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + char4 src2, int rows, int cols, int dst_step1) { @@ -787,9 +842,10 @@ __kernel void arithm_s_bitwise_or_C4_D1 (__global char *src1, int src1_step, i } } -__kernel void arithm_s_bitwise_or_C4_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *dst, int dst_step, int dst_offset, - ushort4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_C4_D2 ( + __global ushort *src1, int src1_step, int src1_offset, + __global ushort *dst, int dst_step, int dst_offset, + ushort4 src2, int rows, int cols, int dst_step1) { @@ -808,9 +864,10 @@ __kernel void arithm_s_bitwise_or_C4_D2 (__global ushort *src1, int src1_step, *((__global ushort4 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_or_C4_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - short4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_C4_D3 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + short4 src2, int rows, int cols, int dst_step1) { @@ -829,9 +886,10 @@ __kernel void arithm_s_bitwise_or_C4_D3 (__global short *src1, int src1_step, *((__global short4 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_or_C4_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *dst, int dst_step, int dst_offset, - int4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_C4_D4 ( + __global int *src1, int src1_step, int src1_offset, + __global int *dst, int dst_step, int dst_offset, + int4 src2, int rows, int cols, int dst_step1) { @@ -850,9 +908,10 @@ __kernel void arithm_s_bitwise_or_C4_D4 (__global int *src1, int src1_step, in *((__global int4 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_or_C4_D5 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - char16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_C4_D5 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + char16 src2, int rows, int cols, int dst_step1) { @@ -874,9 +933,10 @@ __kernel void arithm_s_bitwise_or_C4_D5 (__global char *src1, int src1_step, i } } #if defined (DOUBLE_SUPPORT) -__kernel void arithm_s_bitwise_or_C4_D6 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - short16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_C4_D6 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + short16 src2, int rows, int cols, int dst_step1) { @@ -903,10 +963,10 @@ __kernel void arithm_s_bitwise_or_C4_D6 (__global short *src1, int src1_step, in short4 tmp_data_2 = src1_data_2 | src2_data_2; short4 tmp_data_3 = src1_data_3 | src2_data_3; - *((__global short4 *)((__global char *)dst + dst_index + 0 ))= tmp_data_0; - *((__global short4 *)((__global char *)dst + dst_index + 8 ))= tmp_data_1; - *((__global short4 *)((__global char *)dst + dst_index + 16))= tmp_data_2; - *((__global short4 *)((__global char *)dst + dst_index + 24))= tmp_data_3; + *((__global short4 *)((__global char *)dst + dst_index + 0 ))= tmp_data_0; + *((__global short4 *)((__global char *)dst + dst_index + 8 ))= tmp_data_1; + *((__global short4 *)((__global char *)dst + dst_index + 16))= tmp_data_2; + *((__global short4 *)((__global char *)dst + dst_index + 24))= tmp_data_3; } } diff --git a/modules/ocl/src/opencl/arithm_bitwise_or_scalar_mask.cl b/modules/ocl/src/opencl/arithm_bitwise_or_scalar_mask.cl index 54066c21a..8baa9a2ca 100644 --- a/modules/ocl/src/opencl/arithm_bitwise_or_scalar_mask.cl +++ b/modules/ocl/src/opencl/arithm_bitwise_or_scalar_mask.cl @@ -43,17 +43,21 @@ // //M*/ #if defined (DOUBLE_SUPPORT) +#ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64:enable +#elif defined (cl_amd_fp64) +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#endif #endif - ////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////BITWISE_OR//////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////// /**************************************bitwise_or with scalar with mask**************************************/ -__kernel void arithm_s_bitwise_or_with_mask_C1_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - uchar4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_with_mask_C1_D0 ( + __global uchar *src1, int src1_step, int src1_offset, + __global uchar *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + uchar4 src2, int rows, int cols, int dst_step1) { @@ -63,8 +67,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D0 (__global uchar *src1, int s if (x < cols && y < rows) { x = x << 2; - - #define dst_align (dst_offset & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (dst_offset & 3) int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -89,10 +96,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D0 (__global uchar *src1, int s } -__kernel void arithm_s_bitwise_or_with_mask_C1_D1 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - char4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_with_mask_C1_D1 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + char4 src2, int rows, int cols, int dst_step1) { @@ -102,8 +110,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D1 (__global char *src1, int sr if (x < cols && y < rows) { x = x << 2; - - #define dst_align (dst_offset & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (dst_offset & 3) int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -127,10 +138,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D1 (__global char *src1, int sr } } -__kernel void arithm_s_bitwise_or_with_mask_C1_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - ushort4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_with_mask_C1_D2 ( + __global ushort *src1, int src1_step, int src1_offset, + __global ushort *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + ushort4 src2, int rows, int cols, int dst_step1) { @@ -140,8 +152,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D2 (__global ushort *src1, int if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -162,10 +177,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D2 (__global ushort *src1, int *((__global ushort2 *)((__global uchar *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_or_with_mask_C1_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - short4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_with_mask_C1_D3 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + short4 src2, int rows, int cols, int dst_step1) { @@ -175,8 +191,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D3 (__global short *src1, int s if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -197,10 +216,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D3 (__global short *src1, int s *((__global short2 *)((__global uchar *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_or_with_mask_C1_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - int4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_with_mask_C1_D4 ( + __global int *src1, int src1_step, int src1_offset, + __global int *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + int4 src2, int rows, int cols, int dst_step1) { @@ -226,10 +246,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D4 (__global int *src1, int s } } -__kernel void arithm_s_bitwise_or_with_mask_C1_D5 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - char16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_with_mask_C1_D5 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + char16 src2, int rows, int cols, int dst_step1) { @@ -254,12 +275,12 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D5 (__global char *src1, int *((__global char4 *)((__global char *)dst + dst_index)) = data; } } - #if defined (DOUBLE_SUPPORT) -__kernel void arithm_s_bitwise_or_with_mask_C1_D6 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - short16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_with_mask_C1_D6 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + short16 src2, int rows, int cols, int dst_step1) { @@ -285,10 +306,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C1_D6 (__global short *src1, int src } } #endif -__kernel void arithm_s_bitwise_or_with_mask_C2_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - uchar4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_with_mask_C2_D0 ( + __global uchar *src1, int src1_step, int src1_offset, + __global uchar *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + uchar4 src2, int rows, int cols, int dst_step1) { @@ -298,8 +320,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D0 (__global uchar *src1, int s if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -322,10 +347,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D0 (__global uchar *src1, int s } -__kernel void arithm_s_bitwise_or_with_mask_C2_D1 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - char4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_with_mask_C2_D1 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + char4 src2, int rows, int cols, int dst_step1) { @@ -335,8 +361,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D1 (__global char *src1, int sr if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -358,10 +387,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D1 (__global char *src1, int sr } } -__kernel void arithm_s_bitwise_or_with_mask_C2_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - ushort4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_with_mask_C2_D2 ( + __global ushort *src1, int src1_step, int src1_offset, + __global ushort *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + ushort4 src2, int rows, int cols, int dst_step1) { @@ -386,10 +416,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D2 (__global ushort *src1, int *((__global ushort2 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_or_with_mask_C2_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - short4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_with_mask_C2_D3 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + short4 src2, int rows, int cols, int dst_step1) { @@ -414,10 +445,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D3 (__global short *src1, int s *((__global short2 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_or_with_mask_C2_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - int4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_with_mask_C2_D4 ( + __global int *src1, int src1_step, int src1_offset, + __global int *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + int4 src2, int rows, int cols, int dst_step1) { @@ -442,10 +474,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D4 (__global int *src1, int src *((__global int2 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_or_with_mask_C2_D5 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - char16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_with_mask_C2_D5 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + char16 src2, int rows, int cols, int dst_step1) { @@ -463,17 +496,18 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D5 (__global char *src1, int sr char8 src_data1 = *((__global char8 *)((__global char *)src1 + src1_index)); char8 src_data2 = (char8)(src2.s0, src2.s1, src2.s2, src2.s3, src2.s4, src2.s5, src2.s6, src2.s7); char8 dst_data = *((__global char8 *)((__global char *)dst + dst_index)); - char8 data = src_data1 | src_data2; + char8 data = src_data1 | src_data2; data = mask_data ? data : dst_data; *((__global char8 *)((__global char *)dst + dst_index)) = data; - } + } } #if defined (DOUBLE_SUPPORT) -__kernel void arithm_s_bitwise_or_with_mask_C2_D6 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - short16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_with_mask_C2_D6 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + short16 src2, int rows, int cols, int dst_step1) { @@ -499,10 +533,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C2_D6 (__global char *src1, int sr } } #endif -__kernel void arithm_s_bitwise_or_with_mask_C3_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - uchar4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_with_mask_C3_D0 ( + __global uchar *src1, int src1_step, int src1_offset, + __global uchar *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + uchar4 src2, int rows, int cols, int dst_step1) { @@ -512,8 +547,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D0 (__global uchar *src1, int s if (x < cols && y < rows) { x = x << 2; - - #define dst_align (((dst_offset % dst_step) / 3 ) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 3 ) & 3) int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -560,10 +598,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D0 (__global uchar *src1, int s } -__kernel void arithm_s_bitwise_or_with_mask_C3_D1 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - char4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_with_mask_C3_D1 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + char4 src2, int rows, int cols, int dst_step1) { @@ -573,8 +612,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D1 (__global char *src1, int sr if (x < cols && y < rows) { x = x << 2; - - #define dst_align (((dst_offset % dst_step) / 3 ) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 3 ) & 3) int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -620,10 +662,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D1 (__global char *src1, int sr } } -__kernel void arithm_s_bitwise_or_with_mask_C3_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - ushort4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_with_mask_C3_D2 ( + __global ushort *src1, int src1_step, int src1_offset, + __global ushort *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + ushort4 src2, int rows, int cols, int dst_step1) { @@ -633,8 +676,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D2 (__global ushort *src1, int if (x < cols && y < rows) { x = x << 1; - - #define dst_align (((dst_offset % dst_step) / 6 ) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 6 ) & 1) int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -663,22 +709,23 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D2 (__global ushort *src1, int data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy; data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) - ? tmp_data_1.x : data_1.x; + ? tmp_data_1.x : data_1.x; data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_1.y : data_1.y; + ? tmp_data_1.y : data_1.y; data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_2.xy : data_2.xy; + ? tmp_data_2.xy : data_2.xy; - *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0; - *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1; - *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2; + *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0; + *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1; + *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2; } } -__kernel void arithm_s_bitwise_or_with_mask_C3_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - short4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_with_mask_C3_D3 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + short4 src2, int rows, int cols, int dst_step1) { @@ -688,8 +735,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D3 (__global short *src1, int s if (x < cols && y < rows) { x = x << 1; - - #define dst_align (((dst_offset % dst_step) / 6 ) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 6 ) & 1) int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -718,22 +768,23 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D3 (__global short *src1, int s data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy; data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) - ? tmp_data_1.x : data_1.x; + ? tmp_data_1.x : data_1.x; data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_1.y : data_1.y; + ? tmp_data_1.y : data_1.y; data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_2.xy : data_2.xy; + ? tmp_data_2.xy : data_2.xy; - *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0; - *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1; - *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2; + *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0; + *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1; + *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2; } } -__kernel void arithm_s_bitwise_or_with_mask_C3_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - int4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_with_mask_C3_D4 ( + __global int *src1, int src1_step, int src1_offset, + __global int *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + int4 src2, int rows, int cols, int dst_step1) { @@ -768,15 +819,16 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D4 (__global int *src1, int src data_1 = mask_data ? tmp_data_1 : data_1; data_2 = mask_data ? tmp_data_2 : data_2; - *((__global int *)((__global char *)dst + dst_index + 0))= data_0; - *((__global int *)((__global char *)dst + dst_index + 4))= data_1; - *((__global int *)((__global char *)dst + dst_index + 8))= data_2; + *((__global int *)((__global char *)dst + dst_index + 0))= data_0; + *((__global int *)((__global char *)dst + dst_index + 4))= data_1; + *((__global int *)((__global char *)dst + dst_index + 8))= data_2; } } -__kernel void arithm_s_bitwise_or_with_mask_C3_D5 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - char16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_with_mask_C3_D5 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + char16 src2, int rows, int cols, int dst_step1) { @@ -811,17 +863,18 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D5 (__global char *src1, int sr data_1 = mask_data ? tmp_data_1 : data_1; data_2 = mask_data ? tmp_data_2 : data_2; - *((__global char4 *)((__global char *)dst + dst_index + 0))= data_0; - *((__global char4 *)((__global char *)dst + dst_index + 4))= data_1; - *((__global char4 *)((__global char *)dst + dst_index + 8))= data_2; + *((__global char4 *)((__global char *)dst + dst_index + 0))= data_0; + *((__global char4 *)((__global char *)dst + dst_index + 4))= data_1; + *((__global char4 *)((__global char *)dst + dst_index + 8))= data_2; - } + } } #if defined (DOUBLE_SUPPORT) -__kernel void arithm_s_bitwise_or_with_mask_C3_D6 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - short16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_with_mask_C3_D6 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + short16 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -855,16 +908,17 @@ __kernel void arithm_s_bitwise_or_with_mask_C3_D6 (__global short *src1, int src data_1 = mask_data ? tmp_data_1 : data_1; data_2 = mask_data ? tmp_data_2 : data_2; - *((__global short4 *)((__global char *)dst + dst_index + 0 ))= data_0; - *((__global short4 *)((__global char *)dst + dst_index + 8 ))= data_1; - *((__global short4 *)((__global char *)dst + dst_index + 16))= data_2; + *((__global short4 *)((__global char *)dst + dst_index + 0 ))= data_0; + *((__global short4 *)((__global char *)dst + dst_index + 8 ))= data_1; + *((__global short4 *)((__global char *)dst + dst_index + 16))= data_2; } } #endif -__kernel void arithm_s_bitwise_or_with_mask_C4_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - uchar4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_with_mask_C4_D0 ( + __global uchar *src1, int src1_step, int src1_offset, + __global uchar *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + uchar4 src2, int rows, int cols, int dst_step1) { @@ -890,10 +944,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C4_D0 (__global uchar *src1, int s } -__kernel void arithm_s_bitwise_or_with_mask_C4_D1 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - char4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_with_mask_C4_D1 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + char4 src2, int rows, int cols, int dst_step1) { @@ -918,10 +973,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C4_D1 (__global char *src1, int sr } } -__kernel void arithm_s_bitwise_or_with_mask_C4_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - ushort4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_with_mask_C4_D2 ( + __global ushort *src1, int src1_step, int src1_offset, + __global ushort *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + ushort4 src2, int rows, int cols, int dst_step1) { @@ -945,10 +1001,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C4_D2 (__global ushort *src1, int *((__global ushort4 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_or_with_mask_C4_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - short4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_with_mask_C4_D3 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + short4 src2, int rows, int cols, int dst_step1) { @@ -972,10 +1029,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C4_D3 (__global short *src1, int s *((__global short4 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_or_with_mask_C4_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - int4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_with_mask_C4_D4 ( + __global int *src1, int src1_step, int src1_offset, + __global int *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + int4 src2, int rows, int cols, int dst_step1) { @@ -999,10 +1057,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C4_D4 (__global int *src1, int src *((__global int4 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_or_with_mask_C4_D5 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - char16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_with_mask_C4_D5 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + char16 src2, int rows, int cols, int dst_step1) { @@ -1029,10 +1088,11 @@ __kernel void arithm_s_bitwise_or_with_mask_C4_D5 (__global char *src1, int sr } } #if defined (DOUBLE_SUPPORT) -__kernel void arithm_s_bitwise_or_with_mask_C4_D6 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - short16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_or_with_mask_C4_D6 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + short16 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); diff --git a/modules/ocl/src/opencl/arithm_bitwise_xor.cl b/modules/ocl/src/opencl/arithm_bitwise_xor.cl index 4f743776a..c8b00ca39 100644 --- a/modules/ocl/src/opencl/arithm_bitwise_xor.cl +++ b/modules/ocl/src/opencl/arithm_bitwise_xor.cl @@ -43,17 +43,20 @@ // //M*/ #if defined (DOUBLE_SUPPORT) +#ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64:enable +#elif defined (cl_amd_fp64) +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#endif #endif - ////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////BITWISE_XOR//////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////// /**************************************bitwise_xor without mask**************************************/ __kernel void arithm_bitwise_xor_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global uchar *src2, int src2_step, int src2_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -61,8 +64,11 @@ __kernel void arithm_bitwise_xor_D0 (__global uchar *src1, int src1_step, int sr if (x < cols && y < rows) { x = x << 2; - - #define dst_align (dst_offset & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (dst_offset & 3) int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); int src2_index = mad24(y, src2_step, x + src2_offset - dst_align); @@ -70,23 +76,23 @@ __kernel void arithm_bitwise_xor_D0 (__global uchar *src1, int src1_step, int sr int dst_end = mad24(y, dst_step, dst_offset + dst_step1); int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; + int src1_index_fix = src1_index < 0 ? 0 : src1_index; + int src2_index_fix = src2_index < 0 ? 0 : src2_index; uchar4 src1_data = vload4(0, src1 + src1_index_fix); uchar4 src2_data = vload4(0, src2 + src2_index_fix); - if(src1_index < 0) - { - uchar4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - uchar4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } + if(src1_index < 0) + { + uchar4 tmp; + tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; + src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; + } + if(src2_index < 0) + { + uchar4 tmp; + tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; + src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; + } uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); uchar4 tmp_data = src1_data ^ src2_data; @@ -101,9 +107,9 @@ __kernel void arithm_bitwise_xor_D0 (__global uchar *src1, int src1_step, int sr __kernel void arithm_bitwise_xor_D1 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global char *src2, int src2_step, int src2_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -111,8 +117,11 @@ __kernel void arithm_bitwise_xor_D1 (__global char *src1, int src1_step, int src if (x < cols && y < rows) { x = x << 2; - - #define dst_align (dst_offset & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (dst_offset & 3) int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); int src2_index = mad24(y, src2_step, x + src2_offset - dst_align); @@ -120,23 +129,23 @@ __kernel void arithm_bitwise_xor_D1 (__global char *src1, int src1_step, int src int dst_end = mad24(y, dst_step, dst_offset + dst_step1); int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; + int src1_index_fix = src1_index < 0 ? 0 : src1_index; + int src2_index_fix = src2_index < 0 ? 0 : src2_index; char4 src1_data = vload4(0, src1 + src1_index_fix); char4 src2_data = vload4(0, src2 + src2_index_fix); - if(src1_index < 0) - { - char4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - char4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } + if(src1_index < 0) + { + char4 tmp; + tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; + src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; + } + if(src2_index < 0) + { + char4 tmp; + tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; + src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; + } char4 dst_data = *((__global char4 *)(dst + dst_index)); char4 tmp_data = src1_data ^ src2_data; @@ -151,9 +160,9 @@ __kernel void arithm_bitwise_xor_D1 (__global char *src1, int src1_step, int src __kernel void arithm_bitwise_xor_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *src2, int src2_step, int src2_offset, - __global ushort *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global ushort *src2, int src2_step, int src2_offset, + __global ushort *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -162,8 +171,11 @@ __kernel void arithm_bitwise_xor_D2 (__global ushort *src1, int src1_step, int s if (x < cols && y < rows) { x = x << 2; - - #define dst_align ((dst_offset >> 1) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 3) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); @@ -171,23 +183,23 @@ __kernel void arithm_bitwise_xor_D2 (__global ushort *src1, int src1_step, int s int dst_end = mad24(y, dst_step, dst_offset + dst_step1); int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffff8); - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; + int src1_index_fix = src1_index < 0 ? 0 : src1_index; + int src2_index_fix = src2_index < 0 ? 0 : src2_index; ushort4 src1_data = vload4(0, (__global ushort *)((__global char *)src1 + src1_index_fix)); ushort4 src2_data = vload4(0, (__global ushort *)((__global char *)src2 + src2_index_fix)); - if(src1_index < 0) - { - ushort4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - ushort4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } + if(src1_index < 0) + { + ushort4 tmp; + tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; + src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; + } + if(src2_index < 0) + { + ushort4 tmp; + tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; + src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; + } ushort4 dst_data = *((__global ushort4 *)((__global char *)dst + dst_index)); ushort4 tmp_data = src1_data ^ src2_data; @@ -203,9 +215,9 @@ __kernel void arithm_bitwise_xor_D2 (__global ushort *src1, int src1_step, int s __kernel void arithm_bitwise_xor_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *src2, int src2_step, int src2_offset, - __global short *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global short *src2, int src2_step, int src2_offset, + __global short *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -214,8 +226,11 @@ __kernel void arithm_bitwise_xor_D3 (__global short *src1, int src1_step, int sr if (x < cols && y < rows) { x = x << 2; - - #define dst_align ((dst_offset >> 1) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 3) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); @@ -223,25 +238,25 @@ __kernel void arithm_bitwise_xor_D3 (__global short *src1, int src1_step, int sr int dst_end = mad24(y, dst_step, dst_offset + dst_step1); int dst_index = mad24(y, dst_step, dst_offset + (x << 1) & (int)0xfffffff8); - int src1_index_fix = src1_index < 0 ? 0 : src1_index; - int src2_index_fix = src2_index < 0 ? 0 : src2_index; + int src1_index_fix = src1_index < 0 ? 0 : src1_index; + int src2_index_fix = src2_index < 0 ? 0 : src2_index; short4 src1_data = vload4(0, (__global short *)((__global char *)src1 + src1_index_fix)); short4 src2_data = vload4(0, (__global short *)((__global char *)src2 + src2_index_fix)); short4 dst_data = *((__global short4 *)((__global char *)dst + dst_index)); - if(src1_index < 0) - { - short4 tmp; - tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; - src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; - } - if(src2_index < 0) - { - short4 tmp; - tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; - src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } + if(src1_index < 0) + { + short4 tmp; + tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; + src1_data.xyzw = (src1_index == -1) ? src1_data.wxyz:tmp.xyzw; + } + if(src2_index < 0) + { + short4 tmp; + tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; + src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; + } @@ -259,9 +274,9 @@ __kernel void arithm_bitwise_xor_D3 (__global short *src1, int src1_step, int sr __kernel void arithm_bitwise_xor_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *src2, int src2_step, int src2_offset, - __global int *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global int *src2, int src2_step, int src2_offset, + __global int *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -281,9 +296,9 @@ __kernel void arithm_bitwise_xor_D4 (__global int *src1, int src1_step, int src1 } __kernel void arithm_bitwise_xor_D5 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global char *src2, int src2_step, int src2_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -301,12 +316,11 @@ __kernel void arithm_bitwise_xor_D5 (__global char *src1, int src1_step, int src *((__global char4 *)((__global char *)dst + dst_index)) = tmp; } } - #if defined (DOUBLE_SUPPORT) __kernel void arithm_bitwise_xor_D6 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global char *src2, int src2_step, int src2_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); diff --git a/modules/ocl/src/opencl/arithm_bitwise_xor_mask.cl b/modules/ocl/src/opencl/arithm_bitwise_xor_mask.cl index 4359d860a..48bd3e444 100644 --- a/modules/ocl/src/opencl/arithm_bitwise_xor_mask.cl +++ b/modules/ocl/src/opencl/arithm_bitwise_xor_mask.cl @@ -43,18 +43,22 @@ // //M*/ #if defined (DOUBLE_SUPPORT) +#ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64:enable +#elif defined (cl_amd_fp64) +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#endif #endif - ////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////BITWISE_XOR//////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////// /**************************************bitwise_xor with mask**************************************/ -__kernel void arithm_bitwise_xor_with_mask_C1_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_xor_with_mask_C1_D0 ( + __global uchar *src1, int src1_step, int src1_offset, + __global uchar *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -63,8 +67,11 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D0 (__global uchar *src1, int src1 if (x < cols && y < rows) { x = x << 2; - - #define dst_align (dst_offset & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (dst_offset & 3) int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); int src2_index = mad24(y, src2_step, x + src2_offset - dst_align); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -91,11 +98,12 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D0 (__global uchar *src1, int src1 -__kernel void arithm_bitwise_xor_with_mask_C1_D1 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_xor_with_mask_C1_D1 ( + __global char *src1, int src1_step, int src1_offset, + __global char *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -104,8 +112,11 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D1 (__global char *src1, int src1_ if (x < cols && y < rows) { x = x << 2; - - #define dst_align (dst_offset & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (dst_offset & 3) int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); int src2_index = mad24(y, src2_step, x + src2_offset - dst_align); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -132,11 +143,12 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D1 (__global char *src1, int src1_ -__kernel void arithm_bitwise_xor_with_mask_C1_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global ushort *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_xor_with_mask_C1_D2 ( + __global ushort *src1, int src1_step, int src1_offset, + __global ushort *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global ushort *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -145,8 +157,11 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D2 (__global ushort *src1, int src if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -171,11 +186,12 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D2 (__global ushort *src1, int src -__kernel void arithm_bitwise_xor_with_mask_C1_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global short *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_xor_with_mask_C1_D3 ( + __global short *src1, int src1_step, int src1_offset, + __global short *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global short *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -184,8 +200,11 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D3 (__global short *src1, int src1 if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -198,8 +217,8 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D3 (__global short *src1, int src1 short2 src2_data = vload2(0, (__global short *)((__global char *)src2 + src2_index)); uchar2 mask_data = vload2(0, mask + mask_index); - short2 data = *((__global short2 *)((__global uchar *)dst + dst_index)); - short2 tmp_data = src1_data ^ src2_data; + short2 data = *((__global short2 *)((__global uchar *)dst + dst_index)); + short2 tmp_data = src1_data ^ src2_data; data.x = convert_short((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : data.x; data.y = convert_short((mask_data.y) && (dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.y : data.y; @@ -210,11 +229,12 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D3 (__global short *src1, int src1 -__kernel void arithm_bitwise_xor_with_mask_C1_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global int *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_xor_with_mask_C1_D4 ( + __global int *src1, int src1_step, int src1_offset, + __global int *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global int *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -242,11 +262,12 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D4 (__global int *src1, int src1 -__kernel void arithm_bitwise_xor_with_mask_C1_D5 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_xor_with_mask_C1_D5 ( + __global char *src1, int src1_step, int src1_offset, + __global char *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -273,13 +294,13 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D5 (__global char *src1, int src1_ } - #if defined (DOUBLE_SUPPORT) -__kernel void arithm_bitwise_xor_with_mask_C1_D6 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_xor_with_mask_C1_D6 ( + __global char *src1, int src1_step, int src1_offset, + __global char *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -308,12 +329,12 @@ __kernel void arithm_bitwise_xor_with_mask_C1_D6 (__global char *src1, int src1_ - -__kernel void arithm_bitwise_xor_with_mask_C2_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_xor_with_mask_C2_D0 ( + __global uchar *src1, int src1_step, int src1_offset, + __global uchar *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -322,8 +343,11 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D0 (__global uchar *src1, int src1 if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -347,11 +371,12 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D0 (__global uchar *src1, int src1 } -__kernel void arithm_bitwise_xor_with_mask_C2_D1 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_xor_with_mask_C2_D1 ( + __global char *src1, int src1_step, int src1_offset, + __global char *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -360,8 +385,11 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D1 (__global char *src1, int src1_ if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -384,11 +412,12 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D1 (__global char *src1, int src1_ } } -__kernel void arithm_bitwise_xor_with_mask_C2_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global ushort *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_xor_with_mask_C2_D2 ( + __global ushort *src1, int src1_step, int src1_offset, + __global ushort *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global ushort *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -413,11 +442,12 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D2 (__global ushort *src1, int src *((__global ushort2 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_bitwise_xor_with_mask_C2_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global short *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_xor_with_mask_C2_D3 ( + __global short *src1, int src1_step, int src1_offset, + __global short *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global short *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -442,11 +472,12 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D3 (__global short *src1, int src1 *((__global short2 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_bitwise_xor_with_mask_C2_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global int *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_xor_with_mask_C2_D4 ( + __global int *src1, int src1_step, int src1_offset, + __global int *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global int *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -471,11 +502,12 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D4 (__global int *src1, int src1 *((__global int2 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_bitwise_xor_with_mask_C2_D5 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_xor_with_mask_C2_D5 ( + __global char *src1, int src1_step, int src1_offset, + __global char *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -501,11 +533,12 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D5 (__global char *src1, int src1_ } } #if defined (DOUBLE_SUPPORT) -__kernel void arithm_bitwise_xor_with_mask_C2_D6 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_xor_with_mask_C2_D6 ( + __global char *src1, int src1_step, int src1_offset, + __global char *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -533,12 +566,12 @@ __kernel void arithm_bitwise_xor_with_mask_C2_D6 (__global char *src1, int src1_ #endif - -__kernel void arithm_bitwise_xor_with_mask_C3_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_xor_with_mask_C3_D0 ( + __global uchar *src1, int src1_step, int src1_offset, + __global uchar *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -547,8 +580,11 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D0 (__global uchar *src1, int src1 if (x < cols && y < rows) { x = x << 2; - - #define dst_align (((dst_offset % dst_step) / 3 ) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 3 ) & 3) int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3)); int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -596,11 +632,12 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D0 (__global uchar *src1, int src1 } -__kernel void arithm_bitwise_xor_with_mask_C3_D1 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_xor_with_mask_C3_D1 ( + __global char *src1, int src1_step, int src1_offset, + __global char *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -609,8 +646,11 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D1 (__global char *src1, int src1_ if (x < cols && y < rows) { x = x << 2; - - #define dst_align (((dst_offset % dst_step) / 3 ) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 3 ) & 3) int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3)); int src2_index = mad24(y, src2_step, (x * 3) + src2_offset - (dst_align * 3)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -657,11 +697,12 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D1 (__global char *src1, int src1_ } } -__kernel void arithm_bitwise_xor_with_mask_C3_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global ushort *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_xor_with_mask_C3_D2 ( + __global ushort *src1, int src1_step, int src1_offset, + __global ushort *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global ushort *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -670,8 +711,11 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D2 (__global ushort *src1, int src if (x < cols && y < rows) { x = x << 1; - - #define dst_align (((dst_offset % dst_step) / 6 ) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 6 ) & 1) int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6)); int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -701,23 +745,24 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D2 (__global ushort *src1, int src data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy; data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) - ? tmp_data_1.x : data_1.x; + ? tmp_data_1.x : data_1.x; data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_1.y : data_1.y; + ? tmp_data_1.y : data_1.y; data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_2.xy : data_2.xy; + ? tmp_data_2.xy : data_2.xy; - *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0; - *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1; - *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2; + *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0; + *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1; + *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2; } } -__kernel void arithm_bitwise_xor_with_mask_C3_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global short *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_xor_with_mask_C3_D3 ( + __global short *src1, int src1_step, int src1_offset, + __global short *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global short *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -726,8 +771,11 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D3 (__global short *src1, int src1 if (x < cols && y < rows) { x = x << 1; - - #define dst_align (((dst_offset % dst_step) / 6 ) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 6 ) & 1) int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6)); int src2_index = mad24(y, src2_step, (x * 6) + src2_offset - (dst_align * 6)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -757,23 +805,24 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D3 (__global short *src1, int src1 data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy; data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) - ? tmp_data_1.x : data_1.x; + ? tmp_data_1.x : data_1.x; data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_1.y : data_1.y; + ? tmp_data_1.y : data_1.y; data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_2.xy : data_2.xy; + ? tmp_data_2.xy : data_2.xy; - *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0; - *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1; - *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2; + *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0; + *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1; + *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2; } } -__kernel void arithm_bitwise_xor_with_mask_C3_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global int *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_xor_with_mask_C3_D4 ( + __global int *src1, int src1_step, int src1_offset, + __global int *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global int *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -808,16 +857,17 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D4 (__global int *src1, int src1 data_1 = mask_data ? tmp_data_1 : data_1; data_2 = mask_data ? tmp_data_2 : data_2; - *((__global int *)((__global char *)dst + dst_index + 0))= data_0; - *((__global int *)((__global char *)dst + dst_index + 4))= data_1; - *((__global int *)((__global char *)dst + dst_index + 8))= data_2; + *((__global int *)((__global char *)dst + dst_index + 0))= data_0; + *((__global int *)((__global char *)dst + dst_index + 4))= data_1; + *((__global int *)((__global char *)dst + dst_index + 8))= data_2; } } -__kernel void arithm_bitwise_xor_with_mask_C3_D5 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_xor_with_mask_C3_D5 ( + __global char *src1, int src1_step, int src1_offset, + __global char *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -852,17 +902,18 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D5 (__global char *src1, int src1_ data_1 = mask_data ? tmp_data_1 : data_1; data_2 = mask_data ? tmp_data_2 : data_2; - *((__global char4 *)((__global char *)dst + dst_index + 0))= data_0; - *((__global char4 *)((__global char *)dst + dst_index + 4))= data_1; - *((__global char4 *)((__global char *)dst + dst_index + 8))= data_2; + *((__global char4 *)((__global char *)dst + dst_index + 0))= data_0; + *((__global char4 *)((__global char *)dst + dst_index + 4))= data_1; + *((__global char4 *)((__global char *)dst + dst_index + 8))= data_2; } } #if defined (DOUBLE_SUPPORT) -__kernel void arithm_bitwise_xor_with_mask_C3_D6 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_xor_with_mask_C3_D6 ( + __global char *src1, int src1_step, int src1_offset, + __global char *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -897,20 +948,20 @@ __kernel void arithm_bitwise_xor_with_mask_C3_D6 (__global char *src1, int src1_ data_1 = mask_data ? tmp_data_1 : data_1; data_2 = mask_data ? tmp_data_2 : data_2; - *((__global char8 *)((__global char *)dst + dst_index + 0 ))= data_0; - *((__global char8 *)((__global char *)dst + dst_index + 8 ))= data_1; - *((__global char8 *)((__global char *)dst + dst_index + 16))= data_2; + *((__global char8 *)((__global char *)dst + dst_index + 0 ))= data_0; + *((__global char8 *)((__global char *)dst + dst_index + 8 ))= data_1; + *((__global char8 *)((__global char *)dst + dst_index + 16))= data_2; } } #endif - -__kernel void arithm_bitwise_xor_with_mask_C4_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_xor_with_mask_C4_D0 ( + __global uchar *src1, int src1_step, int src1_offset, + __global uchar *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -937,11 +988,12 @@ __kernel void arithm_bitwise_xor_with_mask_C4_D0 (__global uchar *src1, int src1 } -__kernel void arithm_bitwise_xor_with_mask_C4_D1 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_xor_with_mask_C4_D1 ( + __global char *src1, int src1_step, int src1_offset, + __global char *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -967,11 +1019,12 @@ __kernel void arithm_bitwise_xor_with_mask_C4_D1 (__global char *src1, int src1_ } } -__kernel void arithm_bitwise_xor_with_mask_C4_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global ushort *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_xor_with_mask_C4_D2 ( + __global ushort *src1, int src1_step, int src1_offset, + __global ushort *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global ushort *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -996,11 +1049,12 @@ __kernel void arithm_bitwise_xor_with_mask_C4_D2 (__global ushort *src1, int src *((__global ushort4 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_bitwise_xor_with_mask_C4_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global short *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_xor_with_mask_C4_D3 ( + __global short *src1, int src1_step, int src1_offset, + __global short *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global short *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -1025,11 +1079,12 @@ __kernel void arithm_bitwise_xor_with_mask_C4_D3 (__global short *src1, int src1 *((__global short4 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_bitwise_xor_with_mask_C4_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global int *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_xor_with_mask_C4_D4 ( + __global int *src1, int src1_step, int src1_offset, + __global int *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global int *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -1054,11 +1109,12 @@ __kernel void arithm_bitwise_xor_with_mask_C4_D4 (__global int *src1, int src1 *((__global int4 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_bitwise_xor_with_mask_C4_D5 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_xor_with_mask_C4_D5 ( + __global char *src1, int src1_step, int src1_offset, + __global char *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -1084,11 +1140,12 @@ __kernel void arithm_bitwise_xor_with_mask_C4_D5 (__global char *src1, int src1_ } } #if defined (DOUBLE_SUPPORT) -__kernel void arithm_bitwise_xor_with_mask_C4_D6 (__global char *src1, int src1_step, int src1_offset, - __global char *src2, int src2_step, int src2_offset, - __global uchar *mask, int mask_step, int mask_offset, - __global char *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) +__kernel void arithm_bitwise_xor_with_mask_C4_D6 ( + __global char *src1, int src1_step, int src1_offset, + __global char *src2, int src2_step, int src2_offset, + __global uchar *mask, int mask_step, int mask_offset, + __global char *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); diff --git a/modules/ocl/src/opencl/arithm_bitwise_xor_scalar.cl b/modules/ocl/src/opencl/arithm_bitwise_xor_scalar.cl index 318432a18..2c6dd50cd 100644 --- a/modules/ocl/src/opencl/arithm_bitwise_xor_scalar.cl +++ b/modules/ocl/src/opencl/arithm_bitwise_xor_scalar.cl @@ -42,19 +42,21 @@ // the use of this software, even if advised of the possibility of such damage. // // -#if defined (__ATI__) -#pragma OPENCL EXTENSION cl_amd_fp64:enable -#elif defined (__NVIDIA__) +#if defined (DOUBLE_SUPPORT) +#ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64:enable +#elif defined (cl_amd_fp64) +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#endif #endif - ////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////BITWISE_XOR//////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////// /**************************************xor with scalar without mask**************************************/ -__kernel void arithm_s_bitwise_xor_C1_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *dst, int dst_step, int dst_offset, - uchar4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_C1_D0 ( + __global uchar *src1, int src1_step, int src1_offset, + __global uchar *dst, int dst_step, int dst_offset, + uchar4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -62,8 +64,11 @@ __kernel void arithm_s_bitwise_xor_C1_D0 (__global uchar *src1, int src1_step, if (x < cols && y < rows) { x = x << 2; - - #define dst_align (dst_offset & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (dst_offset & 3) int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); int dst_start = mad24(y, dst_step, dst_offset); @@ -86,9 +91,10 @@ __kernel void arithm_s_bitwise_xor_C1_D0 (__global uchar *src1, int src1_step, } -__kernel void arithm_s_bitwise_xor_C1_D1 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - char4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_C1_D1 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + char4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -96,8 +102,11 @@ __kernel void arithm_s_bitwise_xor_C1_D1 (__global char *src1, int src1_step, if (x < cols && y < rows) { x = x << 2; - - #define dst_align (dst_offset & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (dst_offset & 3) int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); int dst_start = mad24(y, dst_step, dst_offset); @@ -119,9 +128,10 @@ __kernel void arithm_s_bitwise_xor_C1_D1 (__global char *src1, int src1_step, } } -__kernel void arithm_s_bitwise_xor_C1_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *dst, int dst_step, int dst_offset, - ushort4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_C1_D2 ( + __global ushort *src1, int src1_step, int src1_offset, + __global ushort *dst, int dst_step, int dst_offset, + ushort4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -130,8 +140,11 @@ __kernel void arithm_s_bitwise_xor_C1_D2 (__global ushort *src1, int src1_step if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int dst_start = mad24(y, dst_step, dst_offset); @@ -150,9 +163,10 @@ __kernel void arithm_s_bitwise_xor_C1_D2 (__global ushort *src1, int src1_step *((__global ushort2 *)((__global uchar *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_xor_C1_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - short4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_C1_D3 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + short4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -161,8 +175,11 @@ __kernel void arithm_s_bitwise_xor_C1_D3 (__global short *src1, int src1_step, if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int dst_start = mad24(y, dst_step, dst_offset); @@ -181,9 +198,10 @@ __kernel void arithm_s_bitwise_xor_C1_D3 (__global short *src1, int src1_step, *((__global short2 *)((__global uchar *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_xor_C1_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *dst, int dst_step, int dst_offset, - int4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_C1_D4 ( + __global int *src1, int src1_step, int src1_offset, + __global int *dst, int dst_step, int dst_offset, + int4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -202,9 +220,10 @@ __kernel void arithm_s_bitwise_xor_C1_D4 (__global int *src1, int src1_step, i *((__global int *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_xor_C1_D5 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - char16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_C1_D5 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + char16 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -234,9 +253,10 @@ __kernel void arithm_s_bitwise_xor_C1_D5 (__global char *src1, int src1_step, } #if defined (DOUBLE_SUPPORT) -__kernel void arithm_s_bitwise_xor_C1_D6 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - short16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_C1_D6 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + short16 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -256,9 +276,10 @@ __kernel void arithm_s_bitwise_xor_C1_D6 (__global short *src1, int src1_step, i } } #endif -__kernel void arithm_s_bitwise_xor_C2_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *dst, int dst_step, int dst_offset, - uchar4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_C2_D0 ( + __global uchar *src1, int src1_step, int src1_offset, + __global uchar *dst, int dst_step, int dst_offset, + uchar4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -267,8 +288,11 @@ __kernel void arithm_s_bitwise_xor_C2_D0 (__global uchar *src1, int src1_step, if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int dst_start = mad24(y, dst_step, dst_offset); @@ -290,9 +314,10 @@ __kernel void arithm_s_bitwise_xor_C2_D0 (__global uchar *src1, int src1_step, } -__kernel void arithm_s_bitwise_xor_C2_D1 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - char4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_C2_D1 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + char4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -301,8 +326,11 @@ __kernel void arithm_s_bitwise_xor_C2_D1 (__global char *src1, int src1_step, if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int dst_start = mad24(y, dst_step, dst_offset); @@ -322,9 +350,10 @@ __kernel void arithm_s_bitwise_xor_C2_D1 (__global char *src1, int src1_step, } } -__kernel void arithm_s_bitwise_xor_C2_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *dst, int dst_step, int dst_offset, - ushort4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_C2_D2 ( + __global ushort *src1, int src1_step, int src1_offset, + __global ushort *dst, int dst_step, int dst_offset, + ushort4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -343,9 +372,10 @@ __kernel void arithm_s_bitwise_xor_C2_D2 (__global ushort *src1, int src1_step *((__global ushort2 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_xor_C2_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - short4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_C2_D3 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + short4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -364,9 +394,10 @@ __kernel void arithm_s_bitwise_xor_C2_D3 (__global short *src1, int src1_step, *((__global short2 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_xor_C2_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *dst, int dst_step, int dst_offset, - int4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_C2_D4 ( + __global int *src1, int src1_step, int src1_offset, + __global int *dst, int dst_step, int dst_offset, + int4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -384,9 +415,10 @@ __kernel void arithm_s_bitwise_xor_C2_D4 (__global int *src1, int src1_step, i *((__global int2 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_xor_C2_D5 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - char16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_C2_D5 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + char16 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -403,12 +435,13 @@ __kernel void arithm_s_bitwise_xor_C2_D5 (__global char *src1, int src1_step, char8 tmp_data = src1_data ^ src2_data; *((__global char8 *)((__global char *)dst + dst_index)) = tmp_data; - } + } } #if defined (DOUBLE_SUPPORT) -__kernel void arithm_s_bitwise_xor_C2_D6 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - short16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_C2_D6 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + short16 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -428,9 +461,10 @@ __kernel void arithm_s_bitwise_xor_C2_D6 (__global short *src1, int src1_step, i } } #endif -__kernel void arithm_s_bitwise_xor_C3_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *dst, int dst_step, int dst_offset, - uchar4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_C3_D0 ( + __global uchar *src1, int src1_step, int src1_offset, + __global uchar *dst, int dst_step, int dst_offset, + uchar4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -439,8 +473,11 @@ __kernel void arithm_s_bitwise_xor_C3_D0 (__global uchar *src1, int src1_step, if (x < cols && y < rows) { x = x << 2; - - #define dst_align (((dst_offset % dst_step) / 3 ) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 3 ) & 3) int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3)); int dst_start = mad24(y, dst_step, dst_offset); @@ -484,9 +521,10 @@ __kernel void arithm_s_bitwise_xor_C3_D0 (__global uchar *src1, int src1_step, } -__kernel void arithm_s_bitwise_xor_C3_D1 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - char4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_C3_D1 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + char4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -495,8 +533,11 @@ __kernel void arithm_s_bitwise_xor_C3_D1 (__global char *src1, int src1_step, if (x < cols && y < rows) { x = x << 2; - - #define dst_align (((dst_offset % dst_step) / 3 ) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 3 ) & 3) int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3)); int dst_start = mad24(y, dst_step, dst_offset); @@ -539,9 +580,10 @@ __kernel void arithm_s_bitwise_xor_C3_D1 (__global char *src1, int src1_step, } } -__kernel void arithm_s_bitwise_xor_C3_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *dst, int dst_step, int dst_offset, - ushort4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_C3_D2 ( + __global ushort *src1, int src1_step, int src1_offset, + __global ushort *dst, int dst_step, int dst_offset, + ushort4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -550,8 +592,11 @@ __kernel void arithm_s_bitwise_xor_C3_D2 (__global ushort *src1, int src1_step if (x < cols && y < rows) { x = x << 1; - - #define dst_align (((dst_offset % dst_step) / 6 ) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 6 ) & 1) int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6)); int dst_start = mad24(y, dst_step, dst_offset); @@ -577,21 +622,22 @@ __kernel void arithm_s_bitwise_xor_C3_D2 (__global ushort *src1, int src1_step data_0.xy = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy; data_1.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) - ? tmp_data_1.x : data_1.x; + ? tmp_data_1.x : data_1.x; data_1.y = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_1.y : data_1.y; + ? tmp_data_1.y : data_1.y; data_2.xy = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_2.xy : data_2.xy; + ? tmp_data_2.xy : data_2.xy; - *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0; - *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1; - *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2; + *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0; + *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1; + *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2; } } -__kernel void arithm_s_bitwise_xor_C3_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - short4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_C3_D3 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + short4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -600,8 +646,11 @@ __kernel void arithm_s_bitwise_xor_C3_D3 (__global short *src1, int src1_step, if (x < cols && y < rows) { x = x << 1; - - #define dst_align (((dst_offset % dst_step) / 6 ) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 6 ) & 1) int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6)); int dst_start = mad24(y, dst_step, dst_offset); @@ -627,21 +676,22 @@ __kernel void arithm_s_bitwise_xor_C3_D3 (__global short *src1, int src1_step, data_0.xy = ((dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy; data_1.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) - ? tmp_data_1.x : data_1.x; + ? tmp_data_1.x : data_1.x; data_1.y = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_1.y : data_1.y; + ? tmp_data_1.y : data_1.y; data_2.xy = ((dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_2.xy : data_2.xy; + ? tmp_data_2.xy : data_2.xy; - *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0; - *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1; - *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2; + *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0; + *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1; + *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2; } } -__kernel void arithm_s_bitwise_xor_C3_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *dst, int dst_step, int dst_offset, - int4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_C3_D4 ( + __global int *src1, int src1_step, int src1_offset, + __global int *dst, int dst_step, int dst_offset, + int4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -668,14 +718,15 @@ __kernel void arithm_s_bitwise_xor_C3_D4 (__global int *src1, int src1_step, i int tmp_data_1 = src1_data_1 ^ src2_data_1; int tmp_data_2 = src1_data_2 ^ src2_data_2; - *((__global int *)((__global char *)dst + dst_index + 0))= tmp_data_0; - *((__global int *)((__global char *)dst + dst_index + 4))= tmp_data_1; - *((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2; + *((__global int *)((__global char *)dst + dst_index + 0))= tmp_data_0; + *((__global int *)((__global char *)dst + dst_index + 4))= tmp_data_1; + *((__global int *)((__global char *)dst + dst_index + 8))= tmp_data_2; } } -__kernel void arithm_s_bitwise_xor_C3_D5 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - char16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_C3_D5 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + char16 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -702,15 +753,16 @@ __kernel void arithm_s_bitwise_xor_C3_D5 (__global char *src1, int src1_step, char4 tmp_data_1 = src1_data_1 ^ src2_data_1; char4 tmp_data_2 = src1_data_2 ^ src2_data_2; - *((__global char4 *)((__global char *)dst + dst_index + 0))= tmp_data_0; - *((__global char4 *)((__global char *)dst + dst_index + 4))= tmp_data_1; - *((__global char4 *)((__global char *)dst + dst_index + 8))= tmp_data_2; + *((__global char4 *)((__global char *)dst + dst_index + 0))= tmp_data_0; + *((__global char4 *)((__global char *)dst + dst_index + 4))= tmp_data_1; + *((__global char4 *)((__global char *)dst + dst_index + 8))= tmp_data_2; } } #if defined (DOUBLE_SUPPORT) -__kernel void arithm_s_bitwise_xor_C3_D6 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - short16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_C3_D6 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + short16 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -737,15 +789,16 @@ __kernel void arithm_s_bitwise_xor_C3_D6 (__global short *src1, int src1_step, i short4 tmp_data_1 = src1_data_1 ^ src2_data_1; short4 tmp_data_2 = src1_data_2 ^ src2_data_2; - *((__global short4 *)((__global char *)dst + dst_index + 0 ))= tmp_data_0; - *((__global short4 *)((__global char *)dst + dst_index + 8 ))= tmp_data_1; - *((__global short4 *)((__global char *)dst + dst_index + 16))= tmp_data_2; + *((__global short4 *)((__global char *)dst + dst_index + 0 ))= tmp_data_0; + *((__global short4 *)((__global char *)dst + dst_index + 8 ))= tmp_data_1; + *((__global short4 *)((__global char *)dst + dst_index + 16))= tmp_data_2; } } #endif -__kernel void arithm_s_bitwise_xor_C4_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *dst, int dst_step, int dst_offset, - uchar4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_C4_D0 ( + __global uchar *src1, int src1_step, int src1_offset, + __global uchar *dst, int dst_step, int dst_offset, + uchar4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -765,9 +818,10 @@ __kernel void arithm_s_bitwise_xor_C4_D0 (__global uchar *src1, int src1_step, } -__kernel void arithm_s_bitwise_xor_C4_D1 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - char4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_C4_D1 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + char4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -786,9 +840,10 @@ __kernel void arithm_s_bitwise_xor_C4_D1 (__global char *src1, int src1_step, } } -__kernel void arithm_s_bitwise_xor_C4_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *dst, int dst_step, int dst_offset, - ushort4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_C4_D2 ( + __global ushort *src1, int src1_step, int src1_offset, + __global ushort *dst, int dst_step, int dst_offset, + ushort4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -806,9 +861,10 @@ __kernel void arithm_s_bitwise_xor_C4_D2 (__global ushort *src1, int src1_step *((__global ushort4 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_xor_C4_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - short4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_C4_D3 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + short4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -826,9 +882,10 @@ __kernel void arithm_s_bitwise_xor_C4_D3 (__global short *src1, int src1_step, *((__global short4 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_xor_C4_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *dst, int dst_step, int dst_offset, - int4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_C4_D4 ( + __global int *src1, int src1_step, int src1_offset, + __global int *dst, int dst_step, int dst_offset, + int4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -846,9 +903,10 @@ __kernel void arithm_s_bitwise_xor_C4_D4 (__global int *src1, int src1_step, i *((__global int4 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_xor_C4_D5 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - char16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_C4_D5 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + char16 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -869,9 +927,10 @@ __kernel void arithm_s_bitwise_xor_C4_D5 (__global char *src1, int src1_step, } } #if defined (DOUBLE_SUPPORT) -__kernel void arithm_s_bitwise_xor_C4_D6 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - short16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_C4_D6 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + short16 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -897,11 +956,11 @@ __kernel void arithm_s_bitwise_xor_C4_D6 (__global short *src1, int src1_step, i short4 tmp_data_2 = src1_data_2 ^ src2_data_2; short4 tmp_data_3 = src1_data_3 ^ src2_data_3; - *((__global short4 *)((__global char *)dst + dst_index + 0 ))= tmp_data_0; - *((__global short4 *)((__global char *)dst + dst_index + 8 ))= tmp_data_1; - *((__global short4 *)((__global char *)dst + dst_index + 16))= tmp_data_2; - *((__global short4 *)((__global char *)dst + dst_index + 24))= tmp_data_3; + *((__global short4 *)((__global char *)dst + dst_index + 0 ))= tmp_data_0; + *((__global short4 *)((__global char *)dst + dst_index + 8 ))= tmp_data_1; + *((__global short4 *)((__global char *)dst + dst_index + 16))= tmp_data_2; + *((__global short4 *)((__global char *)dst + dst_index + 24))= tmp_data_3; } } -#endif +#endif \ No newline at end of file diff --git a/modules/ocl/src/opencl/arithm_bitwise_xor_scalar_mask.cl b/modules/ocl/src/opencl/arithm_bitwise_xor_scalar_mask.cl index 57ad9ee71..26ca59c3a 100644 --- a/modules/ocl/src/opencl/arithm_bitwise_xor_scalar_mask.cl +++ b/modules/ocl/src/opencl/arithm_bitwise_xor_scalar_mask.cl @@ -42,20 +42,23 @@ // the use of this software, even if advised of the possibility of such damage. // //M*/ -#if defined (__ATI__) -#pragma OPENCL EXTENSION cl_amd_fp64:enable -#elif defined (__NVIDIA__) -#pragma OPENCL EXTENSION cl_khr_fp64:enable -#endif +#if defined (DOUBLE_SUPPORT) +#ifdef cl_khr_fp64 +#pragma OPENCL EXTENSION cl_khr_fp64:enable +#elif defined (cl_amd_fp64) +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#endif +#endif ////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////BITWISE_XOR//////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////////////////////////////// /**************************************bitwise_xor with scalar with mask**************************************/ -__kernel void arithm_s_bitwise_xor_with_mask_C1_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - uchar4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_with_mask_C1_D0 ( + __global uchar *src1, int src1_step, int src1_offset, + __global uchar *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + uchar4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -64,8 +67,11 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D0 (__global uchar *src1, int if (x < cols && y < rows) { x = x << 2; - - #define dst_align (dst_offset & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (dst_offset & 3) int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -90,10 +96,11 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D0 (__global uchar *src1, int } -__kernel void arithm_s_bitwise_xor_with_mask_C1_D1 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - char4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_with_mask_C1_D1 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + char4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -102,8 +109,11 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D1 (__global char *src1, int s if (x < cols && y < rows) { x = x << 2; - - #define dst_align (dst_offset & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (dst_offset & 3) int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -127,10 +137,11 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D1 (__global char *src1, int s } } -__kernel void arithm_s_bitwise_xor_with_mask_C1_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - ushort4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_with_mask_C1_D2 ( + __global ushort *src1, int src1_step, int src1_offset, + __global ushort *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + ushort4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -139,8 +150,11 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D2 (__global ushort *src1, int if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -161,10 +175,11 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D2 (__global ushort *src1, int *((__global ushort2 *)((__global uchar *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_xor_with_mask_C1_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - short4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_with_mask_C1_D3 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + short4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -173,8 +188,11 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D3 (__global short *src1, int if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -195,10 +213,11 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D3 (__global short *src1, int *((__global short2 *)((__global uchar *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_xor_with_mask_C1_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - int4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_with_mask_C1_D4 ( + __global int *src1, int src1_step, int src1_offset, + __global int *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + int4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -223,10 +242,11 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D4 (__global int *src1, int } } -__kernel void arithm_s_bitwise_xor_with_mask_C1_D5 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - char16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_with_mask_C1_D5 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + char16 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -252,10 +272,11 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D5 (__global char *src1, int src } #if defined (DOUBLE_SUPPORT) -__kernel void arithm_s_bitwise_xor_with_mask_C1_D6 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - short16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_with_mask_C1_D6 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + short16 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -280,10 +301,11 @@ __kernel void arithm_s_bitwise_xor_with_mask_C1_D6 (__global short *src1, int sr } } #endif -__kernel void arithm_s_bitwise_xor_with_mask_C2_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - uchar4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_with_mask_C2_D0 ( + __global uchar *src1, int src1_step, int src1_offset, + __global uchar *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + uchar4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -292,8 +314,11 @@ __kernel void arithm_s_bitwise_xor_with_mask_C2_D0 (__global uchar *src1, int if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -316,10 +341,11 @@ __kernel void arithm_s_bitwise_xor_with_mask_C2_D0 (__global uchar *src1, int } -__kernel void arithm_s_bitwise_xor_with_mask_C2_D1 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - char4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_with_mask_C2_D1 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + char4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -328,8 +354,11 @@ __kernel void arithm_s_bitwise_xor_with_mask_C2_D1 (__global char *src1, int s if (x < cols && y < rows) { x = x << 1; - - #define dst_align ((dst_offset >> 1) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 1) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -351,10 +380,11 @@ __kernel void arithm_s_bitwise_xor_with_mask_C2_D1 (__global char *src1, int s } } -__kernel void arithm_s_bitwise_xor_with_mask_C2_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - ushort4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_with_mask_C2_D2 ( + __global ushort *src1, int src1_step, int src1_offset, + __global ushort *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + ushort4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -378,10 +408,11 @@ __kernel void arithm_s_bitwise_xor_with_mask_C2_D2 (__global ushort *src1, int *((__global ushort2 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_xor_with_mask_C2_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - short4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_with_mask_C2_D3 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + short4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -405,10 +436,11 @@ __kernel void arithm_s_bitwise_xor_with_mask_C2_D3 (__global short *src1, int *((__global short2 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_xor_with_mask_C2_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - int4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_with_mask_C2_D4 ( + __global int *src1, int src1_step, int src1_offset, + __global int *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + int4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -432,10 +464,11 @@ __kernel void arithm_s_bitwise_xor_with_mask_C2_D4 (__global int *src1, int sr *((__global int2 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_xor_with_mask_C2_D5 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - char16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_with_mask_C2_D5 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + char16 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -461,10 +494,11 @@ __kernel void arithm_s_bitwise_xor_with_mask_C2_D5 (__global char *src1, int s } } #if defined (DOUBLE_SUPPORT) -__kernel void arithm_s_bitwise_xor_with_mask_C2_D6 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - short16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_with_mask_C2_D6 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + short16 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -489,10 +523,11 @@ __kernel void arithm_s_bitwise_xor_with_mask_C2_D6 (__global short *src1, int sr } } #endif -__kernel void arithm_s_bitwise_xor_with_mask_C3_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - uchar4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_with_mask_C3_D0 ( + __global uchar *src1, int src1_step, int src1_offset, + __global uchar *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + uchar4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -501,8 +536,11 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D0 (__global uchar *src1, int if (x < cols && y < rows) { x = x << 2; - - #define dst_align (((dst_offset % dst_step) / 3 ) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 3 ) & 3) int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -549,10 +587,11 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D0 (__global uchar *src1, int } -__kernel void arithm_s_bitwise_xor_with_mask_C3_D1 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - char4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_with_mask_C3_D1 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + char4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -561,8 +600,11 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D1 (__global char *src1, int s if (x < cols && y < rows) { x = x << 2; - - #define dst_align (((dst_offset % dst_step) / 3 ) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 3 ) & 3) int src1_index = mad24(y, src1_step, (x * 3) + src1_offset - (dst_align * 3)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -608,10 +650,11 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D1 (__global char *src1, int s } } -__kernel void arithm_s_bitwise_xor_with_mask_C3_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - ushort4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_with_mask_C3_D2 ( + __global ushort *src1, int src1_step, int src1_offset, + __global ushort *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + ushort4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -620,8 +663,11 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D2 (__global ushort *src1, int if (x < cols && y < rows) { x = x << 1; - - #define dst_align (((dst_offset % dst_step) / 6 ) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 6 ) & 1) int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -650,22 +696,23 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D2 (__global ushort *src1, int data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy; data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) - ? tmp_data_1.x : data_1.x; + ? tmp_data_1.x : data_1.x; data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_1.y : data_1.y; + ? tmp_data_1.y : data_1.y; data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_2.xy : data_2.xy; + ? tmp_data_2.xy : data_2.xy; - *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0; - *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1; - *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2; + *((__global ushort2 *)((__global char *)dst + dst_index + 0))= data_0; + *((__global ushort2 *)((__global char *)dst + dst_index + 4))= data_1; + *((__global ushort2 *)((__global char *)dst + dst_index + 8))= data_2; } } -__kernel void arithm_s_bitwise_xor_with_mask_C3_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - short4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_with_mask_C3_D3 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + short4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -674,8 +721,11 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D3 (__global short *src1, int if (x < cols && y < rows) { x = x << 1; - - #define dst_align (((dst_offset % dst_step) / 6 ) & 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset % dst_step) / 6 ) & 1) int src1_index = mad24(y, src1_step, (x * 6) + src1_offset - (dst_align * 6)); int mask_index = mad24(y, mask_step, x + mask_offset - dst_align); @@ -704,22 +754,23 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D3 (__global short *src1, int data_0.xy = ((mask_data.x) && (dst_index + 0 >= dst_start)) ? tmp_data_0.xy : data_0.xy; data_1.x = ((mask_data.x) && (dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) - ? tmp_data_1.x : data_1.x; + ? tmp_data_1.x : data_1.x; data_1.y = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_1.y : data_1.y; + ? tmp_data_1.y : data_1.y; data_2.xy = ((mask_data.y) && (dst_index + 6 >= dst_start) && (dst_index + 6 < dst_end)) - ? tmp_data_2.xy : data_2.xy; + ? tmp_data_2.xy : data_2.xy; - *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0; - *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1; - *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2; + *((__global short2 *)((__global char *)dst + dst_index + 0))= data_0; + *((__global short2 *)((__global char *)dst + dst_index + 4))= data_1; + *((__global short2 *)((__global char *)dst + dst_index + 8))= data_2; } } -__kernel void arithm_s_bitwise_xor_with_mask_C3_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - int4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_with_mask_C3_D4 ( + __global int *src1, int src1_step, int src1_offset, + __global int *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + int4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -753,15 +804,16 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D4 (__global int *src1, int sr data_1 = mask_data ? tmp_data_1 : data_1; data_2 = mask_data ? tmp_data_2 : data_2; - *((__global int *)((__global char *)dst + dst_index + 0))= data_0; - *((__global int *)((__global char *)dst + dst_index + 4))= data_1; - *((__global int *)((__global char *)dst + dst_index + 8))= data_2; + *((__global int *)((__global char *)dst + dst_index + 0))= data_0; + *((__global int *)((__global char *)dst + dst_index + 4))= data_1; + *((__global int *)((__global char *)dst + dst_index + 8))= data_2; } } -__kernel void arithm_s_bitwise_xor_with_mask_C3_D5 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - char16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_with_mask_C3_D5 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + char16 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -795,16 +847,17 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D5 (__global char *src1, int s data_1 = mask_data ? tmp_data_1 : data_1; data_2 = mask_data ? tmp_data_2 : data_2; - *((__global char4 *)((__global char *)dst + dst_index + 0))= data_0; - *((__global char4 *)((__global char *)dst + dst_index + 4))= data_1; - *((__global char4 *)((__global char *)dst + dst_index + 8))= data_2; + *((__global char4 *)((__global char *)dst + dst_index + 0))= data_0; + *((__global char4 *)((__global char *)dst + dst_index + 4))= data_1; + *((__global char4 *)((__global char *)dst + dst_index + 8))= data_2; } } #if defined (DOUBLE_SUPPORT) -__kernel void arithm_s_bitwise_xor_with_mask_C3_D6 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - short16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_with_mask_C3_D6 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + short16 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -838,16 +891,17 @@ __kernel void arithm_s_bitwise_xor_with_mask_C3_D6 (__global short *src1, int sr data_1 = mask_data ? tmp_data_1 : data_1; data_2 = mask_data ? tmp_data_2 : data_2; - *((__global short4 *)((__global char *)dst + dst_index + 0 ))= data_0; - *((__global short4 *)((__global char *)dst + dst_index + 8 ))= data_1; - *((__global short4 *)((__global char *)dst + dst_index + 16))= data_2; + *((__global short4 *)((__global char *)dst + dst_index + 0 ))= data_0; + *((__global short4 *)((__global char *)dst + dst_index + 8 ))= data_1; + *((__global short4 *)((__global char *)dst + dst_index + 16))= data_2; } } #endif -__kernel void arithm_s_bitwise_xor_with_mask_C4_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - uchar4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_with_mask_C4_D0 ( + __global uchar *src1, int src1_step, int src1_offset, + __global uchar *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + uchar4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -872,10 +926,11 @@ __kernel void arithm_s_bitwise_xor_with_mask_C4_D0 (__global uchar *src1, int } -__kernel void arithm_s_bitwise_xor_with_mask_C4_D1 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - char4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_with_mask_C4_D1 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + char4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -899,10 +954,11 @@ __kernel void arithm_s_bitwise_xor_with_mask_C4_D1 (__global char *src1, int s } } -__kernel void arithm_s_bitwise_xor_with_mask_C4_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - ushort4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_with_mask_C4_D2 ( + __global ushort *src1, int src1_step, int src1_offset, + __global ushort *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + ushort4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -925,10 +981,11 @@ __kernel void arithm_s_bitwise_xor_with_mask_C4_D2 (__global ushort *src1, int *((__global ushort4 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_xor_with_mask_C4_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - short4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_with_mask_C4_D3 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + short4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -951,10 +1008,11 @@ __kernel void arithm_s_bitwise_xor_with_mask_C4_D3 (__global short *src1, int *((__global short4 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_xor_with_mask_C4_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - int4 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_with_mask_C4_D4 ( + __global int *src1, int src1_step, int src1_offset, + __global int *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + int4 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -977,10 +1035,11 @@ __kernel void arithm_s_bitwise_xor_with_mask_C4_D4 (__global int *src1, int sr *((__global int4 *)((__global char *)dst + dst_index)) = data; } } -__kernel void arithm_s_bitwise_xor_with_mask_C4_D5 (__global char *src1, int src1_step, int src1_offset, - __global char *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - char16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_with_mask_C4_D5 ( + __global char *src1, int src1_step, int src1_offset, + __global char *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + char16 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -1006,10 +1065,11 @@ __kernel void arithm_s_bitwise_xor_with_mask_C4_D5 (__global char *src1, int s } } #if defined (DOUBLE_SUPPORT) -__kernel void arithm_s_bitwise_xor_with_mask_C4_D6 (__global short *src1, int src1_step, int src1_offset, - __global short *dst, int dst_step, int dst_offset, - __global uchar *mask, int mask_step, int mask_offset, - short16 src2, int rows, int cols, int dst_step1) +__kernel void arithm_s_bitwise_xor_with_mask_C4_D6 ( + __global short *src1, int src1_step, int src1_offset, + __global short *dst, int dst_step, int dst_offset, + __global uchar *mask, int mask_step, int mask_offset, + short16 src2, int rows, int cols, int dst_step1) { int x = get_global_id(0); diff --git a/modules/ocl/src/opencl/arithm_compare_eq.cl b/modules/ocl/src/opencl/arithm_compare_eq.cl index f818532ba..a660d4172 100644 --- a/modules/ocl/src/opencl/arithm_compare_eq.cl +++ b/modules/ocl/src/opencl/arithm_compare_eq.cl @@ -43,7 +43,11 @@ // //M*/ #if defined (DOUBLE_SUPPORT) +#ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64:enable +#elif defined (cl_amd_fp64) +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#endif #endif ////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -51,9 +55,9 @@ /////////////////////////////////////////////////////////////////////////////////////////////////////// __kernel void arithm_compare_eq_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global uchar *src2, int src2_step, int src2_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -61,8 +65,11 @@ __kernel void arithm_compare_eq_D0 (__global uchar *src1, int src1_step, int src if (x < cols && y < rows) { x = x << 2; - - #define dst_align (dst_offset & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (dst_offset & 3) int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); int src2_index = mad24(y, src2_step, x + src2_offset - dst_align); @@ -102,9 +109,9 @@ __kernel void arithm_compare_eq_D0 (__global uchar *src1, int src1_step, int src __kernel void arithm_compare_ne_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global ushort *src2, int src2_step, int src2_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -113,8 +120,11 @@ __kernel void arithm_compare_ne_D2 (__global ushort *src1, int src1_step, int sr if (x < cols && y < rows) { x = x << 2; - - #define dst_align ((dst_offset >> 1)& 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1)& 3) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); @@ -153,9 +163,9 @@ __kernel void arithm_compare_ne_D2 (__global ushort *src1, int src1_step, int sr __kernel void arithm_compare_eq_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global short *src2, int src2_step, int src2_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -164,8 +174,11 @@ __kernel void arithm_compare_eq_D3 (__global short *src1, int src1_step, int src if (x < cols && y < rows) { x = x << 2; - - #define dst_align ((dst_offset >> 1) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 3) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); @@ -207,9 +220,9 @@ __kernel void arithm_compare_eq_D3 (__global short *src1, int src1_step, int src __kernel void arithm_compare_eq_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global int *src2, int src2_step, int src2_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -217,7 +230,10 @@ __kernel void arithm_compare_eq_D4 (__global int *src1, int src1_step, int src1_ if (x < cols && y < rows) { x = x << 2; - #define dst_align ((dst_offset >> 2) & 3) +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 2) & 3) int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2)); int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2)); @@ -227,7 +243,7 @@ __kernel void arithm_compare_eq_D4 (__global int *src1, int src1_step, int src1_ int src1_index_fix = src1_index < 0 ? 0 : src1_index; int src2_index_fix = src2_index < 0 ? 0 : src2_index; - int4 src1_data = vload4(0, (__global int *)((__global char *)src1 + src1_index)); + int4 src1_data = vload4(0, (__global int *)((__global char *)src1 + src1_index)); int4 src2_data = vload4(0, (__global int *)((__global char *)src2 + src2_index)); if(src1_index < 0) { @@ -255,9 +271,9 @@ __kernel void arithm_compare_eq_D4 (__global int *src1, int src1_step, int src1_ } __kernel void arithm_compare_eq_D5 (__global float *src1, int src1_step, int src1_offset, - __global float *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global float *src2, int src2_step, int src2_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -265,7 +281,10 @@ __kernel void arithm_compare_eq_D5 (__global float *src1, int src1_step, int src if (x < cols && y < rows) { x = x << 2; - #define dst_align ((dst_offset >> 2) & 3) +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 2) & 3) int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2)); int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2)); @@ -275,7 +294,8 @@ __kernel void arithm_compare_eq_D5 (__global float *src1, int src1_step, int src int src1_index_fix = src1_index < 0 ? 0 : src1_index; int src2_index_fix = src2_index < 0 ? 0 : src2_index; float4 src1_data = vload4(0, (__global float *)((__global char *)src1 + src1_index_fix)); - float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index_fix)); if(src2_index < 0) + float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index_fix)); + if(src2_index < 0) { float4 tmp; tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; @@ -297,9 +317,9 @@ __kernel void arithm_compare_eq_D5 (__global float *src1, int src1_step, int src #if defined (DOUBLE_SUPPORT) __kernel void arithm_compare_eq_D6 (__global double *src1, int src1_step, int src1_offset, - __global double *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global double *src2, int src2_step, int src2_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -307,7 +327,10 @@ __kernel void arithm_compare_eq_D6 (__global double *src1, int src1_step, int sr if (x < cols && y < rows) { x = x << 2; - #define dst_align ((dst_offset >> 3) & 3) +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 3) & 3) int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3)); int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3)); @@ -347,9 +370,9 @@ __kernel void arithm_compare_eq_D6 (__global double *src1, int src1_step, int sr /***********************************Compare GT**************************/ __kernel void arithm_compare_gt_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global uchar *src2, int src2_step, int src2_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -357,8 +380,11 @@ __kernel void arithm_compare_gt_D0 (__global uchar *src1, int src1_step, int src if (x < cols && y < rows) { x = x << 2; - - #define dst_align (dst_offset & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (dst_offset & 3) int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); int src2_index = mad24(y, src2_step, x + src2_offset - dst_align); @@ -397,9 +423,9 @@ __kernel void arithm_compare_gt_D0 (__global uchar *src1, int src1_step, int src } __kernel void arithm_compare_gt_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global ushort *src2, int src2_step, int src2_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -408,8 +434,11 @@ __kernel void arithm_compare_gt_D2 (__global ushort *src1, int src1_step, int sr if (x < cols && y < rows) { x = x << 2; - - #define dst_align ((dst_offset >> 1) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 3) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); @@ -450,9 +479,9 @@ __kernel void arithm_compare_gt_D2 (__global ushort *src1, int src1_step, int sr __kernel void arithm_compare_gt_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global short *src2, int src2_step, int src2_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -461,8 +490,11 @@ __kernel void arithm_compare_gt_D3 (__global short *src1, int src1_step, int src if (x < cols && y < rows) { x = x << 2; - - #define dst_align ((dst_offset >> 1) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 3) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); @@ -501,9 +533,9 @@ __kernel void arithm_compare_gt_D3 (__global short *src1, int src1_step, int src } __kernel void arithm_compare_gt_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global int *src2, int src2_step, int src2_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -511,7 +543,10 @@ __kernel void arithm_compare_gt_D4 (__global int *src1, int src1_step, int src1_ if (x < cols && y < rows) { x = x << 2; - #define dst_align ((dst_offset >> 2) & 3) +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 2) & 3) int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2)); int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2)); @@ -521,7 +556,7 @@ __kernel void arithm_compare_gt_D4 (__global int *src1, int src1_step, int src1_ int src1_index_fix = src1_index < 0 ? 0 : src1_index; int src2_index_fix = src2_index < 0 ? 0 : src2_index; - int4 src1_data = vload4(0, (__global int *)((__global char *)src1 + src1_index)); + int4 src1_data = vload4(0, (__global int *)((__global char *)src1 + src1_index)); int4 src2_data = vload4(0, (__global int *)((__global char *)src2 + src2_index)); if(src1_index < 0) { @@ -550,9 +585,9 @@ __kernel void arithm_compare_gt_D4 (__global int *src1, int src1_step, int src1_ } __kernel void arithm_compare_gt_D5 (__global float *src1, int src1_step, int src1_offset, - __global float *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global float *src2, int src2_step, int src2_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -560,7 +595,10 @@ __kernel void arithm_compare_gt_D5 (__global float *src1, int src1_step, int src if (x < cols && y < rows) { x = x << 2; - #define dst_align ((dst_offset >> 2) & 3) +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 2) & 3) int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2)); int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2)); @@ -599,9 +637,9 @@ __kernel void arithm_compare_gt_D5 (__global float *src1, int src1_step, int src #if defined (DOUBLE_SUPPORT) __kernel void arithm_compare_gt_D6 (__global double *src1, int src1_step, int src1_offset, - __global double *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global double *src2, int src2_step, int src2_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -609,7 +647,10 @@ __kernel void arithm_compare_gt_D6 (__global double *src1, int src1_step, int sr if (x < cols && y < rows) { x = x << 2; - #define dst_align ((dst_offset >> 3) & 3) +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 3) & 3) int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3)); int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3)); @@ -649,9 +690,9 @@ __kernel void arithm_compare_gt_D6 (__global double *src1, int src1_step, int sr /***********************************Compare GE**************************/ __kernel void arithm_compare_ge_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global uchar *src2, int src2_step, int src2_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -659,8 +700,11 @@ __kernel void arithm_compare_ge_D0 (__global uchar *src1, int src1_step, int src if (x < cols && y < rows) { x = x << 2; - - #define dst_align (dst_offset & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (dst_offset & 3) int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); int src2_index = mad24(y, src2_step, x + src2_offset - dst_align); @@ -702,9 +746,9 @@ __kernel void arithm_compare_ge_D0 (__global uchar *src1, int src1_step, int src __kernel void arithm_compare_ge_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global ushort *src2, int src2_step, int src2_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -713,8 +757,11 @@ __kernel void arithm_compare_ge_D2 (__global ushort *src1, int src1_step, int sr if (x < cols && y < rows) { x = x << 2; - - #define dst_align ((dst_offset >> 1) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 3) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); @@ -757,9 +804,9 @@ __kernel void arithm_compare_ge_D2 (__global ushort *src1, int src1_step, int sr __kernel void arithm_compare_ge_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global short *src2, int src2_step, int src2_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -768,8 +815,11 @@ __kernel void arithm_compare_ge_D3 (__global short *src1, int src1_step, int src if (x < cols && y < rows) { x = x << 2; - - #define dst_align ((dst_offset >> 1)& 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1)& 3) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); @@ -809,9 +859,9 @@ __kernel void arithm_compare_ge_D3 (__global short *src1, int src1_step, int src } __kernel void arithm_compare_ge_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global int *src2, int src2_step, int src2_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -819,8 +869,11 @@ __kernel void arithm_compare_ge_D4 (__global int *src1, int src1_step, int src1_ if (x < cols && y < rows) { x = x << 2; - - #define dst_align ((dst_offset >> 2)& 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 2)& 3) int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2)); int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2)); @@ -845,7 +898,7 @@ __kernel void arithm_compare_ge_D4 (__global int *src1, int src1_step, int src1_ tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; } - uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); + uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); uchar4 tmp_data = convert_uchar4((src1_data >= src2_data)); dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; @@ -858,9 +911,9 @@ __kernel void arithm_compare_ge_D4 (__global int *src1, int src1_step, int src1_ } __kernel void arithm_compare_ge_D5 (__global float *src1, int src1_step, int src1_offset, - __global float *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global float *src2, int src2_step, int src2_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -868,8 +921,11 @@ __kernel void arithm_compare_ge_D5 (__global float *src1, int src1_step, int src if (x < cols && y < rows) { x = x << 2; - - #define dst_align ((dst_offset >> 2)& 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 2)& 3) int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2)); int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2)); @@ -909,9 +965,9 @@ __kernel void arithm_compare_ge_D5 (__global float *src1, int src1_step, int src #if defined (DOUBLE_SUPPORT) __kernel void arithm_compare_ge_D6 (__global double *src1, int src1_step, int src1_offset, - __global double *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global double *src2, int src2_step, int src2_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -919,8 +975,11 @@ __kernel void arithm_compare_ge_D6 (__global double *src1, int src1_step, int sr if (x < cols && y < rows) { x = x << 2; - - #define dst_align ((dst_offset >> 3)& 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 3)& 3) int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3)); int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3)); @@ -942,7 +1001,8 @@ __kernel void arithm_compare_ge_D6 (__global double *src1, int src1_step, int sr double4 tmp; tmp.xyzw = (src2_index == -2) ? src2_data.zwxy:src2_data.yzwx; src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; - } uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); + } + uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); uchar4 tmp_data = convert_uchar4((src1_data >= src2_data)); dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; @@ -954,3 +1014,4 @@ __kernel void arithm_compare_ge_D6 (__global double *src1, int src1_step, int sr } } #endif + diff --git a/modules/ocl/src/opencl/arithm_compare_ne.cl b/modules/ocl/src/opencl/arithm_compare_ne.cl index 713dc1316..f0128846b 100644 --- a/modules/ocl/src/opencl/arithm_compare_ne.cl +++ b/modules/ocl/src/opencl/arithm_compare_ne.cl @@ -43,13 +43,17 @@ // //M*/ #if defined (DOUBLE_SUPPORT) +#ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64:enable +#elif defined (cl_amd_fp64) +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#endif #endif /***********************************Compare NE*******************************/ __kernel void arithm_compare_ne_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global uchar *src2, int src2_step, int src2_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -57,8 +61,11 @@ __kernel void arithm_compare_ne_D0 (__global uchar *src1, int src1_step, int src if (x < cols && y < rows) { x = x << 2; - - #define dst_align (dst_offset & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (dst_offset & 3) int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); int src2_index = mad24(y, src2_step, x + src2_offset - dst_align); @@ -98,9 +105,9 @@ __kernel void arithm_compare_ne_D0 (__global uchar *src1, int src1_step, int src __kernel void arithm_compare_ne_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global ushort *src2, int src2_step, int src2_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -109,8 +116,11 @@ __kernel void arithm_compare_ne_D2 (__global ushort *src1, int src1_step, int sr if (x < cols && y < rows) { x = x << 2; - - #define dst_align ((dst_offset >> 1)& 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1)& 3) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); @@ -150,9 +160,9 @@ __kernel void arithm_compare_ne_D2 (__global ushort *src1, int src1_step, int sr __kernel void arithm_compare_ne_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global short *src2, int src2_step, int src2_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -161,8 +171,11 @@ __kernel void arithm_compare_ne_D3 (__global short *src1, int src1_step, int src if (x < cols && y < rows) { x = x << 2; - - #define dst_align ((dst_offset >> 1)& 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1)& 3) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); @@ -200,9 +213,9 @@ __kernel void arithm_compare_ne_D3 (__global short *src1, int src1_step, int src } __kernel void arithm_compare_ne_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global int *src2, int src2_step, int src2_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -210,7 +223,10 @@ __kernel void arithm_compare_ne_D4 (__global int *src1, int src1_step, int src1_ if (x < cols && y < rows) { x = x << 2; - #define dst_align ((dst_offset >> 2)& 3) +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 2)& 3) int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2)); int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2)); @@ -249,9 +265,9 @@ __kernel void arithm_compare_ne_D4 (__global int *src1, int src1_step, int src1_ } __kernel void arithm_compare_ne_D5 (__global float *src1, int src1_step, int src1_offset, - __global float *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global float *src2, int src2_step, int src2_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -259,7 +275,10 @@ __kernel void arithm_compare_ne_D5 (__global float *src1, int src1_step, int src if (x < cols && y < rows) { x = x << 2; - #define dst_align ((dst_offset >> 2) & 3) +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 2) & 3) int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2)); int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2)); @@ -269,7 +288,8 @@ __kernel void arithm_compare_ne_D5 (__global float *src1, int src1_step, int src int src1_index_fix = src1_index < 0 ? 0 : src1_index; int src2_index_fix = src2_index < 0 ? 0 : src2_index; float4 src1_data = vload4(0, (__global float *)((__global char *)src1 + src1_index_fix)); - float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index_fix)); if(src1_index < 0) + float4 src2_data = vload4(0, (__global float *)((__global char *)src2 + src2_index_fix)); + if(src1_index < 0) { float4 tmp; tmp.xyzw = (src1_index == -2) ? src1_data.zwxy:src1_data.yzwx; @@ -282,7 +302,7 @@ __kernel void arithm_compare_ne_D5 (__global float *src1, int src1_step, int src src2_data.xyzw = (src2_index == -1) ? src2_data.wxyz:tmp.xyzw; } - uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); + uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); uchar4 tmp_data = convert_uchar4((src1_data != src2_data)); dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; @@ -296,9 +316,9 @@ __kernel void arithm_compare_ne_D5 (__global float *src1, int src1_step, int src #if defined (DOUBLE_SUPPORT) __kernel void arithm_compare_ne_D6 (__global double *src1, int src1_step, int src1_offset, - __global double *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global double *src2, int src2_step, int src2_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -306,7 +326,10 @@ __kernel void arithm_compare_ne_D6 (__global double *src1, int src1_step, int sr if (x < cols && y < rows) { x = x << 2; - #define dst_align ((dst_offset >> 3) & 3) +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 3) & 3) int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3)); int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3)); @@ -347,9 +370,9 @@ __kernel void arithm_compare_ne_D6 (__global double *src1, int src1_step, int sr /***********************************Compare LT*******************************/ __kernel void arithm_compare_lt_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global uchar *src2, int src2_step, int src2_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -357,8 +380,11 @@ __kernel void arithm_compare_lt_D0 (__global uchar *src1, int src1_step, int src if (x < cols && y < rows) { x = x << 2; - - #define dst_align (dst_offset & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (dst_offset & 3) int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); int src2_index = mad24(y, src2_step, x + src2_offset - dst_align); @@ -398,9 +424,9 @@ __kernel void arithm_compare_lt_D0 (__global uchar *src1, int src1_step, int src __kernel void arithm_compare_lt_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global ushort *src2, int src2_step, int src2_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -409,8 +435,11 @@ __kernel void arithm_compare_lt_D2 (__global ushort *src1, int src1_step, int sr if (x < cols && y < rows) { x = x << 2; - - #define dst_align ((dst_offset >> 1) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 3) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); @@ -451,9 +480,9 @@ __kernel void arithm_compare_lt_D2 (__global ushort *src1, int src1_step, int sr __kernel void arithm_compare_lt_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global short *src2, int src2_step, int src2_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -462,8 +491,11 @@ __kernel void arithm_compare_lt_D3 (__global short *src1, int src1_step, int src if (x < cols && y < rows) { x = x << 2; - - #define dst_align ((dst_offset >> 1) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 3) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); @@ -502,9 +534,9 @@ __kernel void arithm_compare_lt_D3 (__global short *src1, int src1_step, int src } __kernel void arithm_compare_lt_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global int *src2, int src2_step, int src2_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -512,7 +544,10 @@ __kernel void arithm_compare_lt_D4 (__global int *src1, int src1_step, int src1_ if (x < cols && y < rows) { x = x << 2; - #define dst_align ((dst_offset >> 2) & 3) +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 2) & 3) int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2)); int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2)); @@ -554,9 +589,9 @@ __kernel void arithm_compare_lt_D4 (__global int *src1, int src1_step, int src1_ } __kernel void arithm_compare_lt_D5 (__global float *src1, int src1_step, int src1_offset, - __global float *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global float *src2, int src2_step, int src2_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -564,7 +599,10 @@ __kernel void arithm_compare_lt_D5 (__global float *src1, int src1_step, int src if (x < cols && y < rows) { x = x << 2; - #define dst_align ((dst_offset >> 2) & 3) +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 2) & 3) int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2)); int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2)); @@ -589,7 +627,7 @@ __kernel void arithm_compare_lt_D5 (__global float *src1, int src1_step, int src } - uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); + uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); uchar4 tmp_data = convert_uchar4((src1_data < src2_data)); dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; @@ -603,9 +641,9 @@ __kernel void arithm_compare_lt_D5 (__global float *src1, int src1_step, int src #if defined (DOUBLE_SUPPORT) __kernel void arithm_compare_lt_D6 (__global double *src1, int src1_step, int src1_offset, - __global double *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global double *src2, int src2_step, int src2_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -613,7 +651,10 @@ __kernel void arithm_compare_lt_D6 (__global double *src1, int src1_step, int sr if (x < cols && y < rows) { x = x << 2; - #define dst_align ((dst_offset >> 3) & 3) +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 3) & 3) int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3)); int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3)); @@ -638,7 +679,7 @@ __kernel void arithm_compare_lt_D6 (__global double *src1, int src1_step, int sr } - uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); + uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); uchar4 tmp_data = convert_uchar4((src1_data < src2_data)); dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; @@ -653,9 +694,9 @@ __kernel void arithm_compare_lt_D6 (__global double *src1, int src1_step, int sr /***********************************Compare LE*******************************/ __kernel void arithm_compare_le_D0 (__global uchar *src1, int src1_step, int src1_offset, - __global uchar *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global uchar *src2, int src2_step, int src2_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -663,8 +704,11 @@ __kernel void arithm_compare_le_D0 (__global uchar *src1, int src1_step, int src if (x < cols && y < rows) { x = x << 2; - - #define dst_align (dst_offset & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (dst_offset & 3) int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); int src2_index = mad24(y, src2_step, x + src2_offset - dst_align); @@ -705,9 +749,9 @@ __kernel void arithm_compare_le_D0 (__global uchar *src1, int src1_step, int src __kernel void arithm_compare_le_D2 (__global ushort *src1, int src1_step, int src1_offset, - __global ushort *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global ushort *src2, int src2_step, int src2_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -716,8 +760,11 @@ __kernel void arithm_compare_le_D2 (__global ushort *src1, int src1_step, int sr if (x < cols && y < rows) { x = x << 2; - - #define dst_align ((dst_offset >> 1) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 3) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); @@ -758,9 +805,9 @@ __kernel void arithm_compare_le_D2 (__global ushort *src1, int src1_step, int sr __kernel void arithm_compare_le_D3 (__global short *src1, int src1_step, int src1_offset, - __global short *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global short *src2, int src2_step, int src2_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); @@ -769,8 +816,11 @@ __kernel void arithm_compare_le_D3 (__global short *src1, int src1_step, int src if (x < cols && y < rows) { x = x << 2; - - #define dst_align ((dst_offset >> 1) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 3) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); @@ -809,9 +859,9 @@ __kernel void arithm_compare_le_D3 (__global short *src1, int src1_step, int src } __kernel void arithm_compare_le_D4 (__global int *src1, int src1_step, int src1_offset, - __global int *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global int *src2, int src2_step, int src2_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -819,7 +869,10 @@ __kernel void arithm_compare_le_D4 (__global int *src1, int src1_step, int src1_ if (x < cols && y < rows) { x = x << 2; - #define dst_align ((dst_offset >> 2)& 3) +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 2)& 3) int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2)); int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2)); @@ -857,9 +910,9 @@ __kernel void arithm_compare_le_D4 (__global int *src1, int src1_step, int src1_ } __kernel void arithm_compare_le_D5 (__global float *src1, int src1_step, int src1_offset, - __global float *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global float *src2, int src2_step, int src2_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -867,7 +920,10 @@ __kernel void arithm_compare_le_D5 (__global float *src1, int src1_step, int src if (x < cols && y < rows) { x = x << 2; - #define dst_align ((dst_offset >> 2)& 3) +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 2)& 3) int src1_index = mad24(y, src1_step, (x << 2) + src1_offset - (dst_align << 2)); int src2_index = mad24(y, src2_step, (x << 2) + src2_offset - (dst_align << 2)); @@ -905,9 +961,9 @@ __kernel void arithm_compare_le_D5 (__global float *src1, int src1_step, int src #if defined (DOUBLE_SUPPORT) __kernel void arithm_compare_le_D6 (__global double *src1, int src1_step, int src1_offset, - __global double *src2, int src2_step, int src2_offset, - __global uchar *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1) + __global double *src2, int src2_step, int src2_offset, + __global uchar *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1) { int x = get_global_id(0); int y = get_global_id(1); @@ -915,7 +971,10 @@ __kernel void arithm_compare_le_D6 (__global double *src1, int src1_step, int sr if (x < cols && y < rows) { x = x << 2; - #define dst_align ((dst_offset >> 3)& 3) +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 3)& 3) int src1_index = mad24(y, src1_step, (x << 3) + src1_offset - (dst_align << 3)); int src2_index = mad24(y, src2_step, (x << 3) + src2_offset - (dst_align << 3)); @@ -952,3 +1011,5 @@ __kernel void arithm_compare_le_D6 (__global double *src1, int src1_step, int sr } } #endif + + diff --git a/modules/ocl/src/opencl/arithm_div.cl b/modules/ocl/src/opencl/arithm_div.cl index dcbe30310..896277cf5 100644 --- a/modules/ocl/src/opencl/arithm_div.cl +++ b/modules/ocl/src/opencl/arithm_div.cl @@ -44,7 +44,11 @@ //M*/ #if defined (DOUBLE_SUPPORT) +#ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64:enable +#elif defined (cl_amd_fp64) +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#endif typedef double F ; typedef double4 F4; #define convert_F4 convert_double4 @@ -56,34 +60,24 @@ typedef float4 F4; #define convert_F float #endif -uchar round2_uchar(F v){ - - uchar v1 = convert_uchar_sat(round(v)); - //uchar v2 = convert_uchar_sat(v+(v>=0 ? 0.5 : -0.5)); - - return v1;//(((v-v1)==0.5) && (v1%2==0)) ? v1 : v2; +inline uchar round2_uchar(F v) +{ + return convert_uchar_sat(round(v)); } -ushort round2_ushort(F v){ - - ushort v1 = convert_ushort_sat(round(v)); - //ushort v2 = convert_ushort_sat(v+(v>=0 ? 0.5 : -0.5)); - - return v1;//(((v-v1)==0.5) && (v1%2==0)) ? v1 : v2; +inline ushort round2_ushort(F v) +{ + return convert_ushort_sat(round(v)); } -short round2_short(F v){ - short v1 = convert_short_sat(round(v)); - //short v2 = convert_short_sat(v+(v>=0 ? 0.5 : -0.5)); - - return v1;//(((v-v1)==0.5) && (v1%2==0)) ? v1 : v2; +inline short round2_short(F v) +{ + return convert_short_sat(round(v)); } -int round2_int(F v){ - int v1 = convert_int_sat(round(v)); - //int v2 = convert_int_sat(v+(v>=0 ? 0.5 : -0.5)); - - return v1;//(((v-v1)==0.5) && (v1%2==0)) ? v1 : v2; +inline int round2_int(F v) +{ + return convert_int_sat(round(v)); } /////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////divide/////////////////////////////////////////////////// @@ -94,39 +88,41 @@ __kernel void arithm_div_D0 (__global uchar *src1, int src1_step, int src1_offse __global uchar *dst, int dst_step, int dst_offset, int rows, int cols, int dst_step1, F scalar) { - int x = get_global_id(0); - int y = get_global_id(1); + int2 coor = (int2)(get_global_id(0), get_global_id(1)); - if (x < cols && y < rows) + if (coor.x < cols && coor.y < rows) { - x = x << 2; + coor.x = coor.x << 2; + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (dst_offset & 3) + int2 src_index = (int2)(mad24(coor.y, src1_step, coor.x + src1_offset - dst_align), + mad24(coor.y, src2_step, coor.x + src2_offset - dst_align)); - #define dst_align (dst_offset & 3) - int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); - int src2_index = mad24(y, src2_step, x + src2_offset - dst_align); + int4 dst_args = (int4)(mad24(coor.y, dst_step, dst_offset), + mad24(coor.y, dst_step, dst_offset + dst_step1), + mad24(coor.y, dst_step, dst_offset + coor.x & (int)0xfffffffc), + 0); - int dst_start = mad24(y, dst_step, dst_offset); - int dst_end = mad24(y, dst_step, dst_offset + dst_step1); - int dst_index = mad24(y, dst_step, dst_offset + x & (int)0xfffffffc); - - uchar4 src1_data = vload4(0, src1 + src1_index); - uchar4 src2_data = vload4(0, src2 + src2_index); - uchar4 dst_data = *((__global uchar4 *)(dst + dst_index)); + uchar4 src1_data = vload4(0, src1 + src_index.x); + uchar4 src2_data = vload4(0, src2 + src_index.y); + uchar4 dst_data = *((__global uchar4 *)(dst + dst_args.z)); F4 tmp = convert_F4(src1_data) * scalar; - uchar4 tmp_data; - tmp_data.x = ((tmp.x == 0) || (src2_data.x == 0)) ? 0 : round2_uchar(tmp.x / (F)src2_data.x); - tmp_data.y = ((tmp.y == 0) || (src2_data.y == 0)) ? 0 : round2_uchar(tmp.y / (F)src2_data.y); - tmp_data.z = ((tmp.z == 0) || (src2_data.z == 0)) ? 0 : round2_uchar(tmp.z / (F)src2_data.z); - tmp_data.w = ((tmp.w == 0) || (src2_data.w == 0)) ? 0 : round2_uchar(tmp.w / (F)src2_data.w); + tmp_data.x = ((tmp.x == 0) || (src2_data.x == 0)) ? 0 : round2_uchar(tmp.x / src2_data.x); + tmp_data.y = ((tmp.y == 0) || (src2_data.y == 0)) ? 0 : round2_uchar(tmp.y / src2_data.y); + tmp_data.z = ((tmp.z == 0) || (src2_data.z == 0)) ? 0 : round2_uchar(tmp.z / src2_data.z); + tmp_data.w = ((tmp.w == 0) || (src2_data.w == 0)) ? 0 : round2_uchar(tmp.w / src2_data.w); - dst_data.x = ((dst_index + 0 >= dst_start) && (dst_index + 0 < dst_end)) ? tmp_data.x : dst_data.x; - dst_data.y = ((dst_index + 1 >= dst_start) && (dst_index + 1 < dst_end)) ? tmp_data.y : dst_data.y; - dst_data.z = ((dst_index + 2 >= dst_start) && (dst_index + 2 < dst_end)) ? tmp_data.z : dst_data.z; - dst_data.w = ((dst_index + 3 >= dst_start) && (dst_index + 3 < dst_end)) ? tmp_data.w : dst_data.w; + dst_data.x = ((dst_args.z + 0 >= dst_args.x) && (dst_args.z + 0 < dst_args.y)) ? tmp_data.x : dst_data.x; + dst_data.y = ((dst_args.z + 1 >= dst_args.x) && (dst_args.z + 1 < dst_args.y)) ? tmp_data.y : dst_data.y; + dst_data.z = ((dst_args.z + 2 >= dst_args.x) && (dst_args.z + 2 < dst_args.y)) ? tmp_data.z : dst_data.z; + dst_data.w = ((dst_args.z + 3 >= dst_args.x) && (dst_args.z + 3 < dst_args.y)) ? tmp_data.w : dst_data.w; - *((__global uchar4 *)(dst + dst_index)) = dst_data; + *((__global uchar4 *)(dst + dst_args.z)) = dst_data; } } @@ -141,8 +137,11 @@ __kernel void arithm_div_D2 (__global ushort *src1, int src1_step, int src1_offs if (x < cols && y < rows) { x = x << 2; - - #define dst_align ((dst_offset >> 1) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 3) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); @@ -181,8 +180,11 @@ __kernel void arithm_div_D3 (__global short *src1, int src1_step, int src1_offse if (x < cols && y < rows) { x = x << 2; - - #define dst_align ((dst_offset >> 1) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 3) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); @@ -296,8 +298,11 @@ __kernel void arithm_s_div_D0 (__global uchar *src, int src_step, int src_offset if (x < cols && y < rows) { x = x << 2; - - #define dst_align (dst_offset & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (dst_offset & 3) int src_index = mad24(y, src_step, x + src_offset - dst_align); int dst_start = mad24(y, dst_step, dst_offset); @@ -332,8 +337,11 @@ __kernel void arithm_s_div_D2 (__global ushort *src, int src_step, int src_offse if (x < cols && y < rows) { x = x << 2; - - #define dst_align ((dst_offset >> 1) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 3) int src_index = mad24(y, src_step, (x << 1) + src_offset - (dst_align << 1)); int dst_start = mad24(y, dst_step, dst_offset); @@ -367,8 +375,11 @@ __kernel void arithm_s_div_D3 (__global short *src, int src_step, int src_offset if (x < cols && y < rows) { x = x << 2; - - #define dst_align ((dst_offset >> 1) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 3) int src_index = mad24(y, src_step, (x << 1) + src_offset - (dst_align << 1)); int dst_start = mad24(y, dst_step, dst_offset); @@ -455,3 +466,5 @@ __kernel void arithm_s_div_D6 (__global double *src, int src_step, int src_offse } } #endif + + diff --git a/modules/ocl/src/opencl/arithm_flip.cl b/modules/ocl/src/opencl/arithm_flip.cl index f4925244a..821a84ab7 100644 --- a/modules/ocl/src/opencl/arithm_flip.cl +++ b/modules/ocl/src/opencl/arithm_flip.cl @@ -44,7 +44,11 @@ //M*/ #if defined (DOUBLE_SUPPORT) +#ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64:enable +#elif defined (cl_amd_fp64) +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#endif #endif ////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -60,8 +64,11 @@ __kernel void arithm_flip_rows_D0 (__global uchar *src, int src_step, int src_of if (x < cols && y < thread_rows) { x = x << 2; - - #define dst_align (dst_offset & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (dst_offset & 3) int src_index_0 = mad24(y, src_step, x + src_offset - dst_align); int src_index_1 = mad24(rows - y - 1, src_step, x + src_offset - dst_align); @@ -115,8 +122,11 @@ __kernel void arithm_flip_rows_D1 (__global char *src, int src_step, int src_off if (x < cols && y < thread_rows) { x = x << 2; - - #define dst_align (dst_offset & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (dst_offset & 3) int src_index_0 = mad24(y, src_step, x + src_offset - dst_align); int src_index_1 = mad24(rows - y - 1, src_step, x + src_offset - dst_align); @@ -157,8 +167,11 @@ __kernel void arithm_flip_rows_D2 (__global ushort *src, int src_step, int src_o if (x < cols && y < thread_rows) { x = x << 2; - - #define dst_align (((dst_offset >> 1) & 3) << 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset >> 1) & 3) << 1) int src_index_0 = mad24(y, src_step, (x << 1) + src_offset - dst_align); int src_index_1 = mad24(rows - y - 1, src_step, (x << 1) + src_offset - dst_align); @@ -199,8 +212,11 @@ __kernel void arithm_flip_rows_D3 (__global short *src, int src_step, int src_of if (x < cols && y < thread_rows) { x = x << 2; - - #define dst_align (((dst_offset >> 1) & 3) << 1) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (((dst_offset >> 1) & 3) << 1) int src_index_0 = mad24(y, src_step, (x << 1) + src_offset - dst_align); int src_index_1 = mad24(rows - y - 1, src_step, (x << 1) + src_offset - dst_align); diff --git a/modules/ocl/src/opencl/arithm_mul.cl b/modules/ocl/src/opencl/arithm_mul.cl index f9f3936a4..e1cc9f6ab 100644 --- a/modules/ocl/src/opencl/arithm_mul.cl +++ b/modules/ocl/src/opencl/arithm_mul.cl @@ -16,7 +16,6 @@ // // @Authors // Jia Haipeng, jiahaipeng95@gmail.com -// Dachuan Zhao, dachuan@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -44,11 +43,16 @@ // //M*/ -#if defined DOUBLE_SUPPORT +#if defined (DOUBLE_SUPPORT) +#ifdef cl_khr_fp64 #pragma OPENCL EXTENSION cl_khr_fp64:enable +#elif defined (cl_amd_fp64) +#pragma OPENCL EXTENSION cl_amd_fp64:enable +#endif #endif -int4 round_int4(float4 v){ +int4 round_int4(float4 v) +{ v.s0 = v.s0 + (v.s0 > 0 ? 0.5 : -0.5); v.s1 = v.s1 + (v.s1 > 0 ? 0.5 : -0.5); v.s2 = v.s2 + (v.s2 > 0 ? 0.5 : -0.5); @@ -56,7 +60,8 @@ int4 round_int4(float4 v){ return convert_int4_sat(v); } -uint4 round_uint4(float4 v){ +uint4 round_uint4(float4 v) +{ v.s0 = v.s0 + (v.s0 > 0 ? 0.5 : -0.5); v.s1 = v.s1 + (v.s1 > 0 ? 0.5 : -0.5); v.s2 = v.s2 + (v.s2 > 0 ? 0.5 : -0.5); @@ -64,7 +69,8 @@ uint4 round_uint4(float4 v){ return convert_uint4_sat(v); } -long round_int(float v){ +long round_int(float v) +{ v = v + (v > 0 ? 0.5 : -0.5); return convert_int_sat(v); @@ -84,8 +90,11 @@ __kernel void arithm_mul_D0 (__global uchar *src1, int src1_step, int src1_offse if (x < cols && y < rows) { x = x << 2; - - #define dst_align (dst_offset & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align (dst_offset & 3) int src1_index = mad24(y, src1_step, x + src1_offset - dst_align); int src2_index = mad24(y, src2_step, x + src2_offset - dst_align); @@ -129,8 +138,11 @@ __kernel void arithm_mul_D2 (__global ushort *src1, int src1_step, int src1_offs if (x < cols && y < rows) { x = x << 2; - - #define dst_align ((dst_offset >> 1) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 3) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); @@ -165,8 +177,11 @@ __kernel void arithm_mul_D3 (__global short *src1, int src1_step, int src1_offse if (x < cols && y < rows) { x = x << 2; - - #define dst_align ((dst_offset >> 1) & 3) + +#ifdef dst_align +#undef dst_align +#endif +#define dst_align ((dst_offset >> 1) & 3) int src1_index = mad24(y, src1_step, (x << 1) + src1_offset - (dst_align << 1)); int src2_index = mad24(y, src2_step, (x << 1) + src2_offset - (dst_align << 1)); @@ -263,8 +278,8 @@ __kernel void arithm_mul_D6 (__global double *src1, int src1_step, int src1_offs #endif __kernel void arithm_muls_D5 (__global float *src1, int src1_step, int src1_offset, - __global float *dst, int dst_step, int dst_offset, - int rows, int cols, int dst_step1, float scalar) + __global float *dst, int dst_step, int dst_offset, + int rows, int cols, int dst_step1, float scalar) { int x = get_global_id(0); int y = get_global_id(1); From 18b2d6bdbbbcb28dfbf6dad4b9058b909228de25 Mon Sep 17 00:00:00 2001 From: Andrey Pavlenko Date: Fri, 5 Apr 2013 08:50:08 +0400 Subject: [PATCH 63/67] copying '.classpath' and '.project' to build dir, useful for opening in eclipse --- modules/java/android_test/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/java/android_test/CMakeLists.txt b/modules/java/android_test/CMakeLists.txt index 06ebf4aa1..41f69e6ca 100644 --- a/modules/java/android_test/CMakeLists.txt +++ b/modules/java/android_test/CMakeLists.txt @@ -14,7 +14,7 @@ ocv_list_filterout(opencv_test_java_files ".svn") # copy sources out from the build tree set(opencv_test_java_file_deps "") -foreach(f ${opencv_test_java_files} ${ANDROID_MANIFEST_FILE}) +foreach(f ${opencv_test_java_files} ${ANDROID_MANIFEST_FILE} ".classpath" ".project") add_custom_command( OUTPUT "${opencv_test_java_bin_dir}/${f}" COMMAND ${CMAKE_COMMAND} -E copy "${CMAKE_CURRENT_SOURCE_DIR}/${f}" "${opencv_test_java_bin_dir}/${f}" From 09fe5cddf185c641e4b4bbf4c11c1f57771c72d3 Mon Sep 17 00:00:00 2001 From: Andrey Pavlenko Date: Fri, 5 Apr 2013 08:50:24 +0400 Subject: [PATCH 64/67] test for the issue #2901 --- .../BruteForceHammingDescriptorMatcherTest.java | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/modules/java/android_test/src/org/opencv/test/features2d/BruteForceHammingDescriptorMatcherTest.java b/modules/java/android_test/src/org/opencv/test/features2d/BruteForceHammingDescriptorMatcherTest.java index 08ff220ed..d09515c76 100644 --- a/modules/java/android_test/src/org/opencv/test/features2d/BruteForceHammingDescriptorMatcherTest.java +++ b/modules/java/android_test/src/org/opencv/test/features2d/BruteForceHammingDescriptorMatcherTest.java @@ -1,5 +1,6 @@ package org.opencv.test.features2d; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; @@ -204,7 +205,17 @@ public class BruteForceHammingDescriptorMatcherTest extends OpenCVTestCase { } public void testRadiusMatchMatListOfListOfDMatchFloat() { - fail("Not yet implemented"); + Mat train = getTrainDescriptors(); + Mat query = getQueryDescriptors(); + ArrayList matches = new ArrayList(); + + matcher.radiusMatch(query, train, matches, 50.f); + + assertEquals(matches.size(), 4); + assertTrue(matches.get(0).empty()); + assertMatEqual(matches.get(1), new MatOfDMatch(truth[1]), EPS); + assertMatEqual(matches.get(2), new MatOfDMatch(truth[2]), EPS); + assertTrue(matches.get(3).empty()); } public void testRadiusMatchMatListOfListOfDMatchFloatListOfMat() { From 4c31c26acf51743c7b54a566768eac069ed332dc Mon Sep 17 00:00:00 2001 From: Andrey Pavlenko Date: Fri, 5 Apr 2013 08:50:37 +0400 Subject: [PATCH 65/67] fix for #2901 (en exception was raised when getting empty MatOfDMatch) --- modules/java/generator/src/java/core+MatOfDMatch.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/java/generator/src/java/core+MatOfDMatch.java b/modules/java/generator/src/java/core+MatOfDMatch.java index 7a3094933..4a161017d 100644 --- a/modules/java/generator/src/java/core+MatOfDMatch.java +++ b/modules/java/generator/src/java/core+MatOfDMatch.java @@ -16,8 +16,8 @@ public class MatOfDMatch extends Mat { protected MatOfDMatch(long addr) { super(addr); - if(checkVector(_channels, _depth) < 0 ) - throw new IllegalArgumentException("Incomatible Mat"); + if( !empty() && checkVector(_channels, _depth) < 0 ) + throw new IllegalArgumentException("Incomatible Mat: " + toString()); //FIXME: do we need release() here? } @@ -27,8 +27,8 @@ public class MatOfDMatch extends Mat { public MatOfDMatch(Mat m) { super(m, Range.all()); - if(checkVector(_channels, _depth) < 0 ) - throw new IllegalArgumentException("Incomatible Mat"); + if( !empty() && checkVector(_channels, _depth) < 0 ) + throw new IllegalArgumentException("Incomatible Mat: " + toString()); //FIXME: do we need release() here? } From 3400d83778130a620fad45b48ee20c00deac4cdb Mon Sep 17 00:00:00 2001 From: Andrey Kamaev Date: Fri, 5 Apr 2013 11:01:28 +0400 Subject: [PATCH 66/67] Workaround hanging of ocl module when ocl API in not really used --- modules/ocl/src/initialization.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/modules/ocl/src/initialization.cpp b/modules/ocl/src/initialization.cpp index b582f1ce3..ba69573ad 100644 --- a/modules/ocl/src/initialization.cpp +++ b/modules/ocl/src/initialization.cpp @@ -913,6 +913,7 @@ namespace cv auto_ptr Context::clCxt; int Context::val = 0; static Mutex cs; + static volatile int context_tear_down = 0; Context* Context::getContext() { if(*((volatile int*)&val) != 1) @@ -920,9 +921,10 @@ namespace cv AutoLock al(cs); if(*((volatile int*)&val) != 1) { + if (context_tear_down) + return clCxt.get(); if( 0 == clCxt.get()) clCxt.reset(new Context); - std::vector oclinfo; CV_Assert(getDevice(oclinfo, CVCL_DEVICE_TYPE_ALL) > 0); oclinfo[0].impl->setDevice(0, 0, 0); @@ -1045,9 +1047,14 @@ BOOL WINAPI DllMain( HINSTANCE, DWORD fdwReason, LPVOID ) { // application hangs if call clReleaseCommandQueue here, so release context only // without context release application hangs as well - cl_context ctx = (cl_context)getoclContext(); - if(ctx) - openCLSafeCall(clReleaseContext(ctx)); + context_tear_down = 1; + Context* cv_ctx = Context::getContext(); + if(cv_ctx) + { + cl_context ctx = (cl_context)&(cv_ctx->impl->oclcontext); + if(ctx) + openCLSafeCall(clReleaseContext(ctx)); + } } return TRUE; } From f89cc191a462ecf360a59aa072c9df3c8b19aaaf Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Fri, 5 Apr 2013 12:17:45 +0400 Subject: [PATCH 67/67] Waning fixes for Android samples --- samples/android/native-activity/jni/native.cpp | 2 +- samples/android/tutorial-2-mixedprocessing/jni/jni_part.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/samples/android/native-activity/jni/native.cpp b/samples/android/native-activity/jni/native.cpp index 38dda0603..66bc006db 100644 --- a/samples/android/native-activity/jni/native.cpp +++ b/samples/android/native-activity/jni/native.cpp @@ -28,7 +28,7 @@ struct Engine cv::Ptr capture; }; -cv::Size calc_optimal_camera_resolution(const char* supported, int width, int height) +static cv::Size calc_optimal_camera_resolution(const char* supported, int width, int height) { int frame_width = 0; int frame_height = 0; diff --git a/samples/android/tutorial-2-mixedprocessing/jni/jni_part.cpp b/samples/android/tutorial-2-mixedprocessing/jni/jni_part.cpp index e7ed75d21..f8e3ada72 100644 --- a/samples/android/tutorial-2-mixedprocessing/jni/jni_part.cpp +++ b/samples/android/tutorial-2-mixedprocessing/jni/jni_part.cpp @@ -8,7 +8,7 @@ using namespace std; using namespace cv; extern "C" { -JNIEXPORT void JNICALL Java_org_opencv_samples_tutorial2_Tuturial2Activity_FindFeatures(JNIEnv*, jobject, jlong addrGray, jlong addrRgba); +JNIEXPORT void JNICALL Java_org_opencv_samples_tutorial2_Tutorial2Activity_FindFeatures(JNIEnv*, jobject, jlong addrGray, jlong addrRgba); JNIEXPORT void JNICALL Java_org_opencv_samples_tutorial2_Tutorial2Activity_FindFeatures(JNIEnv*, jobject, jlong addrGray, jlong addrRgba) {