From 4d510be2c95a95fa325c2452ea03f7346d66b4b8 Mon Sep 17 00:00:00 2001 From: Alexander Mordvintsev Date: Sat, 23 Mar 2013 14:06:33 +0400 Subject: [PATCH 001/121] added compute function to Feature2D (dirty workaround for #2699) --- modules/features2d/include/opencv2/features2d.hpp | 2 ++ modules/features2d/src/descriptors.cpp | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/modules/features2d/include/opencv2/features2d.hpp b/modules/features2d/include/opencv2/features2d.hpp index 8f32445ca..1104c0b81 100644 --- a/modules/features2d/include/opencv2/features2d.hpp +++ b/modules/features2d/include/opencv2/features2d.hpp @@ -263,6 +263,8 @@ public: OutputArray descriptors, bool useProvidedKeypoints=false ) const = 0; + CV_WRAP void compute( const Mat& image, CV_OUT CV_IN_OUT std::vector& keypoints, CV_OUT Mat& descriptors ) const; + // Create feature detector and descriptor extractor by name. CV_WRAP static Ptr create( const std::string& name ); }; diff --git a/modules/features2d/src/descriptors.cpp b/modules/features2d/src/descriptors.cpp index 7f87bd5eb..34754cb45 100644 --- a/modules/features2d/src/descriptors.cpp +++ b/modules/features2d/src/descriptors.cpp @@ -104,6 +104,12 @@ Ptr DescriptorExtractor::create(const std::string& descript return Algorithm::create("Feature2D." 
+ descriptorExtractorType); } + +CV_WRAP void Feature2D::compute( const Mat& image, CV_OUT CV_IN_OUT std::vector& keypoints, CV_OUT Mat& descriptors ) const +{ + DescriptorExtractor::compute(image, keypoints, descriptors); +} + ///////////////////////////////////////////////////////////////////////////////////////////////////////////////// /****************************************************************************************\ From 9247ad634f70bb9bc3ba6d0bda9c66356d1a833a Mon Sep 17 00:00:00 2001 From: Dominik Rose Date: Tue, 14 May 2013 16:20:01 +0200 Subject: [PATCH 002/121] libd1394 2.x support for mingw on windows added --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 93549c943..e23941c24 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -110,7 +110,7 @@ endif() # Optional 3rd party components # =================================================== -OCV_OPTION(WITH_1394 "Include IEEE1394 support" ON IF (UNIX AND NOT ANDROID AND NOT IOS) ) +OCV_OPTION(WITH_1394 "Include IEEE1394 support" ON IF (NOT ANDROID AND NOT IOS) ) OCV_OPTION(WITH_AVFOUNDATION "Use AVFoundation for Video I/O" ON IF IOS) OCV_OPTION(WITH_CARBON "Use Carbon for UI instead of Cocoa" OFF IF APPLE ) OCV_OPTION(WITH_CUDA "Include NVidia Cuda Runtime support" ON IF (CMAKE_VERSION VERSION_GREATER "2.8" AND NOT ANDROID AND NOT IOS) ) From 86ec9b79fd4c69c66fcd9ea9c600c7b60824b135 Mon Sep 17 00:00:00 2001 From: Dominik Rose Date: Tue, 14 May 2013 16:20:01 +0200 Subject: [PATCH 003/121] libd1394 2.x support for mingw on windows added --- CMakeLists.txt | 15 ++++++++++- cmake/OpenCVFindLibsVideo.cmake | 37 +++++++++++++++++++++++---- cmake/OpenCVUtils.cmake | 7 +++++ modules/highgui/src/cap_dc1394_v2.cpp | 11 +++++++- 4 files changed, 63 insertions(+), 7 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 93549c943..35bfaf2f3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -103,6 +103,19 @@ if(UNIX AND NOT 
ANDROID) endif() endif() +# Add these standard paths to the search paths for FIND_PATH +# to find include files from these locations first +if(MINGW) + if(EXISTS /mingw) + list(APPEND CMAKE_LIBRARY_PATH /mingw) + endif() + if(EXISTS /mingw32) + list(APPEND CMAKE_LIBRARY_PATH /mingw32) + endif() + if(EXISTS /mingw64) + list(APPEND CMAKE_LIBRARY_PATH /mingw64) + endif() +endif() # ---------------------------------------------------------------------------- # OpenCV cmake options @@ -110,7 +123,7 @@ endif() # Optional 3rd party components # =================================================== -OCV_OPTION(WITH_1394 "Include IEEE1394 support" ON IF (UNIX AND NOT ANDROID AND NOT IOS) ) +OCV_OPTION(WITH_1394 "Include IEEE1394 support" ON IF (NOT ANDROID AND NOT IOS) ) OCV_OPTION(WITH_AVFOUNDATION "Use AVFoundation for Video I/O" ON IF IOS) OCV_OPTION(WITH_CARBON "Use Carbon for UI instead of Cocoa" OFF IF APPLE ) OCV_OPTION(WITH_CUDA "Include NVidia Cuda Runtime support" ON IF (CMAKE_VERSION VERSION_GREATER "2.8" AND NOT ANDROID AND NOT IOS) ) diff --git a/cmake/OpenCVFindLibsVideo.cmake b/cmake/OpenCVFindLibsVideo.cmake index fbb47d486..599bd9ff6 100644 --- a/cmake/OpenCVFindLibsVideo.cmake +++ b/cmake/OpenCVFindLibsVideo.cmake @@ -81,10 +81,37 @@ endif(WITH_GIGEAPI) # --- Dc1394 --- ocv_clear_vars(HAVE_DC1394 HAVE_DC1394_2) if(WITH_1394) - CHECK_MODULE(libdc1394-2 HAVE_DC1394_2) - if(NOT HAVE_DC1394_2) - CHECK_MODULE(libdc1394 HAVE_DC1394) - endif() + if(WIN32) + if(MINGW) + find_path(CMU1394_INCLUDE_PATH "/1394common.h" + PATH_SUFFIXES include + DOC "The path to cmu1394 headers") + find_path(DC1394_2_INCLUDE_PATH "/dc1394/dc1394.h" + PATH_SUFFIXES include + DOC "The path to DC1394 2.x headers") + if(CMU1394_INCLUDE_PATH AND DC1394_2_INCLUDE_PATH) + set(CMU1394_LIB_DIR "${CMU1394_INCLUDE_PATH}/../lib" CACHE PATH "Full path of CMU1394 library directory") + set(DC1394_2_LIB_DIR "${DC1394_2_INCLUDE_PATH}/../lib" CACHE PATH "Full path of DC1394 2.x library directory") + 
if(EXISTS "${CMU1394_LIB_DIR}/lib1394camera.a" AND EXISTS "${DC1394_2_LIB_DIR}/libdc1394.a") + set(HAVE_DC1394_2 TRUE) + endif() + endif() + if(HAVE_DC1394_2) + ocv_parse_pkg("libdc1394-2" "${DC1394_2_LIB_DIR}/pkgconfig" "") + ocv_include_directories(${DC1394_2_INCLUDE_PATH}) + set(HIGHGUI_LIBRARIES ${HIGHGUI_LIBRARIES} + "${DC1394_2_LIB_DIR}/libdc1394.a" + "${CMU1394_LIB_DIR}/lib1394camera.a") + endif(HAVE_DC1394_2) + else(MINGW) + message(STATUS "libdc1394 compilation is disabled (due to only MinGW compiler supported on your platform).") + endif(MINGW) + else(WIN32) + CHECK_MODULE(libdc1394-2 HAVE_DC1394_2) + if(NOT HAVE_DC1394_2) + CHECK_MODULE(libdc1394 HAVE_DC1394) + endif() + endif(WIN32) endif(WITH_1394) # --- xine --- @@ -197,7 +224,7 @@ endif(WITH_MSMF) # --- Extra HighGUI libs on Windows --- if(WIN32) - list(APPEND HIGHGUI_LIBRARIES comctl32 gdi32 ole32 vfw32) + list(APPEND HIGHGUI_LIBRARIES comctl32 gdi32 ole32 setupapi ws2_32 vfw32) if(MINGW64) list(APPEND HIGHGUI_LIBRARIES avifil32 avicap32 winmm msvfw32) list(REMOVE_ITEM HIGHGUI_LIBRARIES vfw32) diff --git a/cmake/OpenCVUtils.cmake b/cmake/OpenCVUtils.cmake index db24c9970..e6fcda6b2 100644 --- a/cmake/OpenCVUtils.cmake +++ b/cmake/OpenCVUtils.cmake @@ -511,6 +511,13 @@ macro(ocv_parse_header2 LIBNAME HDR_PATH VARNAME) endif() endmacro() +# read single version info from the pkg file +macro(ocv_parse_pkg LIBNAME PKG_PATH SCOPE) + if(EXISTS "${PKG_PATH}/${LIBNAME}.pc") + file(STRINGS "${PKG_PATH}/${LIBNAME}.pc" line_to_parse REGEX "^Version:[ \t]+[0-9.]*.*$" LIMIT_COUNT 1) + STRING(REGEX REPLACE ".*Version: ([^ ]+).*" "\\1" ALIASOF_${LIBNAME}_VERSION "${line_to_parse}" ) + endif() +endmacro() ################################################################################################ # short command to setup source group diff --git a/modules/highgui/src/cap_dc1394_v2.cpp b/modules/highgui/src/cap_dc1394_v2.cpp index 2aa494fac..ea7e4b2b8 100644 --- a/modules/highgui/src/cap_dc1394_v2.cpp +++ 
b/modules/highgui/src/cap_dc1394_v2.cpp @@ -45,7 +45,16 @@ #include #include -#include +#ifdef WIN32 + // On Windows, we have no sys/select.h, but we need to pick up + // select() which is in winsock2. + #ifndef __SYS_SELECT_H__ + #define __SYS_SELECT_H__ 1 + #include + #endif +#else + #include +#endif /*WIN32*/ #include #include #include From 66c9029fd5937e368e31ea1e2cbcafdb525f94da Mon Sep 17 00:00:00 2001 From: Dominik Rose Date: Wed, 15 May 2013 12:15:16 +0200 Subject: [PATCH 004/121] libdc1394 - removed validation for msvc compiler in CMakeLists.txt --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 37f124824..e941f59a3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -123,7 +123,7 @@ endif() # Optional 3rd party components # =================================================== -OCV_OPTION(WITH_1394 "Include IEEE1394 support" ON IF (NOT ANDROID AND NOT IOS AND NOT MSVC) ) +OCV_OPTION(WITH_1394 "Include IEEE1394 support" ON IF (NOT ANDROID AND NOT IOS) ) OCV_OPTION(WITH_AVFOUNDATION "Use AVFoundation for Video I/O" ON IF IOS) OCV_OPTION(WITH_CARBON "Use Carbon for UI instead of Cocoa" OFF IF APPLE ) OCV_OPTION(WITH_CUDA "Include NVidia Cuda Runtime support" ON IF (CMAKE_VERSION VERSION_GREATER "2.8" AND NOT ANDROID AND NOT IOS) ) From 44a2b109b77a71a5461d11e8fa430e371b5b7f34 Mon Sep 17 00:00:00 2001 From: alex77git Date: Mon, 20 May 2013 02:24:09 +0200 Subject: [PATCH 005/121] Bug #2967, basic_structures.rst, fix 2 typos --- modules/core/doc/basic_structures.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/core/doc/basic_structures.rst b/modules/core/doc/basic_structures.rst index ca9f5e21a..91a61821a 100644 --- a/modules/core/doc/basic_structures.rst +++ b/modules/core/doc/basic_structures.rst @@ -1682,7 +1682,7 @@ Returns the type of a matrix element. .. ocv:function:: int Mat::type() const -The method returns a matrix element type. 
This is an identifier compatible with the ``CvMat`` type system, like ``CV_16SC3`` or 16-bit signed 3-channel array, and so on. +The method returns a matrix element type. This is an identifier compatible with the ``CvMat`` type system, like ``CV_16SC3`` for 16-bit signed 3-channel array, and so on. Mat::depth @@ -1691,7 +1691,7 @@ Returns the depth of a matrix element. .. ocv:function:: int Mat::depth() const -The method returns the identifier of the matrix element depth (the type of each individual channel). For example, for a 16-bit signed 3-channel array, the method returns ``CV_16S`` . A complete list of matrix types contains the following values: +The method returns the identifier of the matrix element depth (the type of each individual channel). For example, for a 16-bit signed element array, the method returns ``CV_16S`` . A complete list of matrix types contains the following values: * ``CV_8U`` - 8-bit unsigned integers ( ``0..255`` ) From 6e7b1ef252bfede93e2805922f3d5d215645affc Mon Sep 17 00:00:00 2001 From: alex77git Date: Mon, 20 May 2013 02:26:58 +0200 Subject: [PATCH 006/121] Bug #2967, void DescriptorMatcher::radiusMatch() // description unclear, only file: common_interfaces_of_descriptor_matchers.rst --- .../features2d/doc/common_interfaces_of_descriptor_matchers.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/features2d/doc/common_interfaces_of_descriptor_matchers.rst b/modules/features2d/doc/common_interfaces_of_descriptor_matchers.rst index 8596ae43d..d7e5eb4c2 100644 --- a/modules/features2d/doc/common_interfaces_of_descriptor_matchers.rst +++ b/modules/features2d/doc/common_interfaces_of_descriptor_matchers.rst @@ -217,7 +217,7 @@ For each query descriptor, finds the training descriptors not farther than the s :param compactResult: Parameter used when the mask (or masks) is not empty. If ``compactResult`` is false, the ``matches`` vector has the same size as ``queryDescriptors`` rows. 
If ``compactResult`` is true, the ``matches`` vector does not contain matches for fully masked-out query descriptors. - :param maxDistance: Threshold for the distance between matched descriptors. + :param maxDistance: Threshold for the distance between matched descriptors. Distance means here metric distance (e.g. Hamming distance), not the distance between coordinates (which is measured in Pixels)! For each query descriptor, the methods find such training descriptors that the distance between the query descriptor and the training descriptor is equal or smaller than ``maxDistance``. Found matches are returned in the distance increasing order. From bc59428b3a363967d9097d1559a9a7601fd5a179 Mon Sep 17 00:00:00 2001 From: alex77git Date: Mon, 20 May 2013 02:28:40 +0200 Subject: [PATCH 007/121] Bug #2966, insert CV_Assert(size.width>0 && size.height>0); in imshow() --- modules/highgui/src/window.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/modules/highgui/src/window.cpp b/modules/highgui/src/window.cpp index 6d2953464..6801bd8a4 100644 --- a/modules/highgui/src/window.cpp +++ b/modules/highgui/src/window.cpp @@ -256,12 +256,17 @@ namespace void cv::imshow( const string& winname, InputArray _img ) { + const Size size = _img.size(); #ifndef HAVE_OPENGL - Mat img = _img.getMat(); - CvMat c_img = img; - cvShowImage(winname.c_str(), &c_img); + CV_Assert(size.width>0 && size.height>0); + { + Mat img = _img.getMat(); + CvMat c_img = img; + cvShowImage(winname.c_str(), &c_img); + } #else const double useGl = getWindowProperty(winname, WND_PROP_OPENGL); + CV_Assert(size.width>0 && size.height>0); if (useGl <= 0) { @@ -275,7 +280,6 @@ void cv::imshow( const string& winname, InputArray _img ) if (autoSize > 0) { - Size size = _img.size(); resizeWindow(winname, size.width, size.height); } From c8abaea368fbfb5a2437ed899f8088fb798f6ba6 Mon Sep 17 00:00:00 2001 From: alex77git Date: Mon, 20 May 2013 12:06:25 +0200 Subject: [PATCH 008/121] (tab to 
space) 2x --- modules/highgui/src/window.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/highgui/src/window.cpp b/modules/highgui/src/window.cpp index 6801bd8a4..1e47bf6ee 100644 --- a/modules/highgui/src/window.cpp +++ b/modules/highgui/src/window.cpp @@ -259,11 +259,11 @@ void cv::imshow( const string& winname, InputArray _img ) const Size size = _img.size(); #ifndef HAVE_OPENGL CV_Assert(size.width>0 && size.height>0); - { + { Mat img = _img.getMat(); CvMat c_img = img; cvShowImage(winname.c_str(), &c_img); - } + } #else const double useGl = getWindowProperty(winname, WND_PROP_OPENGL); CV_Assert(size.width>0 && size.height>0); From 445860d61902c5fa563beaf1eda25731fbecfadb Mon Sep 17 00:00:00 2001 From: alex77git Date: Mon, 20 May 2013 13:19:36 +0200 Subject: [PATCH 009/121] (typo) --- modules/core/doc/basic_structures.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/core/doc/basic_structures.rst b/modules/core/doc/basic_structures.rst index 91a61821a..acfbb911d 100644 --- a/modules/core/doc/basic_structures.rst +++ b/modules/core/doc/basic_structures.rst @@ -1682,7 +1682,7 @@ Returns the type of a matrix element. .. ocv:function:: int Mat::type() const -The method returns a matrix element type. This is an identifier compatible with the ``CvMat`` type system, like ``CV_16SC3`` for 16-bit signed 3-channel array, and so on. +The method returns a matrix element type. This is an identifier compatible with the ``CvMat`` type system, like ``CV_16SC3`` or 16-bit signed 3-channel array, and so on. 
Mat::depth From 9a1cc06ebe8039ee5be508f4bade8e038c1a31e8 Mon Sep 17 00:00:00 2001 From: Leszek Swirski Date: Tue, 21 May 2013 17:53:36 +0100 Subject: [PATCH 010/121] Fix pixel value rendering for non-fixed-size QT windows --- modules/highgui/src/window_QT.cpp | 67 +++++++++++++++---------------- 1 file changed, 33 insertions(+), 34 deletions(-) diff --git a/modules/highgui/src/window_QT.cpp b/modules/highgui/src/window_QT.cpp index 50f2b9e78..438c356f7 100644 --- a/modules/highgui/src/window_QT.cpp +++ b/modules/highgui/src/window_QT.cpp @@ -2651,17 +2651,16 @@ void DefaultViewPort::paintEvent(QPaintEvent* evnt) //Now disable matrixWorld for overlay display myPainter.setWorldMatrixEnabled(false); + //overlay pixel values if zoomed in far enough + if (param_matrixWorld.m11()*ratioX >= threshold_zoom_img_region && + param_matrixWorld.m11()*ratioY >= threshold_zoom_img_region) + { + drawImgRegion(&myPainter); + } + //in mode zoom/panning if (param_matrixWorld.m11() > 1) { - if (param_matrixWorld.m11() >= threshold_zoom_img_region) - { - if (centralWidget->param_flags == CV_WINDOW_NORMAL) - startDisplayInfo("WARNING: The values displayed are the resized image's values. 
If you want the original image's values, use CV_WINDOW_AUTOSIZE", 1000); - - drawImgRegion(&myPainter); - } - drawViewOverview(&myPainter); } @@ -2887,22 +2886,24 @@ void DefaultViewPort::drawStatusBar() //accept only CV_8UC1 and CV_8UC8 image for now void DefaultViewPort::drawImgRegion(QPainter *painter) { - if (nbChannelOriginImage!=CV_8UC1 && nbChannelOriginImage!=CV_8UC3) return; - qreal offsetX = param_matrixWorld.dx()/param_matrixWorld.m11(); + double pixel_width = param_matrixWorld.m11()*ratioX; + double pixel_height = param_matrixWorld.m11()*ratioY; + + qreal offsetX = param_matrixWorld.dx()/pixel_width; offsetX = offsetX - floor(offsetX); - qreal offsetY = param_matrixWorld.dy()/param_matrixWorld.m11(); + qreal offsetY = param_matrixWorld.dy()/pixel_height; offsetY = offsetY - floor(offsetY); QSize view = size(); QVarLengthArray linesX; - for (qreal _x = offsetX*param_matrixWorld.m11(); _x < view.width(); _x += param_matrixWorld.m11() ) + for (qreal _x = offsetX*pixel_width; _x < view.width(); _x += pixel_width ) linesX.append(QLineF(_x, 0, _x, view.height())); QVarLengthArray linesY; - for (qreal _y = offsetY*param_matrixWorld.m11(); _y < view.height(); _y += param_matrixWorld.m11() ) + for (qreal _y = offsetY*pixel_height; _y < view.height(); _y += pixel_height ) linesY.append(QLineF(0, _y, view.width(), _y)); @@ -2910,27 +2911,25 @@ void DefaultViewPort::drawImgRegion(QPainter *painter) int original_font_size = f.pointSize(); //change font size //f.setPointSize(4+(param_matrixWorld.m11()-threshold_zoom_img_region)/5); - f.setPixelSize(10+(param_matrixWorld.m11()-threshold_zoom_img_region)/5); + f.setPixelSize(10+(pixel_height-threshold_zoom_img_region)/5); painter->setFont(f); - QString val; - QRgb rgbValue; - QPointF point1;//sorry, I do not know how to name it - QPointF point2;//idem - for (int j=-1;j= 0 && point2.y() >= 0) - rgbValue = image2Draw_qt_resized.pixel(QPoint(point2.x(),point2.y())); + QRgb rgbValue; + if 
(image2Draw_qt.valid(point_in_image)) + rgbValue = image2Draw_qt.pixel(point_in_image); else rgbValue = qRgb(0,0,0); @@ -2943,29 +2942,29 @@ void DefaultViewPort::drawImgRegion(QPainter *painter) painter->drawText(QRect(point1.x(),point1.y(),param_matrixWorld.m11(),param_matrixWorld.m11()/2), Qt::AlignCenter, val); */ + QString val; val = tr("%1").arg(qRed(rgbValue)); painter->setPen(QPen(Qt::red, 1)); - painter->drawText(QRect(point1.x(),point1.y(),param_matrixWorld.m11(),param_matrixWorld.m11()/3), + painter->drawText(QRect(pos_in_view.x(),pos_in_view.y(),pixel_width,pixel_height/3), Qt::AlignCenter, val); val = tr("%1").arg(qGreen(rgbValue)); painter->setPen(QPen(Qt::green, 1)); - painter->drawText(QRect(point1.x(),point1.y()+param_matrixWorld.m11()/3,param_matrixWorld.m11(),param_matrixWorld.m11()/3), + painter->drawText(QRect(pos_in_view.x(),pos_in_view.y()+pixel_height/3,pixel_width,pixel_height/3), Qt::AlignCenter, val); val = tr("%1").arg(qBlue(rgbValue)); painter->setPen(QPen(Qt::blue, 1)); - painter->drawText(QRect(point1.x(),point1.y()+2*param_matrixWorld.m11()/3,param_matrixWorld.m11(),param_matrixWorld.m11()/3), + painter->drawText(QRect(pos_in_view.x(),pos_in_view.y()+2*pixel_height/3,pixel_width,pixel_height/3), Qt::AlignCenter, val); } if (nbChannelOriginImage==CV_8UC1) { - - val = tr("%1").arg(qRed(rgbValue)); - painter->drawText(QRect(point1.x(),point1.y(),param_matrixWorld.m11(),param_matrixWorld.m11()), + QString val = tr("%1").arg(qRed(rgbValue)); + painter->drawText(QRect(pos_in_view.x(),pos_in_view.y(),pixel_width,pixel_height), Qt::AlignCenter, val); } } From 7d0f6b4d68b37234acdb0a399e2e95b9a7d39143 Mon Sep 17 00:00:00 2001 From: Leszek Swirski Date: Tue, 21 May 2013 17:54:58 +0100 Subject: [PATCH 011/121] Fix image saving from QT toolbar --- modules/highgui/src/window_QT.cpp | 18 ++++++++---------- modules/highgui/src/window_QT.h | 1 - 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/modules/highgui/src/window_QT.cpp 
b/modules/highgui/src/window_QT.cpp index 438c356f7..0c50c7070 100644 --- a/modules/highgui/src/window_QT.cpp +++ b/modules/highgui/src/window_QT.cpp @@ -2473,35 +2473,33 @@ void DefaultViewPort::saveView() if (!fileName.isEmpty()) //save the picture { QString extension = fileName.right(3); - - // (no need anymore) create the image resized to receive the 'screenshot' - // image2Draw_qt_resized = QImage(viewport()->width(), viewport()->height(),QImage::Format_RGB888); - - QPainter saveimage(&image2Draw_qt_resized); - this->render(&saveimage); + + // Create a new pixmap to render the viewport into + QPixmap viewportPixmap(viewport()->size()); + viewport()->render(&viewportPixmap); // Save it.. if (QString::compare(extension, "png", Qt::CaseInsensitive) == 0) { - image2Draw_qt_resized.save(fileName, "PNG"); + viewportPixmap.save(fileName, "PNG"); return; } if (QString::compare(extension, "jpg", Qt::CaseInsensitive) == 0) { - image2Draw_qt_resized.save(fileName, "JPG"); + viewportPixmap.save(fileName, "JPG"); return; } if (QString::compare(extension, "bmp", Qt::CaseInsensitive) == 0) { - image2Draw_qt_resized.save(fileName, "BMP"); + viewportPixmap.save(fileName, "BMP"); return; } if (QString::compare(extension, "jpeg", Qt::CaseInsensitive) == 0) { - image2Draw_qt_resized.save(fileName, "JPEG"); + viewportPixmap.save(fileName, "JPEG"); return; } diff --git a/modules/highgui/src/window_QT.h b/modules/highgui/src/window_QT.h index 089997f51..a96a8c6e6 100644 --- a/modules/highgui/src/window_QT.h +++ b/modules/highgui/src/window_QT.h @@ -522,7 +522,6 @@ private: CvMat* image2Draw_mat; QImage image2Draw_qt; - QImage image2Draw_qt_resized; int nbChannelOriginImage; //for mouse callback From ab6be9b7b7691967e42297aa6d3a67fb07597fd8 Mon Sep 17 00:00:00 2001 From: Peter Minin Date: Thu, 6 Jun 2013 19:00:55 +0400 Subject: [PATCH 012/121] Add a variant of detectMultiScale with an argument 'weights' that receives the number of neighbors joined into each detected object --- 
.../objdetect/doc/cascade_classification.rst | 3 +++ .../include/opencv2/objdetect/objdetect.hpp | 11 ++++++++- modules/objdetect/src/cascadedetect.cpp | 24 ++++++++++++++++++- 3 files changed, 36 insertions(+), 2 deletions(-) diff --git a/modules/objdetect/doc/cascade_classification.rst b/modules/objdetect/doc/cascade_classification.rst index eb07a6c8f..a00bdc933 100644 --- a/modules/objdetect/doc/cascade_classification.rst +++ b/modules/objdetect/doc/cascade_classification.rst @@ -189,6 +189,7 @@ CascadeClassifier::detectMultiScale Detects objects of different sizes in the input image. The detected objects are returned as a list of rectangles. .. ocv:function:: void CascadeClassifier::detectMultiScale( const Mat& image, vector& objects, double scaleFactor=1.1, int minNeighbors=3, int flags=0, Size minSize=Size(), Size maxSize=Size()) +.. ocv:function:: void CascadeClassifier::detectMultiScale( const Mat& image, vector& objects, vector& weights, double scaleFactor=1.1, int minNeighbors=3, int flags=0, Size minSize=Size(), Size maxSize=Size()) .. ocv:pyfunction:: cv2.CascadeClassifier.detectMultiScale(image[, scaleFactor[, minNeighbors[, flags[, minSize[, maxSize]]]]]) -> objects .. ocv:pyfunction:: cv2.CascadeClassifier.detectMultiScale(image, rejectLevels, levelWeights[, scaleFactor[, minNeighbors[, flags[, minSize[, maxSize[, outputRejectLevels]]]]]]) -> objects @@ -203,6 +204,8 @@ Detects objects of different sizes in the input image. The detected objects are :param objects: Vector of rectangles where each rectangle contains the detected object. + :param weights: Vector of weights of the corresponding objects. Weight is the number of neighboring positively classified rectangles that were joined into one object. + :param scaleFactor: Parameter specifying how much the image size is reduced at each image scale. :param minNeighbors: Parameter specifying how many neighbors each candidate rectangle should have to retain it. 
diff --git a/modules/objdetect/include/opencv2/objdetect/objdetect.hpp b/modules/objdetect/include/opencv2/objdetect/objdetect.hpp index 8d7efb0ba..7924b67e5 100644 --- a/modules/objdetect/include/opencv2/objdetect/objdetect.hpp +++ b/modules/objdetect/include/opencv2/objdetect/objdetect.hpp @@ -382,6 +382,14 @@ public: Size minSize=Size(), Size maxSize=Size() ); + CV_WRAP virtual void detectMultiScale( const Mat& image, + CV_OUT vector& objects, + vector& weights, + double scaleFactor=1.1, + int minNeighbors=3, int flags=0, + Size minSize=Size(), + Size maxSize=Size() ); + CV_WRAP virtual void detectMultiScale( const Mat& image, CV_OUT vector& objects, vector& rejectLevels, @@ -390,7 +398,8 @@ public: int minNeighbors=3, int flags=0, Size minSize=Size(), Size maxSize=Size(), - bool outputRejectLevels=false ); + bool outputRejectLevels=false, + bool outputWeights=false ); bool isOldFormatCascade() const; diff --git a/modules/objdetect/src/cascadedetect.cpp b/modules/objdetect/src/cascadedetect.cpp index 9e78dce24..341ef2a0d 100644 --- a/modules/objdetect/src/cascadedetect.cpp +++ b/modules/objdetect/src/cascadedetect.cpp @@ -1023,6 +1023,7 @@ public: }; struct getRect { Rect operator ()(const CvAvgComp& e) const { return e.rect; } }; +struct getNeighbors { int operator ()(const CvAvgComp& e) const { return e.neighbors; } }; bool CascadeClassifier::detectSingleScale( const Mat& image, int stripCount, Size processingRectSize, @@ -1092,11 +1093,12 @@ void CascadeClassifier::detectMultiScale( const Mat& image, vector& object vector& levelWeights, double scaleFactor, int minNeighbors, int flags, Size minObjectSize, Size maxObjectSize, - bool outputRejectLevels ) + bool outputRejectLevels, bool outputWeights ) { const double GROUP_EPS = 0.2; CV_Assert( scaleFactor > 1 && image.depth() == CV_8U ); + CV_Assert( !( outputRejectLevels && outputWeights ) ); if( empty() ) return; @@ -1111,6 +1113,12 @@ void CascadeClassifier::detectMultiScale( const Mat& image, vector& object 
Seq(_objects).copyTo(vecAvgComp); objects.resize(vecAvgComp.size()); std::transform(vecAvgComp.begin(), vecAvgComp.end(), objects.begin(), getRect()); + if( outputWeights ) + { + rejectLevels.resize(vecAvgComp.size()); + std::transform(vecAvgComp.begin(), vecAvgComp.end(), rejectLevels.begin(), + getNeighbors()); + } return; } @@ -1183,6 +1191,10 @@ void CascadeClassifier::detectMultiScale( const Mat& image, vector& object { groupRectangles( objects, rejectLevels, levelWeights, minNeighbors, GROUP_EPS ); } + else if( outputWeights ) + { + groupRectangles( objects, rejectLevels, minNeighbors, GROUP_EPS ); + } else { groupRectangles( objects, minNeighbors, GROUP_EPS ); @@ -1199,6 +1211,16 @@ void CascadeClassifier::detectMultiScale( const Mat& image, vector& object minNeighbors, flags, minObjectSize, maxObjectSize, false ); } +void CascadeClassifier::detectMultiScale( const Mat& image, CV_OUT vector& objects, + vector& weights, double scaleFactor, + int minNeighbors, int flags, Size minObjectSize, + Size maxObjectSize ) +{ + vector fakeLevelWeights; + detectMultiScale( image, objects, weights, fakeLevelWeights, scaleFactor, + minNeighbors, flags, minObjectSize, maxObjectSize, false, true ); +} + bool CascadeClassifier::Data::read(const FileNode &root) { static const float THRESHOLD_EPS = 1e-5f; From 33d1f675015b22b7f4a938a934c0448b21c731ee Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Fri, 7 Jun 2013 19:07:00 +0400 Subject: [PATCH 013/121] Include the OpenCV config headers into every module. This has no bearing on compilation, but it makes them show up in IDEs. 
--- cmake/OpenCVModule.cmake | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cmake/OpenCVModule.cmake b/cmake/OpenCVModule.cmake index e8619ad7d..81340bd0e 100644 --- a/cmake/OpenCVModule.cmake +++ b/cmake/OpenCVModule.cmake @@ -470,7 +470,8 @@ endmacro() # ocv_create_module() # ocv_create_module(SKIP_LINK) macro(ocv_create_module) - add_library(${the_module} ${OPENCV_MODULE_TYPE} ${OPENCV_MODULE_${the_module}_HEADERS} ${OPENCV_MODULE_${the_module}_SOURCES}) + add_library(${the_module} ${OPENCV_MODULE_TYPE} ${OPENCV_MODULE_${the_module}_HEADERS} ${OPENCV_MODULE_${the_module}_SOURCES} + "${OPENCV_CONFIG_FILE_INCLUDE_DIR}/cvconfig.h" "${OPENCV_CONFIG_FILE_INCLUDE_DIR}/opencv2/opencv_modules.hpp") if(NOT "${ARGN}" STREQUAL "SKIP_LINK") target_link_libraries(${the_module} ${OPENCV_MODULE_${the_module}_DEPS} ${OPENCV_MODULE_${the_module}_DEPS_EXT} ${OPENCV_LINKER_LIBS} ${IPP_LIBS} ${ARGN}) From 264d26e6718ed55df0f3f187908e3a28481bbe2b Mon Sep 17 00:00:00 2001 From: Andrey Pavlenko Date: Sat, 8 Jun 2013 12:41:57 +0400 Subject: [PATCH 014/121] fixing empty Mat case --- modules/java/generator/src/java/core+MatOfByte.java | 4 ++-- modules/java/generator/src/java/core+MatOfDouble.java | 4 ++-- modules/java/generator/src/java/core+MatOfFloat.java | 4 ++-- modules/java/generator/src/java/core+MatOfFloat4.java | 4 ++-- modules/java/generator/src/java/core+MatOfFloat6.java | 4 ++-- modules/java/generator/src/java/core+MatOfInt.java | 4 ++-- modules/java/generator/src/java/core+MatOfInt4.java | 4 ++-- modules/java/generator/src/java/core+MatOfKeyPoint.java | 4 ++-- modules/java/generator/src/java/core+MatOfPoint.java | 4 ++-- modules/java/generator/src/java/core+MatOfPoint2f.java | 4 ++-- modules/java/generator/src/java/core+MatOfPoint3.java | 4 ++-- modules/java/generator/src/java/core+MatOfPoint3f.java | 4 ++-- modules/java/generator/src/java/core+MatOfRect.java | 4 ++-- 13 files changed, 26 insertions(+), 26 deletions(-) diff --git 
a/modules/java/generator/src/java/core+MatOfByte.java b/modules/java/generator/src/java/core+MatOfByte.java index 0ebdb6673..b3fe5691e 100644 --- a/modules/java/generator/src/java/core+MatOfByte.java +++ b/modules/java/generator/src/java/core+MatOfByte.java @@ -14,7 +14,7 @@ public class MatOfByte extends Mat { protected MatOfByte(long addr) { super(addr); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? } @@ -25,7 +25,7 @@ public class MatOfByte extends Mat { public MatOfByte(Mat m) { super(m, Range.all()); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? } diff --git a/modules/java/generator/src/java/core+MatOfDouble.java b/modules/java/generator/src/java/core+MatOfDouble.java index cca525110..4eb7cbc28 100644 --- a/modules/java/generator/src/java/core+MatOfDouble.java +++ b/modules/java/generator/src/java/core+MatOfDouble.java @@ -14,7 +14,7 @@ public class MatOfDouble extends Mat { protected MatOfDouble(long addr) { super(addr); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? } @@ -25,7 +25,7 @@ public class MatOfDouble extends Mat { public MatOfDouble(Mat m) { super(m, Range.all()); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? 
} diff --git a/modules/java/generator/src/java/core+MatOfFloat.java b/modules/java/generator/src/java/core+MatOfFloat.java index ce73b6f63..96bbeab9f 100644 --- a/modules/java/generator/src/java/core+MatOfFloat.java +++ b/modules/java/generator/src/java/core+MatOfFloat.java @@ -14,7 +14,7 @@ public class MatOfFloat extends Mat { protected MatOfFloat(long addr) { super(addr); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? } @@ -25,7 +25,7 @@ public class MatOfFloat extends Mat { public MatOfFloat(Mat m) { super(m, Range.all()); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? } diff --git a/modules/java/generator/src/java/core+MatOfFloat4.java b/modules/java/generator/src/java/core+MatOfFloat4.java index 8a3e51014..aaa97b799 100644 --- a/modules/java/generator/src/java/core+MatOfFloat4.java +++ b/modules/java/generator/src/java/core+MatOfFloat4.java @@ -14,7 +14,7 @@ public class MatOfFloat4 extends Mat { protected MatOfFloat4(long addr) { super(addr); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? } @@ -25,7 +25,7 @@ public class MatOfFloat4 extends Mat { public MatOfFloat4(Mat m) { super(m, Range.all()); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? 
} diff --git a/modules/java/generator/src/java/core+MatOfFloat6.java b/modules/java/generator/src/java/core+MatOfFloat6.java index 1e23101a7..68e6249b6 100644 --- a/modules/java/generator/src/java/core+MatOfFloat6.java +++ b/modules/java/generator/src/java/core+MatOfFloat6.java @@ -14,7 +14,7 @@ public class MatOfFloat6 extends Mat { protected MatOfFloat6(long addr) { super(addr); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? } @@ -25,7 +25,7 @@ public class MatOfFloat6 extends Mat { public MatOfFloat6(Mat m) { super(m, Range.all()); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? } diff --git a/modules/java/generator/src/java/core+MatOfInt.java b/modules/java/generator/src/java/core+MatOfInt.java index 80c5b3a5c..33e5124e4 100644 --- a/modules/java/generator/src/java/core+MatOfInt.java +++ b/modules/java/generator/src/java/core+MatOfInt.java @@ -15,7 +15,7 @@ public class MatOfInt extends Mat { protected MatOfInt(long addr) { super(addr); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? } @@ -26,7 +26,7 @@ public class MatOfInt extends Mat { public MatOfInt(Mat m) { super(m, Range.all()); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? 
} diff --git a/modules/java/generator/src/java/core+MatOfInt4.java b/modules/java/generator/src/java/core+MatOfInt4.java index 60277103c..c924233a6 100644 --- a/modules/java/generator/src/java/core+MatOfInt4.java +++ b/modules/java/generator/src/java/core+MatOfInt4.java @@ -15,7 +15,7 @@ public class MatOfInt4 extends Mat { protected MatOfInt4(long addr) { super(addr); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? } @@ -26,7 +26,7 @@ public class MatOfInt4 extends Mat { public MatOfInt4(Mat m) { super(m, Range.all()); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? } diff --git a/modules/java/generator/src/java/core+MatOfKeyPoint.java b/modules/java/generator/src/java/core+MatOfKeyPoint.java index b91fedcee..b402fe124 100644 --- a/modules/java/generator/src/java/core+MatOfKeyPoint.java +++ b/modules/java/generator/src/java/core+MatOfKeyPoint.java @@ -16,7 +16,7 @@ public class MatOfKeyPoint extends Mat { protected MatOfKeyPoint(long addr) { super(addr); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? } @@ -27,7 +27,7 @@ public class MatOfKeyPoint extends Mat { public MatOfKeyPoint(Mat m) { super(m, Range.all()); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? 
} diff --git a/modules/java/generator/src/java/core+MatOfPoint.java b/modules/java/generator/src/java/core+MatOfPoint.java index 23eeed0eb..6d23ed116 100644 --- a/modules/java/generator/src/java/core+MatOfPoint.java +++ b/modules/java/generator/src/java/core+MatOfPoint.java @@ -14,7 +14,7 @@ public class MatOfPoint extends Mat { protected MatOfPoint(long addr) { super(addr); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? } @@ -25,7 +25,7 @@ public class MatOfPoint extends Mat { public MatOfPoint(Mat m) { super(m, Range.all()); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? } diff --git a/modules/java/generator/src/java/core+MatOfPoint2f.java b/modules/java/generator/src/java/core+MatOfPoint2f.java index ba4be4ac5..0c6960730 100644 --- a/modules/java/generator/src/java/core+MatOfPoint2f.java +++ b/modules/java/generator/src/java/core+MatOfPoint2f.java @@ -14,7 +14,7 @@ public class MatOfPoint2f extends Mat { protected MatOfPoint2f(long addr) { super(addr); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? } @@ -25,7 +25,7 @@ public class MatOfPoint2f extends Mat { public MatOfPoint2f(Mat m) { super(m, Range.all()); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? 
} diff --git a/modules/java/generator/src/java/core+MatOfPoint3.java b/modules/java/generator/src/java/core+MatOfPoint3.java index 16e21301e..0c8374f25 100644 --- a/modules/java/generator/src/java/core+MatOfPoint3.java +++ b/modules/java/generator/src/java/core+MatOfPoint3.java @@ -14,7 +14,7 @@ public class MatOfPoint3 extends Mat { protected MatOfPoint3(long addr) { super(addr); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? } @@ -25,7 +25,7 @@ public class MatOfPoint3 extends Mat { public MatOfPoint3(Mat m) { super(m, Range.all()); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? } diff --git a/modules/java/generator/src/java/core+MatOfPoint3f.java b/modules/java/generator/src/java/core+MatOfPoint3f.java index 97e2a9570..b0d50d450 100644 --- a/modules/java/generator/src/java/core+MatOfPoint3f.java +++ b/modules/java/generator/src/java/core+MatOfPoint3f.java @@ -14,7 +14,7 @@ public class MatOfPoint3f extends Mat { protected MatOfPoint3f(long addr) { super(addr); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? } @@ -25,7 +25,7 @@ public class MatOfPoint3f extends Mat { public MatOfPoint3f(Mat m) { super(m, Range.all()); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? 
} diff --git a/modules/java/generator/src/java/core+MatOfRect.java b/modules/java/generator/src/java/core+MatOfRect.java index 2e58bfe89..3844d9dfb 100644 --- a/modules/java/generator/src/java/core+MatOfRect.java +++ b/modules/java/generator/src/java/core+MatOfRect.java @@ -15,7 +15,7 @@ public class MatOfRect extends Mat { protected MatOfRect(long addr) { super(addr); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? } @@ -26,7 +26,7 @@ public class MatOfRect extends Mat { public MatOfRect(Mat m) { super(m, Range.all()); - if(checkVector(_channels, _depth) < 0 ) + if( !empty() && checkVector(_channels, _depth) < 0 ) throw new IllegalArgumentException("Incomatible Mat"); //FIXME: do we need release() here? } From a39a9f677f39e5301a533d39ebf7c9569b1d9126 Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Mon, 10 Jun 2013 11:06:28 +0400 Subject: [PATCH 015/121] NativeActivity sample build with Eclipse CDT fixed. 
--- samples/android/native-activity/.cproject | 136 ++++++++---------- samples/android/native-activity/.project | 68 +++++++++ .../android/native-activity/jni/native.cpp | 14 +- 3 files changed, 136 insertions(+), 82 deletions(-) diff --git a/samples/android/native-activity/.cproject b/samples/android/native-activity/.cproject index 09687f3ac..44aadfe9a 100644 --- a/samples/android/native-activity/.cproject +++ b/samples/android/native-activity/.cproject @@ -1,75 +1,61 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/samples/android/native-activity/.project b/samples/android/native-activity/.project index cf0823c0b..c20be83f6 100644 --- a/samples/android/native-activity/.project +++ b/samples/android/native-activity/.project @@ -5,6 +5,64 @@ + + org.eclipse.cdt.managedbuilder.core.genmakebuilder + auto,full,incremental, + + + ?name? 
+ + + + org.eclipse.cdt.make.core.append_environment + true + + + org.eclipse.cdt.make.core.autoBuildTarget + + + + org.eclipse.cdt.make.core.buildArguments + + + + org.eclipse.cdt.make.core.buildCommand + "${NDKROOT}/ndk-build.cmd" + + + org.eclipse.cdt.make.core.cleanBuildTarget + clean + + + org.eclipse.cdt.make.core.contents + org.eclipse.cdt.make.core.activeConfigSettings + + + org.eclipse.cdt.make.core.enableAutoBuild + true + + + org.eclipse.cdt.make.core.enableCleanBuild + false + + + org.eclipse.cdt.make.core.enableFullBuild + true + + + org.eclipse.cdt.make.core.fullBuildTarget + + + + org.eclipse.cdt.make.core.stopOnError + true + + + org.eclipse.cdt.make.core.useDefaultBuildCmd + false + + + com.android.ide.eclipse.adt.ResourceManagerBuilder @@ -25,9 +83,19 @@ + + org.eclipse.cdt.managedbuilder.core.ScannerConfigBuilder + full,incremental, + + + com.android.ide.eclipse.adt.AndroidNature org.eclipse.jdt.core.javanature + org.eclipse.cdt.core.cnature + org.eclipse.cdt.core.ccnature + org.eclipse.cdt.managedbuilder.core.managedBuildNature + org.eclipse.cdt.managedbuilder.core.ScannerConfigNature diff --git a/samples/android/native-activity/jni/native.cpp b/samples/android/native-activity/jni/native.cpp index 66bc006db..5cfb3a961 100644 --- a/samples/android/native-activity/jni/native.cpp +++ b/samples/android/native-activity/jni/native.cpp @@ -9,7 +9,6 @@ #include #include #include -#include #include #include @@ -60,7 +59,7 @@ static cv::Size calc_optimal_camera_resolution(const char* supported, int width, } } - idx++; // to skip coma symbol + idx++; // to skip comma symbol } while(supported[idx-1] != '\0'); @@ -86,9 +85,9 @@ static void engine_draw_frame(Engine* engine, const cv::Mat& frame) for (int yy = top_indent; yy < std::min(frame.rows+top_indent, buffer.height); yy++) { - unsigned char* line = (unsigned char*)pixels; - memcpy(line+left_indent*4*sizeof(unsigned char), frame.ptr(yy), - std::min(frame.cols, buffer.width)*4*sizeof(unsigned char)); + 
unsigned char* line = (unsigned char*)pixels + left_indent*4*sizeof(unsigned char); + size_t line_size = std::min(frame.cols, buffer.width)*4*sizeof(unsigned char); + memcpy(line, frame.ptr(yy), line_size); // go to next line pixels = (int32_t*)pixels + buffer.stride; } @@ -139,7 +138,7 @@ static void engine_handle_cmd(android_app* app, int32_t cmd) return; } - LOGI("Camera initialized at resoution %dx%d", camera_resolution.width, camera_resolution.height); + LOGI("Camera initialized at resolution %dx%d", camera_resolution.width, camera_resolution.height); } break; case APP_CMD_TERM_WINDOW: @@ -157,7 +156,8 @@ void android_main(android_app* app) // Make sure glue isn't stripped. app_dummy(); - memset(&engine, 0, sizeof(engine)); + size_t engine_size = sizeof(engine); // for Eclipse CDT parser + memset((void*)&engine, 0, engine_size); app->userData = &engine; app->onAppCmd = engine_handle_cmd; engine.app = app; From e77abeef16a4214e3240dc5fb91cd2b0459b7a10 Mon Sep 17 00:00:00 2001 From: Peng Xiao Date: Mon, 10 Jun 2013 16:38:22 +0800 Subject: [PATCH 016/121] Add a new global function to control ocl binary storage Previously the feature is controlled by setBinpath implicitly. We add the function to cope with setBinpath and setBinpath is only useful when setBinaryDiskCache is set. Refer to the header to see more info. 
--- modules/ocl/include/opencv2/ocl/ocl.hpp | 20 ++++++-- modules/ocl/src/initialization.cpp | 62 ++++++++++++++++++------- 2 files changed, 62 insertions(+), 20 deletions(-) diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp index 01b0f72d2..730c2e6b8 100644 --- a/modules/ocl/include/opencv2/ocl/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl/ocl.hpp @@ -118,9 +118,6 @@ namespace cv //the devnum is the index of the selected device in DeviceName vector of INfo CV_EXPORTS void setDevice(Info &oclinfo, int devnum = 0); - //optional function, if you want save opencl binary kernel to the file, set its path - CV_EXPORTS void setBinpath(const char *path); - //The two functions below enable other opencl program to use ocl module's cl_context and cl_command_queue //returns cl_context * CV_EXPORTS void* getoclContext(); @@ -181,6 +178,23 @@ namespace cv bool finish = true, bool measureKernelTime = false, bool cleanUp = true); + //! Enable or disable OpenCL program binary caching onto local disk + // After a program (*.cl files in opencl/ folder) is built at runtime, we allow the compiled program to be + // cached onto local disk automatically, which may accelerate subsequent runs. + // Caching mode is controlled by the following enum + // Note, the feature is by default enabled when OpenCV is built in release mode. + // enum BinaryDiskCacheMode + const int CACHE_NONE = 0; + const int CACHE_DEBUG = 0x1 << 0; + const int CACHE_RELEASE = 0x1 << 1; + const int CACHE_ALL = CACHE_DEBUG | CACHE_RELEASE; + const int CACHE_UPDATE = 0x1 << 2; // if the binary cache file with the same name is already on the disk, it will be updated. + + CV_EXPORTS void setBinaryDiskCache(int mode = CACHE_RELEASE, cv::String path = "./"); + + //! 
set where binary cache to be saved to + CV_EXPORTS void setBinpath(const char *path); + class CV_EXPORTS oclMatExpr; //////////////////////////////// oclMat //////////////////////////////// class CV_EXPORTS oclMat diff --git a/modules/ocl/src/initialization.cpp b/modules/ocl/src/initialization.cpp index a9cd08b9f..9a0915ce5 100644 --- a/modules/ocl/src/initialization.cpp +++ b/modules/ocl/src/initialization.cpp @@ -124,7 +124,8 @@ namespace cv cacheSize = 0; } - + // not to be exported to dynamic lib + void setBinaryDiskCacheImpl(int mode, String path, Info::Impl * impl); struct Info::Impl { cl_platform_id oclplatform; @@ -142,22 +143,12 @@ namespace cv char extra_options[512]; int double_support; int unified_memory; //1 means integrated GPU, otherwise this value is 0 + bool enable_disk_cache; + bool update_disk_cache; string binpath; int refcounter; - Impl() - { - refcounter = 1; - oclplatform = 0; - oclcontext = 0; - clCmdQueue = 0; - devnum = -1; - maxComputeUnits = 0; - maxWorkGroupSize = 0; - memset(extra_options, 0, 512); - double_support = 0; - unified_memory = 0; - } + Impl(); void setDevice(void *ctx, void *q, int devnum); @@ -182,6 +173,25 @@ namespace cv void releaseResources(); }; + Info::Impl::Impl() + :oclplatform(0), + oclcontext(0), + clCmdQueue(0), + devnum(-1), + maxWorkGroupSize(0), + maxDimensions(0), + maxComputeUnits(0), + double_support(0), + unified_memory(0), + enable_disk_cache(false), + update_disk_cache(false), + binpath("./"), + refcounter(1) + { + memset(extra_options, 0, 512); + setBinaryDiskCacheImpl(CACHE_RELEASE, String("./"), this); + } + void Info::Impl::releaseResources() { devnum = -1; @@ -494,6 +504,24 @@ namespace cv return openCLGetKernelFromSource(clCxt, source, kernelName, NULL); } + void setBinaryDiskCacheImpl(int mode, String path, Info::Impl * impl) + { + impl->update_disk_cache = (mode & CACHE_UPDATE) == CACHE_UPDATE; + impl->enable_disk_cache = +#if !defined(NDEBUG) || defined(_DEBUG) + (mode & CACHE_DEBUG) == 
CACHE_DEBUG; +#else + (mode & CACHE_RELEASE) == CACHE_RELEASE; +#endif + if(impl->enable_disk_cache && !path.empty()) + { + impl->binpath = path; + } + } + void setBinaryDiskCache(int mode, cv::String path) + { + setBinaryDiskCacheImpl(mode, path, Context::getContext()->impl); + } void setBinpath(const char *path) { @@ -573,8 +601,8 @@ namespace cv filename = clCxt->impl->binpath + kernelName + "_" + clCxt->impl->devName[clCxt->impl->devnum] + ".clb"; } - FILE *fp = fopen(filename.c_str(), "rb"); - if(fp == NULL || clCxt->impl->binpath.size() == 0) //we should generate a binary file for the first time. + FILE *fp = clCxt->impl->enable_disk_cache ? fopen(filename.c_str(), "rb") : NULL; + if(fp == NULL || clCxt->impl->update_disk_cache) { if(fp != NULL) fclose(fp); @@ -583,7 +611,7 @@ namespace cv clCxt->impl->oclcontext, 1, source, NULL, &status); openCLVerifyCall(status); status = clBuildProgram(program, 1, &(clCxt->impl->devices[clCxt->impl->devnum]), all_build_options, NULL, NULL); - if(status == CL_SUCCESS && clCxt->impl->binpath.size()) + if(status == CL_SUCCESS && clCxt->impl->enable_disk_cache) savetofile(clCxt, program, filename.c_str()); } else From 41482fe56ca1eb98461bbd202c869faf4442c19c Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Mon, 10 Jun 2013 13:30:23 +0400 Subject: [PATCH 017/121] Erase MatchPairsBody's copy constructor. It's the same as the implicitly defined one, and it causes a -Wextra warning (not initializing the base class in a copy constructor). 
--- modules/stitching/src/matchers.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/modules/stitching/src/matchers.cpp b/modules/stitching/src/matchers.cpp index b5bd8ad4d..d918cfff2 100644 --- a/modules/stitching/src/matchers.cpp +++ b/modules/stitching/src/matchers.cpp @@ -68,10 +68,6 @@ struct DistIdxPair struct MatchPairsBody : ParallelLoopBody { - MatchPairsBody(const MatchPairsBody& other) - : matcher(other.matcher), features(other.features), - pairwise_matches(other.pairwise_matches), near_pairs(other.near_pairs) {} - MatchPairsBody(FeaturesMatcher &_matcher, const vector &_features, vector &_pairwise_matches, vector > &_near_pairs) : matcher(_matcher), features(_features), From 99a5b3417aabfb52d1c75e4999e2e0ada5983a50 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Mon, 10 Jun 2013 13:41:46 +0400 Subject: [PATCH 018/121] added missing BackgroundSubtractorMOG2 parameters --- modules/video/src/video_init.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/modules/video/src/video_init.cpp b/modules/video/src/video_init.cpp index 0f3cec144..7ec860fbd 100644 --- a/modules/video/src/video_init.cpp +++ b/modules/video/src/video_init.cpp @@ -60,7 +60,15 @@ CV_INIT_ALGORITHM(BackgroundSubtractorMOG2, "BackgroundSubtractor.MOG2", obj.info()->addParam(obj, "history", obj.history); obj.info()->addParam(obj, "nmixtures", obj.nmixtures); obj.info()->addParam(obj, "varThreshold", obj.varThreshold); - obj.info()->addParam(obj, "detectShadows", obj.bShadowDetection)); + obj.info()->addParam(obj, "detectShadows", obj.bShadowDetection); + obj.info()->addParam(obj, "backgroundRatio", obj.backgroundRatio); + obj.info()->addParam(obj, "varThresholdGen", obj.varThresholdGen); + obj.info()->addParam(obj, "fVarInit", obj.fVarInit); + obj.info()->addParam(obj, "fVarMin", obj.fVarMin); + obj.info()->addParam(obj, "fVarMax", obj.fVarMax); + obj.info()->addParam(obj, "fCT", obj.fCT); + obj.info()->addParam(obj, "nShadowDetection", 
obj.nShadowDetection); + obj.info()->addParam(obj, "fTau", obj.fTau)); /////////////////////////////////////////////////////////////////////////////////////////////////////////// From 1d8cd3a717160f2097bcb7765097ed6196b3b535 Mon Sep 17 00:00:00 2001 From: Peng Xiao Date: Mon, 10 Jun 2013 18:37:48 +0800 Subject: [PATCH 019/121] Add ocl CLACH implementation. Test cases (accuracy and performance) are provided. --- modules/ocl/include/opencv2/ocl/ocl.hpp | 17 ++ modules/ocl/perf/perf_imgproc.cpp | 49 ++++- modules/ocl/src/imgproc.cpp | 185 ++++++++++++++++ modules/ocl/src/opencl/imgproc_clahe.cl | 275 ++++++++++++++++++++++++ modules/ocl/test/test_imgproc.cpp | 45 ++++ 5 files changed, 570 insertions(+), 1 deletion(-) create mode 100644 modules/ocl/src/opencl/imgproc_clahe.cl diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp index 01b0f72d2..4a5debf50 100644 --- a/modules/ocl/include/opencv2/ocl/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl/ocl.hpp @@ -483,6 +483,23 @@ namespace cv CV_EXPORTS void calcHist(const oclMat &mat_src, oclMat &mat_hist); //! only 8UC1 and 256 bins is supported now CV_EXPORTS void equalizeHist(const oclMat &mat_src, oclMat &mat_dst); + + //! only 8UC1 is supported now + class CV_EXPORTS CLAHE + { + public: + virtual void apply(const oclMat &src, oclMat &dst) = 0; + + virtual void setClipLimit(double clipLimit) = 0; + virtual double getClipLimit() const = 0; + + virtual void setTilesGridSize(Size tileGridSize) = 0; + virtual Size getTilesGridSize() const = 0; + + virtual void collectGarbage() = 0; + }; + CV_EXPORTS Ptr createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8)); + //! 
bilateralFilter // supports 8UC1 8UC4 CV_EXPORTS void bilateralFilter(const oclMat& src, oclMat& dst, int d, double sigmaColor, double sigmaSpave, int borderType=BORDER_DEFAULT); diff --git a/modules/ocl/perf/perf_imgproc.cpp b/modules/ocl/perf/perf_imgproc.cpp index 0aef8b27e..e87e8213d 100644 --- a/modules/ocl/perf/perf_imgproc.cpp +++ b/modules/ocl/perf/perf_imgproc.cpp @@ -921,4 +921,51 @@ PERFTEST(remap) } } -} \ No newline at end of file +} +///////////// CLAHE //////////////////////// +PERFTEST(CLAHE) +{ + Mat src, dst, ocl_dst; + cv::ocl::oclMat d_src, d_dst; + int all_type[] = {CV_8UC1}; + std::string type_name[] = {"CV_8UC1"}; + + double clipLimit = 40.0; + + cv::Ptr clahe = cv::createCLAHE(clipLimit); + cv::Ptr d_clahe = cv::ocl::createCLAHE(clipLimit); + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) + { + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) + { + SUBTEST << size << 'x' << size << "; " << type_name[j] ; + + gen(src, size, size, all_type[j], 0, 256); + + CPU_ON; + clahe->apply(src, dst); + CPU_OFF; + + d_src.upload(src); + + WARMUP_ON; + d_clahe->apply(d_src, d_dst); + WARMUP_OFF; + + ocl_dst = d_dst; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 1.0); + + GPU_ON; + d_clahe->apply(d_src, d_dst); + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + d_clahe->apply(d_src, d_dst); + d_dst.download(dst); + GPU_FULL_OFF; + } + } +} diff --git a/modules/ocl/src/imgproc.cpp b/modules/ocl/src/imgproc.cpp index ef48b8eaf..3dbd68df8 100644 --- a/modules/ocl/src/imgproc.cpp +++ b/modules/ocl/src/imgproc.cpp @@ -25,6 +25,7 @@ // Xu Pang, pangxu010@163.com // Wu Zailong, bullet@yeah.net // Wenju He, wenju@multicorewareinc.com +// Sen Liu, swjtuls1987@126.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -80,6 +81,7 @@ namespace cv extern const char *imgproc_calcHarris; extern const char *imgproc_calcMinEigenVal; 
extern const char *imgproc_convolve; + extern const char *imgproc_clahe; ////////////////////////////////////OpenCL call wrappers//////////////////////////// template struct index_and_sizeof; @@ -1511,6 +1513,189 @@ namespace cv openCLExecuteKernel(clCxt, &imgproc_histogram, kernelName, globalThreads, localThreads, args, -1, -1); LUT(mat_src, lut, mat_dst); } + + //////////////////////////////////////////////////////////////////////// + // CLAHE + namespace clahe + { + inline int divUp(int total, int grain) + { + return (total + grain - 1) / grain * grain; + } + + static void calcLut(const oclMat &src, oclMat &dst, + const int tilesX, const int tilesY, const cv::Size tileSize, + const int clipLimit, const float lutScale) + { + cl_int2 tile_size; + tile_size.s[0] = tileSize.width; + tile_size.s[1] = tileSize.height; + + std::vector > args; + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src.data )); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data )); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.step )); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.step )); + args.push_back( std::make_pair( sizeof(cl_int2), (void *)&tile_size )); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&tilesX )); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&clipLimit )); + args.push_back( std::make_pair( sizeof(cl_float), (void *)&lutScale )); + + String kernelName = "calcLut"; + size_t localThreads[3] = { 32, 8, 1 }; + size_t globalThreads[3] = { tilesX * localThreads[0], tilesY * localThreads[1], 1 }; + bool is_cpu = queryDeviceInfo(); + if (is_cpu) + { + openCLExecuteKernel(Context::getContext(), &imgproc_clahe, kernelName, globalThreads, localThreads, args, -1, -1, (char*)" -D CPU"); + } + else + { + cl_kernel kernel = openCLGetKernelFromSource(Context::getContext(), &imgproc_clahe, kernelName); + int wave_size = queryDeviceInfo(kernel); + openCLSafeCall(clReleaseKernel(kernel)); + + static char 
opt[20] = {0}; + sprintf(opt, " -D WAVE_SIZE=%d", wave_size); + openCLExecuteKernel(Context::getContext(), &imgproc_clahe, kernelName, globalThreads, localThreads, args, -1, -1, opt); + } + } + + static void transform(const oclMat &src, oclMat &dst, const oclMat &lut, + const int tilesX, const int tilesY, const cv::Size tileSize) + { + cl_int2 tile_size; + tile_size.s[0] = tileSize.width; + tile_size.s[1] = tileSize.height; + + std::vector > args; + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&src.data )); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&dst.data )); + args.push_back( std::make_pair( sizeof(cl_mem), (void *)&lut.data )); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.step )); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&dst.step )); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&lut.step )); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.cols )); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&src.rows )); + args.push_back( std::make_pair( sizeof(cl_int2), (void *)&tile_size )); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&tilesX )); + args.push_back( std::make_pair( sizeof(cl_int), (void *)&tilesY )); + + String kernelName = "transform"; + size_t localThreads[3] = { 32, 8, 1 }; + size_t globalThreads[3] = { divUp(src.cols, localThreads[0]), divUp(src.rows, localThreads[1]), 1 }; + + openCLExecuteKernel(Context::getContext(), &imgproc_clahe, kernelName, globalThreads, localThreads, args, -1, -1); + } + } + + namespace + { + class CLAHE_Impl : public cv::ocl::CLAHE + { + public: + CLAHE_Impl(double clipLimit = 40.0, int tilesX = 8, int tilesY = 8); + + cv::AlgorithmInfo* info() const; + + void apply(const oclMat &src, oclMat &dst); + + void setClipLimit(double clipLimit); + double getClipLimit() const; + + void setTilesGridSize(cv::Size tileGridSize); + cv::Size getTilesGridSize() const; + + void collectGarbage(); + + private: + double 
clipLimit_; + int tilesX_; + int tilesY_; + + oclMat srcExt_; + oclMat lut_; + }; + + CLAHE_Impl::CLAHE_Impl(double clipLimit, int tilesX, int tilesY) : + clipLimit_(clipLimit), tilesX_(tilesX), tilesY_(tilesY) + { + } + + void CLAHE_Impl::apply(const oclMat &src, oclMat &dst) + { + CV_Assert( src.type() == CV_8UC1 ); + + dst.create( src.size(), src.type() ); + + const int histSize = 256; + + ensureSizeIsEnough(tilesX_ * tilesY_, histSize, CV_8UC1, lut_); + + cv::Size tileSize; + oclMat srcForLut; + + if (src.cols % tilesX_ == 0 && src.rows % tilesY_ == 0) + { + tileSize = cv::Size(src.cols / tilesX_, src.rows / tilesY_); + srcForLut = src; + } + else + { + cv::ocl::copyMakeBorder(src, srcExt_, 0, tilesY_ - (src.rows % tilesY_), 0, tilesX_ - (src.cols % tilesX_), cv::BORDER_REFLECT_101, cv::Scalar()); + + tileSize = cv::Size(srcExt_.cols / tilesX_, srcExt_.rows / tilesY_); + srcForLut = srcExt_; + } + + const int tileSizeTotal = tileSize.area(); + const float lutScale = static_cast(histSize - 1) / tileSizeTotal; + + int clipLimit = 0; + if (clipLimit_ > 0.0) + { + clipLimit = static_cast(clipLimit_ * tileSizeTotal / histSize); + clipLimit = std::max(clipLimit, 1); + } + + clahe::calcLut(srcForLut, lut_, tilesX_, tilesY_, tileSize, clipLimit, lutScale); + //finish(); + clahe::transform(src, dst, lut_, tilesX_, tilesY_, tileSize); + } + + void CLAHE_Impl::setClipLimit(double clipLimit) + { + clipLimit_ = clipLimit; + } + + double CLAHE_Impl::getClipLimit() const + { + return clipLimit_; + } + + void CLAHE_Impl::setTilesGridSize(cv::Size tileGridSize) + { + tilesX_ = tileGridSize.width; + tilesY_ = tileGridSize.height; + } + + cv::Size CLAHE_Impl::getTilesGridSize() const + { + return cv::Size(tilesX_, tilesY_); + } + + void CLAHE_Impl::collectGarbage() + { + srcExt_.release(); + lut_.release(); + } + } + + cv::Ptr createCLAHE(double clipLimit, cv::Size tileGridSize) + { + return new CLAHE_Impl(clipLimit, tileGridSize.width, tileGridSize.height); + } + 
//////////////////////////////////bilateralFilter//////////////////////////////////////////////////// static void oclbilateralFilter_8u( const oclMat &src, oclMat &dst, int d, diff --git a/modules/ocl/src/opencl/imgproc_clahe.cl b/modules/ocl/src/opencl/imgproc_clahe.cl new file mode 100644 index 000000000..0d010f7a5 --- /dev/null +++ b/modules/ocl/src/opencl/imgproc_clahe.cl @@ -0,0 +1,275 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. +// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// @Authors +// Sen Liu, swjtuls1987@126.com +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission.
+// +// This software is provided by the copyright holders and contributors as is and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef WAVE_SIZE +#define WAVE_SIZE 1 +#endif + +int calc_lut(__local int* smem, int val, int tid) +{ + smem[tid] = val; + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid == 0) + { + for (int i = 1; i < 256; ++i) + { + smem[i] += smem[i - 1]; + } + } + barrier(CLK_LOCAL_MEM_FENCE); + + return smem[tid]; +} + +#ifdef CPU +void reduce(volatile __local int* smem, int val, int tid) +{ + smem[tid] = val; + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 128) + { + smem[tid] = val += smem[tid + 128]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 64) + { + smem[tid] = val += smem[tid + 64]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 32) + { + smem[tid] += smem[tid + 32]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 16) + { + smem[tid] += smem[tid + 16]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 8) + { + smem[tid] += smem[tid + 8]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 4) + { + smem[tid] += smem[tid + 4]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 2) + { + smem[tid] += smem[tid + 2]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 1) + { + smem[256] = smem[tid] + smem[tid + 1]; + } + barrier(CLK_LOCAL_MEM_FENCE); +} +#else +void 
reduce(__local volatile int* smem, int val, int tid) +{ + smem[tid] = val; + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 128) + { + smem[tid] = val += smem[tid + 128]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 64) + { + smem[tid] = val += smem[tid + 64]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 32) + { + smem[tid] += smem[tid + 32]; +#if WAVE_SIZE < 32 + } barrier(CLK_LOCAL_MEM_FENCE); + if (tid < 16) { +#endif + smem[tid] += smem[tid + 16]; +#if WAVE_SIZE < 16 + } barrier(CLK_LOCAL_MEM_FENCE); + if (tid < 8) { +#endif + smem[tid] += smem[tid + 8]; + smem[tid] += smem[tid + 4]; + smem[tid] += smem[tid + 2]; + smem[tid] += smem[tid + 1]; + } +} +#endif + +__kernel void calcLut(__global __const uchar * src, __global uchar * lut, + const int srcStep, const int dstStep, + const int2 tileSize, const int tilesX, + const int clipLimit, const float lutScale) +{ + __local int smem[512]; + + const int tx = get_group_id(0); + const int ty = get_group_id(1); + const unsigned int tid = get_local_id(1) * get_local_size(0) + + get_local_id(0); + + smem[tid] = 0; + barrier(CLK_LOCAL_MEM_FENCE); + + for (int i = get_local_id(1); i < tileSize.y; i += get_local_size(1)) + { + __global const uchar* srcPtr = src + mad24( ty * tileSize.y + i, + srcStep, tx * tileSize.x ); + for (int j = get_local_id(0); j < tileSize.x; j += get_local_size(0)) + { + const int data = srcPtr[j]; + atomic_inc(&smem[data]); + } + } + + barrier(CLK_LOCAL_MEM_FENCE); + + int tHistVal = smem[tid]; + + barrier(CLK_LOCAL_MEM_FENCE); + + if (clipLimit > 0) + { + // clip histogram bar + + int clipped = 0; + if (tHistVal > clipLimit) + { + clipped = tHistVal - clipLimit; + tHistVal = clipLimit; + } + + // find number of overall clipped samples + + reduce(smem, clipped, tid); + barrier(CLK_LOCAL_MEM_FENCE); +#ifdef CPU + clipped = smem[256]; +#else + clipped = smem[0]; +#endif + + // broadcast evaluated value + + __local int totalClipped; + + if (tid == 0) + totalClipped = clipped; + 
barrier(CLK_LOCAL_MEM_FENCE); + + // redistribute clipped samples evenly + + int redistBatch = totalClipped / 256; + tHistVal += redistBatch; + + int residual = totalClipped - redistBatch * 256; + if (tid < residual) + ++tHistVal; + } + + const int lutVal = calc_lut(smem, tHistVal, tid); + uint ires = (uint)convert_int_rte(lutScale * lutVal); + lut[(ty * tilesX + tx) * dstStep + tid] = + convert_uchar(clamp(ires, (uint)0, (uint)255)); +} + +__kernel void transform(__global __const uchar * src, + __global uchar * dst, + __global uchar * lut, + const int srcStep, const int dstStep, const int lutStep, + const int cols, const int rows, + const int2 tileSize, + const int tilesX, const int tilesY) +{ + const int x = get_global_id(0); + const int y = get_global_id(1); + + if (x >= cols || y >= rows) + return; + + const float tyf = (convert_float(y) / tileSize.y) - 0.5f; + int ty1 = convert_int_rtn(tyf); + int ty2 = ty1 + 1; + const float ya = tyf - ty1; + ty1 = max(ty1, 0); + ty2 = min(ty2, tilesY - 1); + + const float txf = (convert_float(x) / tileSize.x) - 0.5f; + int tx1 = convert_int_rtn(txf); + int tx2 = tx1 + 1; + const float xa = txf - tx1; + tx1 = max(tx1, 0); + tx2 = min(tx2, tilesX - 1); + + const int srcVal = src[mad24(y, srcStep, x)]; + + float res = 0; + + res += lut[mad24(ty1 * tilesX + tx1, lutStep, srcVal)] * ((1.0f - xa) * (1.0f - ya)); + res += lut[mad24(ty1 * tilesX + tx2, lutStep, srcVal)] * ((xa) * (1.0f - ya)); + res += lut[mad24(ty2 * tilesX + tx1, lutStep, srcVal)] * ((1.0f - xa) * (ya)); + res += lut[mad24(ty2 * tilesX + tx2, lutStep, srcVal)] * ((xa) * (ya)); + + uint ires = (uint)convert_int_rte(res); + dst[mad24(y, dstStep, x)] = convert_uchar(clamp(ires, (uint)0, (uint)255)); +} diff --git a/modules/ocl/test/test_imgproc.cpp b/modules/ocl/test/test_imgproc.cpp index 664f8a391..b9f4740b1 100644 --- a/modules/ocl/test/test_imgproc.cpp +++ b/modules/ocl/test/test_imgproc.cpp @@ -23,6 +23,7 @@ // Rock Li, Rock.Li@amd.com // Wu Zailong, 
bullet@yeah.net // Xu Pang, pangxu010@163.com +// Sen Liu, swjtuls1987@126.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -1393,6 +1394,46 @@ TEST_P(calcHist, Mat) EXPECT_MAT_NEAR(dst_hist, cpu_hist, 0.0); } } +/////////////////////////////////////////////////////////////////////////////////////////////////////// +// CLAHE +namespace +{ + IMPLEMENT_PARAM_CLASS(ClipLimit, double) +} + +PARAM_TEST_CASE(CLAHE, cv::Size, ClipLimit) +{ + cv::Size size; + double clipLimit; + + cv::Mat src; + cv::Mat dst_gold; + + cv::ocl::oclMat g_src; + cv::ocl::oclMat g_dst; + + virtual void SetUp() + { + size = GET_PARAM(0); + clipLimit = GET_PARAM(1); + + cv::RNG &rng = TS::ptr()->get_rng(); + src = randomMat(rng, size, CV_8UC1, 0, 256, false); + g_src.upload(src); + } +}; + +TEST_P(CLAHE, Accuracy) +{ + cv::Ptr clahe = cv::ocl::createCLAHE(clipLimit); + clahe->apply(g_src, g_dst); + cv::Mat dst(g_dst); + + cv::Ptr clahe_gold = cv::createCLAHE(clipLimit); + clahe_gold->apply(src, dst_gold); + + EXPECT_MAT_NEAR(dst_gold, dst, 1.0); +} ///////////////////////////Convolve////////////////////////////////// PARAM_TEST_CASE(ConvolveTestBase, MatType, bool) @@ -1643,6 +1684,10 @@ INSTANTIATE_TEST_CASE_P(histTestBase, calcHist, Combine( ONE_TYPE(CV_32SC1) //no use )); +INSTANTIATE_TEST_CASE_P(ImgProc, CLAHE, Combine( + Values(cv::Size(128, 128), cv::Size(113, 113), cv::Size(1300, 1300)), + Values(0.0, 40.0))); + //INSTANTIATE_TEST_CASE_P(ConvolveTestBase, Convolve, Combine( // Values(CV_32FC1, CV_32FC1), // Values(false))); // Values(false) is the reserved parameter From 956d8027efeccce0b3e595f556e3eae0bb867fd8 Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Mon, 10 Jun 2013 13:29:45 -0700 Subject: [PATCH 020/121] Bug #3044 cap_dshow.cpp forgotten validity check fixed. 
--- modules/highgui/src/cap_dshow.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/modules/highgui/src/cap_dshow.cpp b/modules/highgui/src/cap_dshow.cpp index 21fb947b1..b7cfbd94b 100644 --- a/modules/highgui/src/cap_dshow.cpp +++ b/modules/highgui/src/cap_dshow.cpp @@ -3195,8 +3195,10 @@ IplImage* CvCaptureCAM_DShow::retrieveFrame(int) frame = cvCreateImage( cvSize(w,h), 8, 3 ); } - VI.getPixels( index, (uchar*)frame->imageData, false, true ); - return frame; + if (VI.getPixels( index, (uchar*)frame->imageData, false, true )) + return frame; + else + return NULL; } double CvCaptureCAM_DShow::getProperty( int property_id ) From d583a79869a780521b18459c5bdf4ec29732805d Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Mon, 10 Jun 2013 17:06:34 +0400 Subject: [PATCH 021/121] Revert "Add a variant of detectMultiScale with an argument 'weights'" It was merged by mistake. This reverts commit ab6be9b7b7691967e42297aa6d3a67fb07597fd8. --- .../objdetect/doc/cascade_classification.rst | 3 --- .../include/opencv2/objdetect/objdetect.hpp | 11 +-------- modules/objdetect/src/cascadedetect.cpp | 24 +------------------ 3 files changed, 2 insertions(+), 36 deletions(-) diff --git a/modules/objdetect/doc/cascade_classification.rst b/modules/objdetect/doc/cascade_classification.rst index a00bdc933..eb07a6c8f 100644 --- a/modules/objdetect/doc/cascade_classification.rst +++ b/modules/objdetect/doc/cascade_classification.rst @@ -189,7 +189,6 @@ CascadeClassifier::detectMultiScale Detects objects of different sizes in the input image. The detected objects are returned as a list of rectangles. .. ocv:function:: void CascadeClassifier::detectMultiScale( const Mat& image, vector& objects, double scaleFactor=1.1, int minNeighbors=3, int flags=0, Size minSize=Size(), Size maxSize=Size()) -.. 
ocv:function:: void CascadeClassifier::detectMultiScale( const Mat& image, vector& objects, vector& weights, double scaleFactor=1.1, int minNeighbors=3, int flags=0, Size minSize=Size(), Size maxSize=Size()) .. ocv:pyfunction:: cv2.CascadeClassifier.detectMultiScale(image[, scaleFactor[, minNeighbors[, flags[, minSize[, maxSize]]]]]) -> objects .. ocv:pyfunction:: cv2.CascadeClassifier.detectMultiScale(image, rejectLevels, levelWeights[, scaleFactor[, minNeighbors[, flags[, minSize[, maxSize[, outputRejectLevels]]]]]]) -> objects @@ -204,8 +203,6 @@ Detects objects of different sizes in the input image. The detected objects are :param objects: Vector of rectangles where each rectangle contains the detected object. - :param weights: Vector of weights of the corresponding objects. Weight is the number of neighboring positively classified rectangles that were joined into one object. - :param scaleFactor: Parameter specifying how much the image size is reduced at each image scale. :param minNeighbors: Parameter specifying how many neighbors each candidate rectangle should have to retain it. 
diff --git a/modules/objdetect/include/opencv2/objdetect/objdetect.hpp b/modules/objdetect/include/opencv2/objdetect/objdetect.hpp index 7924b67e5..8d7efb0ba 100644 --- a/modules/objdetect/include/opencv2/objdetect/objdetect.hpp +++ b/modules/objdetect/include/opencv2/objdetect/objdetect.hpp @@ -382,14 +382,6 @@ public: Size minSize=Size(), Size maxSize=Size() ); - CV_WRAP virtual void detectMultiScale( const Mat& image, - CV_OUT vector& objects, - vector& weights, - double scaleFactor=1.1, - int minNeighbors=3, int flags=0, - Size minSize=Size(), - Size maxSize=Size() ); - CV_WRAP virtual void detectMultiScale( const Mat& image, CV_OUT vector& objects, vector& rejectLevels, @@ -398,8 +390,7 @@ public: int minNeighbors=3, int flags=0, Size minSize=Size(), Size maxSize=Size(), - bool outputRejectLevels=false, - bool outputWeights=false ); + bool outputRejectLevels=false ); bool isOldFormatCascade() const; diff --git a/modules/objdetect/src/cascadedetect.cpp b/modules/objdetect/src/cascadedetect.cpp index 341ef2a0d..9e78dce24 100644 --- a/modules/objdetect/src/cascadedetect.cpp +++ b/modules/objdetect/src/cascadedetect.cpp @@ -1023,7 +1023,6 @@ public: }; struct getRect { Rect operator ()(const CvAvgComp& e) const { return e.rect; } }; -struct getNeighbors { int operator ()(const CvAvgComp& e) const { return e.neighbors; } }; bool CascadeClassifier::detectSingleScale( const Mat& image, int stripCount, Size processingRectSize, @@ -1093,12 +1092,11 @@ void CascadeClassifier::detectMultiScale( const Mat& image, vector& object vector& levelWeights, double scaleFactor, int minNeighbors, int flags, Size minObjectSize, Size maxObjectSize, - bool outputRejectLevels, bool outputWeights ) + bool outputRejectLevels ) { const double GROUP_EPS = 0.2; CV_Assert( scaleFactor > 1 && image.depth() == CV_8U ); - CV_Assert( !( outputRejectLevels && outputWeights ) ); if( empty() ) return; @@ -1113,12 +1111,6 @@ void CascadeClassifier::detectMultiScale( const Mat& image, vector& object 
Seq(_objects).copyTo(vecAvgComp); objects.resize(vecAvgComp.size()); std::transform(vecAvgComp.begin(), vecAvgComp.end(), objects.begin(), getRect()); - if( outputWeights ) - { - rejectLevels.resize(vecAvgComp.size()); - std::transform(vecAvgComp.begin(), vecAvgComp.end(), rejectLevels.begin(), - getNeighbors()); - } return; } @@ -1191,10 +1183,6 @@ void CascadeClassifier::detectMultiScale( const Mat& image, vector& object { groupRectangles( objects, rejectLevels, levelWeights, minNeighbors, GROUP_EPS ); } - else if( outputWeights ) - { - groupRectangles( objects, rejectLevels, minNeighbors, GROUP_EPS ); - } else { groupRectangles( objects, minNeighbors, GROUP_EPS ); @@ -1211,16 +1199,6 @@ void CascadeClassifier::detectMultiScale( const Mat& image, vector& object minNeighbors, flags, minObjectSize, maxObjectSize, false ); } -void CascadeClassifier::detectMultiScale( const Mat& image, CV_OUT vector& objects, - vector& weights, double scaleFactor, - int minNeighbors, int flags, Size minObjectSize, - Size maxObjectSize ) -{ - vector fakeLevelWeights; - detectMultiScale( image, objects, weights, fakeLevelWeights, scaleFactor, - minNeighbors, flags, minObjectSize, maxObjectSize, false, true ); -} - bool CascadeClassifier::Data::read(const FileNode &root) { static const float THRESHOLD_EPS = 1e-5f; From c8398c9fdc6641516d8e195e1ede2efd8a138b3c Mon Sep 17 00:00:00 2001 From: Peng Xiao Date: Tue, 11 Jun 2013 20:32:55 +0800 Subject: [PATCH 022/121] Use anonymous enumerations instead of constants --- modules/ocl/include/opencv2/ocl/ocl.hpp | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp index 730c2e6b8..dc58f6f2e 100644 --- a/modules/ocl/include/opencv2/ocl/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl/ocl.hpp @@ -183,13 +183,14 @@ namespace cv // cached onto local disk automatically, which may accelerate subsequent runs. 
// Caching mode is controlled by the following enum // Note, the feature is by default enabled when OpenCV is built in release mode. - // enum BinaryDiskCacheMode - const int CACHE_NONE = 0; - const int CACHE_DEBUG = 0x1 << 0; - const int CACHE_RELEASE = 0x1 << 1; - const int CACHE_ALL = CACHE_DEBUG | CACHE_RELEASE; - const int CACHE_UPDATE = 0x1 << 2; // if the binary cache file with the same name is already on the disk, it will be updated. - + enum + { + CACHE_NONE = 0, + CACHE_DEBUG = 0x1 << 0, + CACHE_RELEASE = 0x1 << 1, + CACHE_ALL = CACHE_DEBUG | CACHE_RELEASE, + CACHE_UPDATE = 0x1 << 2 // if the binary cache file with the same name is already on the disk, it will be updated. + }; CV_EXPORTS void setBinaryDiskCache(int mode = CACHE_RELEASE, cv::String path = "./"); //! set where binary cache to be saved to From e9a74c17f84084446784a2077c6d129b7ca82399 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Mon, 22 Apr 2013 14:04:27 +0400 Subject: [PATCH 023/121] refactored VideoWriter class (convert it to abstract interface) --- modules/gpucodec/doc/videoenc.rst | 152 +- modules/gpucodec/include/opencv2/gpucodec.hpp | 178 ++- modules/gpucodec/perf/perf_video.cpp | 8 +- modules/gpucodec/src/precomp.hpp | 3 +- modules/gpucodec/src/video_writer.cpp | 1301 ++++++++--------- modules/gpucodec/test/test_video.cpp | 10 +- samples/gpu/video_writer.cpp | 8 +- 7 files changed, 769 insertions(+), 891 deletions(-) diff --git a/modules/gpucodec/doc/videoenc.rst b/modules/gpucodec/doc/videoenc.rst index ec26e27ef..739ec0d70 100644 --- a/modules/gpucodec/doc/videoenc.rst +++ b/modules/gpucodec/doc/videoenc.rst @@ -5,80 +5,25 @@ Video Encoding -gpu::VideoWriter_GPU +gpucodec::VideoWriter --------------------- -Video writer class. +Video writer interface. -.. ocv:class:: gpu::VideoWriter_GPU +.. ocv:class:: gpucodec::VideoWriter -The class uses H264 video codec. +The implementation uses H264 video codec. .. note:: Currently only Windows platform is supported. 
-gpu::VideoWriter_GPU::VideoWriter_GPU -------------------------------------- -Constructors. - -.. ocv:function:: gpu::VideoWriter_GPU::VideoWriter_GPU() -.. ocv:function:: gpu::VideoWriter_GPU::VideoWriter_GPU(const String& fileName, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR) -.. ocv:function:: gpu::VideoWriter_GPU::VideoWriter_GPU(const String& fileName, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR) -.. ocv:function:: gpu::VideoWriter_GPU::VideoWriter_GPU(const cv::Ptr& encoderCallback, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR) -.. ocv:function:: gpu::VideoWriter_GPU::VideoWriter_GPU(const cv::Ptr& encoderCallback, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR) - - :param fileName: Name of the output video file. Only AVI file format is supported. - - :param frameSize: Size of the input video frames. - - :param fps: Framerate of the created video stream. - - :param params: Encoder parameters. See :ocv:struct:`gpu::VideoWriter_GPU::EncoderParams` . - - :param format: Surface format of input frames ( ``SF_UYVY`` , ``SF_YUY2`` , ``SF_YV12`` , ``SF_NV12`` , ``SF_IYUV`` , ``SF_BGR`` or ``SF_GRAY``). BGR or gray frames will be converted to YV12 format before encoding, frames with other formats will be used as is. - - :param encoderCallback: Callbacks for video encoder. See :ocv:class:`gpu::VideoWriter_GPU::EncoderCallBack` . Use it if you want to work with raw video stream. - -The constructors initialize video writer. FFMPEG is used to write videos. User can implement own multiplexing with :ocv:class:`gpu::VideoWriter_GPU::EncoderCallBack` . - - - -gpu::VideoWriter_GPU::open --------------------------- -Initializes or reinitializes video writer. - -.. ocv:function:: void gpu::VideoWriter_GPU::open(const String& fileName, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR) -.. 
ocv:function:: void gpu::VideoWriter_GPU::open(const String& fileName, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR) -.. ocv:function:: void gpu::VideoWriter_GPU::open(const cv::Ptr& encoderCallback, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR) -.. ocv:function:: void gpu::VideoWriter_GPU::open(const cv::Ptr& encoderCallback, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR) - -The method opens video writer. Parameters are the same as in the constructor :ocv:func:`gpu::VideoWriter_GPU::VideoWriter_GPU` . The method throws :ocv:class:`Exception` if error occurs. - - - -gpu::VideoWriter_GPU::isOpened ------------------------------- -Returns true if video writer has been successfully initialized. - -.. ocv:function:: bool gpu::VideoWriter_GPU::isOpened() const - - - -gpu::VideoWriter_GPU::close ---------------------------- -Releases the video writer. - -.. ocv:function:: void gpu::VideoWriter_GPU::close() - - - -gpu::VideoWriter_GPU::write ---------------------------- +gpucodec::VideoWriter::write +---------------------------- Writes the next video frame. -.. ocv:function:: void gpu::VideoWriter_GPU::write(const cv::gpu::GpuMat& image, bool lastFrame = false) +.. ocv:function:: void gpucodec::VideoWriter::write(InputArray frame, bool lastFrame = false) = 0 - :param image: The written frame. + :param frame: The written frame. :param lastFrame: Indicates that it is end of stream. The parameter can be ignored. @@ -86,9 +31,34 @@ The method write the specified image to video file. The image must have the same -gpu::VideoWriter_GPU::EncoderParams ------------------------------------ -.. ocv:struct:: gpu::VideoWriter_GPU::EncoderParams +gpucodec::createVideoWriter +--------------------------- +Creates video writer. + +.. ocv:function:: Ptr gpucodec::createVideoWriter(const String& fileName, Size frameSize, double fps, SurfaceFormat format = SF_BGR) +.. 
ocv:function:: Ptr gpucodec::createVideoWriter(const String& fileName, Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR) +.. ocv:function:: Ptr gpucodec::createVideoWriter(const Ptr& encoderCallback, Size frameSize, double fps, SurfaceFormat format = SF_BGR) +.. ocv:function:: Ptr gpucodec::createVideoWriter(const Ptr& encoderCallback, Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR) + + :param fileName: Name of the output video file. Only AVI file format is supported. + + :param frameSize: Size of the input video frames. + + :param fps: Framerate of the created video stream. + + :param params: Encoder parameters. See :ocv:struct:`gpucodec::EncoderParams` . + + :param format: Surface format of input frames ( ``SF_UYVY`` , ``SF_YUY2`` , ``SF_YV12`` , ``SF_NV12`` , ``SF_IYUV`` , ``SF_BGR`` or ``SF_GRAY``). BGR or gray frames will be converted to YV12 format before encoding, frames with other formats will be used as is. + + :param encoderCallback: Callbacks for video encoder. See :ocv:class:`gpucodec::EncoderCallBack` . Use it if you want to work with raw video stream. + +The functions create and initialize a video writer. FFMPEG is used to write videos. Users can implement their own multiplexing with :ocv:class:`gpucodec::EncoderCallBack` . + + + +gpucodec::EncoderParams +----------------------- +.. ocv:struct:: gpucodec::EncoderParams Different parameters for CUDA video encoder. :: @@ -123,12 +93,12 @@ Different parameters for CUDA video encoder. :: -gpu::VideoWriter_GPU::EncoderParams::EncoderParams --------------------------------------------------- +gpucodec::EncoderParams::EncoderParams +-------------------------------------- Constructors. -.. ocv:function:: gpu::VideoWriter_GPU::EncoderParams::EncoderParams() -.. ocv:function:: gpu::VideoWriter_GPU::EncoderParams::EncoderParams(const String& configFile) +.. ocv:function:: gpucodec::EncoderParams::EncoderParams() +..
ocv:function:: gpucodec::EncoderParams::EncoderParams(const String& configFile) :param configFile: Config file name. @@ -136,29 +106,29 @@ Creates default parameters or reads parameters from config file. -gpu::VideoWriter_GPU::EncoderParams::load ------------------------------------------ +gpucodec::EncoderParams::load +----------------------------- Reads parameters from config file. -.. ocv:function:: void gpu::VideoWriter_GPU::EncoderParams::load(const String& configFile) +.. ocv:function:: void gpucodec::EncoderParams::load(const String& configFile) :param configFile: Config file name. -gpu::VideoWriter_GPU::EncoderParams::save ------------------------------------------ +gpucodec::EncoderParams::save +----------------------------- Saves parameters to config file. -.. ocv:function:: void gpu::VideoWriter_GPU::EncoderParams::save(const String& configFile) const +.. ocv:function:: void gpucodec::EncoderParams::save(const String& configFile) const :param configFile: Config file name. -gpu::VideoWriter_GPU::EncoderCallBack -------------------------------------- -.. ocv:class:: gpu::VideoWriter_GPU::EncoderCallBack +gpucodec::EncoderCallBack +------------------------- +.. ocv:class:: gpucodec::EncoderCallBack Callbacks for CUDA video encoder. :: @@ -182,38 +152,38 @@ Callbacks for CUDA video encoder. :: -gpu::VideoWriter_GPU::EncoderCallBack::acquireBitStream -------------------------------------------------------- +gpucodec::EncoderCallBack::acquireBitStream +------------------------------------------- Callback function to signal the start of bitstream that is to be encoded. -.. ocv:function:: virtual uchar* gpu::VideoWriter_GPU::EncoderCallBack::acquireBitStream(int* bufferSize) = 0 +.. ocv:function:: virtual uchar* gpucodec::EncoderCallBack::acquireBitStream(int* bufferSize) = 0 Callback must allocate buffer for CUDA encoder and return pointer to it and it's size. 
-gpu::VideoWriter_GPU::EncoderCallBack::releaseBitStream -------------------------------------------------------- +gpucodec::EncoderCallBack::releaseBitStream +------------------------------------------- Callback function to signal that the encoded bitstream is ready to be written to file. -.. ocv:function:: virtual void gpu::VideoWriter_GPU::EncoderCallBack::releaseBitStream(unsigned char* data, int size) = 0 +.. ocv:function:: virtual void gpucodec::EncoderCallBack::releaseBitStream(unsigned char* data, int size) = 0 -gpu::VideoWriter_GPU::EncoderCallBack::onBeginFrame ---------------------------------------------------- +gpucodec::EncoderCallBack::onBeginFrame +--------------------------------------- Callback function to signal that the encoding operation on the frame has started. -.. ocv:function:: virtual void gpu::VideoWriter_GPU::EncoderCallBack::onBeginFrame(int frameNumber, PicType picType) = 0 +.. ocv:function:: virtual void gpucodec::EncoderCallBack::onBeginFrame(int frameNumber, PicType picType) = 0 :param picType: Specify frame type (I-Frame, P-Frame or B-Frame). -gpu::VideoWriter_GPU::EncoderCallBack::onEndFrame -------------------------------------------------- +gpucodec::EncoderCallBack::onEndFrame +------------------------------------- Callback function signals that the encoding operation on the frame has finished. -.. ocv:function:: virtual void gpu::VideoWriter_GPU::EncoderCallBack::onEndFrame(int frameNumber, PicType picType) = 0 +.. ocv:function:: virtual void gpucodec::EncoderCallBack::onEndFrame(int frameNumber, PicType picType) = 0 :param picType: Specify frame type (I-Frame, P-Frame or B-Frame). diff --git a/modules/gpucodec/include/opencv2/gpucodec.hpp b/modules/gpucodec/include/opencv2/gpucodec.hpp index af68c3841..8c3cd86c7 100644 --- a/modules/gpucodec/include/opencv2/gpucodec.hpp +++ b/modules/gpucodec/include/opencv2/gpucodec.hpp @@ -7,11 +7,12 @@ // copy or use the software. 
// // -// License Agreement +// License Agreement // For Open Source Computer Vision Library // // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. // Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. // Third party copyrights are property of their respective owners. // // Redistribution and use in source and binary forms, with or without modification, @@ -51,112 +52,102 @@ #include "opencv2/core/gpu.hpp" -namespace cv { namespace gpu { +namespace cv { namespace gpucodec { ////////////////////////////////// Video Encoding ////////////////////////////////// -// Works only under Windows -// Supports olny H264 video codec and AVI files -class CV_EXPORTS VideoWriter_GPU +// Works only under Windows. +// Supports only H264 video codec and AVI files. + +enum SurfaceFormat +{ + SF_UYVY = 0, + SF_YUY2, + SF_YV12, + SF_NV12, + SF_IYUV, + SF_BGR, + SF_GRAY = SF_BGR +}; + +struct CV_EXPORTS EncoderParams +{ + int P_Interval; // NVVE_P_INTERVAL, + int IDR_Period; // NVVE_IDR_PERIOD, + int DynamicGOP; // NVVE_DYNAMIC_GOP, + int RCType; // NVVE_RC_TYPE, + int AvgBitrate; // NVVE_AVG_BITRATE, + int PeakBitrate; // NVVE_PEAK_BITRATE, + int QP_Level_Intra; // NVVE_QP_LEVEL_INTRA, + int QP_Level_InterP; // NVVE_QP_LEVEL_INTER_P, + int QP_Level_InterB; // NVVE_QP_LEVEL_INTER_B, + int DeblockMode; // NVVE_DEBLOCK_MODE, + int ProfileLevel; // NVVE_PROFILE_LEVEL, + int ForceIntra; // NVVE_FORCE_INTRA, + int ForceIDR; // NVVE_FORCE_IDR, + int ClearStat; // NVVE_CLEAR_STAT, + int DIMode; // NVVE_SET_DEINTERLACE, + int Presets; // NVVE_PRESETS, + int DisableCabac; // NVVE_DISABLE_CABAC, + int NaluFramingType; // NVVE_CONFIGURE_NALU_FRAMING_TYPE + int DisableSPSPPS; // NVVE_DISABLE_SPS_PPS + + EncoderParams(); + explicit EncoderParams(const String& configFile); + + void load(const String& configFile); + void save(const String& configFile) const; +}; + +class CV_EXPORTS EncoderCallBack { public: -
struct EncoderParams; - - // Callbacks for video encoder, use it if you want to work with raw video stream - class EncoderCallBack; - - enum SurfaceFormat + enum PicType { - SF_UYVY = 0, - SF_YUY2, - SF_YV12, - SF_NV12, - SF_IYUV, - SF_BGR, - SF_GRAY = SF_BGR + IFRAME = 1, + PFRAME = 2, + BFRAME = 3 }; - VideoWriter_GPU(); - VideoWriter_GPU(const String& fileName, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR); - VideoWriter_GPU(const String& fileName, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR); - VideoWriter_GPU(const cv::Ptr& encoderCallback, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR); - VideoWriter_GPU(const cv::Ptr& encoderCallback, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR); - ~VideoWriter_GPU(); + virtual ~EncoderCallBack() {} - // all methods throws cv::Exception if error occurs - void open(const String& fileName, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR); - void open(const String& fileName, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR); - void open(const cv::Ptr& encoderCallback, cv::Size frameSize, double fps, SurfaceFormat format = SF_BGR); - void open(const cv::Ptr& encoderCallback, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR); + //! callback function to signal the start of bitstream that is to be encoded + //! callback must allocate host buffer for CUDA encoder and return pointer to it and it's size + virtual uchar* acquireBitStream(int* bufferSize) = 0; - bool isOpened() const; - void close(); + //! callback function to signal that the encoded bitstream is ready to be written to file + virtual void releaseBitStream(unsigned char* data, int size) = 0; - void write(const cv::gpu::GpuMat& image, bool lastFrame = false); + //! 
callback function to signal that the encoding operation on the frame has started + virtual void onBeginFrame(int frameNumber, PicType picType) = 0; - struct CV_EXPORTS EncoderParams - { - int P_Interval; // NVVE_P_INTERVAL, - int IDR_Period; // NVVE_IDR_PERIOD, - int DynamicGOP; // NVVE_DYNAMIC_GOP, - int RCType; // NVVE_RC_TYPE, - int AvgBitrate; // NVVE_AVG_BITRATE, - int PeakBitrate; // NVVE_PEAK_BITRATE, - int QP_Level_Intra; // NVVE_QP_LEVEL_INTRA, - int QP_Level_InterP; // NVVE_QP_LEVEL_INTER_P, - int QP_Level_InterB; // NVVE_QP_LEVEL_INTER_B, - int DeblockMode; // NVVE_DEBLOCK_MODE, - int ProfileLevel; // NVVE_PROFILE_LEVEL, - int ForceIntra; // NVVE_FORCE_INTRA, - int ForceIDR; // NVVE_FORCE_IDR, - int ClearStat; // NVVE_CLEAR_STAT, - int DIMode; // NVVE_SET_DEINTERLACE, - int Presets; // NVVE_PRESETS, - int DisableCabac; // NVVE_DISABLE_CABAC, - int NaluFramingType; // NVVE_CONFIGURE_NALU_FRAMING_TYPE - int DisableSPSPPS; // NVVE_DISABLE_SPS_PPS - - EncoderParams(); - explicit EncoderParams(const String& configFile); - - void load(const String& configFile); - void save(const String& configFile) const; - }; - - EncoderParams getParams() const; - - class CV_EXPORTS EncoderCallBack - { - public: - enum PicType - { - IFRAME = 1, - PFRAME = 2, - BFRAME = 3 - }; - - virtual ~EncoderCallBack() {} - - // callback function to signal the start of bitstream that is to be encoded - // must return pointer to buffer - virtual uchar* acquireBitStream(int* bufferSize) = 0; - - // callback function to signal that the encoded bitstream is ready to be written to file - virtual void releaseBitStream(unsigned char* data, int size) = 0; - - // callback function to signal that the encoding operation on the frame has started - virtual void onBeginFrame(int frameNumber, PicType picType) = 0; - - // callback function signals that the encoding operation on the frame has finished - virtual void onEndFrame(int frameNumber, PicType picType) = 0; - }; - - class Impl; - -private: - 
cv::Ptr impl_; + //! callback function signals that the encoding operation on the frame has finished + virtual void onEndFrame(int frameNumber, PicType picType) = 0; }; +class CV_EXPORTS VideoWriter +{ +public: + virtual ~VideoWriter() {} + + //! writes the next frame from GPU memory + virtual void write(InputArray frame, bool lastFrame = false) = 0; + + virtual EncoderParams getEncoderParams() const = 0; +}; + +//! create VideoWriter for specified output file (only AVI file format is supported) +CV_EXPORTS Ptr createVideoWriter(const String& fileName, Size frameSize, double fps, SurfaceFormat format = SF_BGR); +CV_EXPORTS Ptr createVideoWriter(const String& fileName, Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR); + +//! create VideoWriter for user-defined callbacks +CV_EXPORTS Ptr createVideoWriter(const Ptr& encoderCallback, Size frameSize, double fps, SurfaceFormat format = SF_BGR); +CV_EXPORTS Ptr createVideoWriter(const Ptr& encoderCallback, Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR); + +}} // namespace cv { namespace gpucodec { + +namespace cv { namespace gpu { + ////////////////////////////////// Video Decoding ////////////////////////////////////////// namespace detail @@ -257,7 +248,6 @@ private: namespace cv { -template <> CV_EXPORTS void Ptr::delete_obj(); template <> CV_EXPORTS void Ptr::delete_obj(); } diff --git a/modules/gpucodec/perf/perf_video.cpp b/modules/gpucodec/perf/perf_video.cpp index 8f5e1700e..ec9237d37 100644 --- a/modules/gpucodec/perf/perf_video.cpp +++ b/modules/gpucodec/perf/perf_video.cpp @@ -119,7 +119,7 @@ PERF_TEST_P(FileName, VideoWriter, Values("gpu/video/768x576.avi", "gpu/video/19 if (PERF_RUN_GPU()) { - cv::gpu::VideoWriter_GPU d_writer; + cv::Ptr d_writer; cv::gpu::GpuMat d_frame; @@ -130,11 +130,11 @@ PERF_TEST_P(FileName, VideoWriter, Values("gpu/video/768x576.avi", "gpu/video/19 d_frame.upload(frame); - if (!d_writer.isOpened()) - 
d_writer.open(outputFile, frame.size(), FPS); + if (d_writer.empty()) + d_writer = cv::gpucodec::createVideoWriter(outputFile, frame.size(), FPS); startTimer(); next(); - d_writer.write(d_frame); + d_writer->write(d_frame); stopTimer(); } } diff --git a/modules/gpucodec/src/precomp.hpp b/modules/gpucodec/src/precomp.hpp index c8580c9fe..2afb0abb1 100644 --- a/modules/gpucodec/src/precomp.hpp +++ b/modules/gpucodec/src/precomp.hpp @@ -7,11 +7,12 @@ // copy or use the software. // // -// License Agreement +// License Agreement // For Open Source Computer Vision Library // // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. // Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. // Third party copyrights are property of their respective owners. // // Redistribution and use in source and binary forms, with or without modification, diff --git a/modules/gpucodec/src/video_writer.cpp b/modules/gpucodec/src/video_writer.cpp index 94100c0b8..6ffb7c12d 100644 --- a/modules/gpucodec/src/video_writer.cpp +++ b/modules/gpucodec/src/video_writer.cpp @@ -7,11 +7,12 @@ // copy or use the software. // // -// License Agreement +// License Agreement // For Open Source Computer Vision Library // // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. // Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. // Third party copyrights are property of their respective owners. 
// // Redistribution and use in source and binary forms, with or without modification, @@ -42,36 +43,32 @@ #include "precomp.hpp" +using namespace cv; +using namespace cv::gpu; +using namespace cv::gpucodec; + #if !defined(HAVE_NVCUVID) || !defined(WIN32) -class cv::gpu::VideoWriter_GPU::Impl -{ -}; +cv::gpucodec::EncoderParams::EncoderParams() { throw_no_cuda(); } +cv::gpucodec::EncoderParams::EncoderParams(const String&) { throw_no_cuda(); } +void cv::gpucodec::EncoderParams::load(const String&) { throw_no_cuda(); } +void cv::gpucodec::EncoderParams::save(const String&) const { throw_no_cuda(); } -cv::gpu::VideoWriter_GPU::VideoWriter_GPU() { throw_no_cuda(); } -cv::gpu::VideoWriter_GPU::VideoWriter_GPU(const String&, cv::Size, double, SurfaceFormat) { throw_no_cuda(); } -cv::gpu::VideoWriter_GPU::VideoWriter_GPU(const String&, cv::Size, double, const EncoderParams&, SurfaceFormat) { throw_no_cuda(); } -cv::gpu::VideoWriter_GPU::VideoWriter_GPU(const cv::Ptr&, cv::Size, double, SurfaceFormat) { throw_no_cuda(); } -cv::gpu::VideoWriter_GPU::VideoWriter_GPU(const cv::Ptr&, cv::Size, double, const EncoderParams&, SurfaceFormat) { throw_no_cuda(); } -cv::gpu::VideoWriter_GPU::~VideoWriter_GPU() {} -void cv::gpu::VideoWriter_GPU::open(const String&, cv::Size, double, SurfaceFormat) { throw_no_cuda(); } -void cv::gpu::VideoWriter_GPU::open(const String&, cv::Size, double, const EncoderParams&, SurfaceFormat) { throw_no_cuda(); } -void cv::gpu::VideoWriter_GPU::open(const cv::Ptr&, cv::Size, double, SurfaceFormat) { throw_no_cuda(); } -void cv::gpu::VideoWriter_GPU::open(const cv::Ptr&, cv::Size, double, const EncoderParams&, SurfaceFormat) { throw_no_cuda(); } -bool cv::gpu::VideoWriter_GPU::isOpened() const { return false; } -void cv::gpu::VideoWriter_GPU::close() {} -void cv::gpu::VideoWriter_GPU::write(const cv::gpu::GpuMat&, bool) { throw_no_cuda(); } -cv::gpu::VideoWriter_GPU::EncoderParams cv::gpu::VideoWriter_GPU::getParams() const { EncoderParams params; 
throw_no_cuda(); return params; } +Ptr cv::gpucodec::createVideoWriter(const String&, Size, double, SurfaceFormat) { throw_no_cuda(); return Ptr(); } +Ptr cv::gpucodec::createVideoWriter(const String&, Size, double, const EncoderParams&, SurfaceFormat) { throw_no_cuda(); return Ptr(); } -cv::gpu::VideoWriter_GPU::EncoderParams::EncoderParams() { throw_no_cuda(); } -cv::gpu::VideoWriter_GPU::EncoderParams::EncoderParams(const String&) { throw_no_cuda(); } -void cv::gpu::VideoWriter_GPU::EncoderParams::load(const String&) { throw_no_cuda(); } -void cv::gpu::VideoWriter_GPU::EncoderParams::save(const String&) const { throw_no_cuda(); } +Ptr cv::gpucodec::createVideoWriter(const Ptr&, Size, double, SurfaceFormat) { throw_no_cuda(); return Ptr(); } +Ptr cv::gpucodec::createVideoWriter(const Ptr&, Size, double, const EncoderParams&, SurfaceFormat) { throw_no_cuda(); return Ptr(); } #else // !defined HAVE_CUDA || !defined WIN32 +namespace cv { namespace gpu { namespace cudev +{ + void RGB_to_YV12(const PtrStepSzb src, int cn, PtrStepSzb dst, cudaStream_t stream = 0); +}}} + /////////////////////////////////////////////////////////////////////////// -// VideoWriter_GPU::Impl +// VideoWriterImpl namespace { @@ -84,7 +81,7 @@ namespace err = NVGetHWEncodeCaps(); if (err) - CV_Error(cv::Error::GpuNotSupported, "No CUDA capability present"); + CV_Error(Error::GpuNotSupported, "No CUDA capability present"); // Create the Encoder API Interface err = NVCreateEncoder(&encoder_); @@ -108,405 +105,395 @@ namespace enum CodecType { - MPEG1, //not supported yet - MPEG2, //not supported yet - MPEG4, //not supported yet + MPEG1, // not supported yet + MPEG2, // not supported yet + MPEG4, // not supported yet H264 }; -} -class cv::gpu::VideoWriter_GPU::Impl -{ -public: - Impl(const cv::Ptr& callback, cv::Size frameSize, double fps, SurfaceFormat format, CodecType codec = H264); - Impl(const cv::Ptr& callback, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat 
format, CodecType codec = H264); - - void write(const cv::gpu::GpuMat& image, bool lastFrame); - - EncoderParams getParams() const; - -private: - Impl(const Impl&); - Impl& operator=(const Impl&); - - void initEncoder(double fps); - void setEncodeParams(const EncoderParams& params); - void initGpuMemory(); - void initCallBacks(); - void createHWEncoder(); - - cv::Ptr callback_; - cv::Size frameSize_; - - CodecType codec_; - SurfaceFormat inputFormat_; - NVVE_SurfaceFormat surfaceFormat_; - - NVEncoderWrapper encoder_; - - cv::gpu::GpuMat videoFrame_; - CUvideoctxlock cuCtxLock_; - - // CallBacks - - static unsigned char* NVENCAPI HandleAcquireBitStream(int* pBufferSize, void* pUserdata); - static void NVENCAPI HandleReleaseBitStream(int nBytesInBuffer, unsigned char* cb, void* pUserdata); - static void NVENCAPI HandleOnBeginFrame(const NVVE_BeginFrameInfo* pbfi, void* pUserdata); - static void NVENCAPI HandleOnEndFrame(const NVVE_EndFrameInfo* pefi, void* pUserdata); -}; - -cv::gpu::VideoWriter_GPU::Impl::Impl(const cv::Ptr& callback, cv::Size frameSize, double fps, SurfaceFormat format, CodecType codec) : - callback_(callback), - frameSize_(frameSize), - codec_(codec), - inputFormat_(format), - cuCtxLock_(0) -{ - surfaceFormat_ = inputFormat_ == SF_BGR ? YV12 : static_cast(inputFormat_); - - initEncoder(fps); - - initGpuMemory(); - - initCallBacks(); - - createHWEncoder(); -} - -cv::gpu::VideoWriter_GPU::Impl::Impl(const cv::Ptr& callback, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format, CodecType codec) : - callback_(callback), - frameSize_(frameSize), - codec_(codec), - inputFormat_(format), - cuCtxLock_(0) -{ - surfaceFormat_ = inputFormat_ == SF_BGR ? 
YV12 : static_cast(inputFormat_); - - initEncoder(fps); - - setEncodeParams(params); - - initGpuMemory(); - - initCallBacks(); - - createHWEncoder(); -} - -void cv::gpu::VideoWriter_GPU::Impl::initEncoder(double fps) -{ - int err; - - // Set codec - - static const unsigned long codecs_id[] = + class VideoWriterImpl : public VideoWriter { - NV_CODEC_TYPE_MPEG1, NV_CODEC_TYPE_MPEG2, NV_CODEC_TYPE_MPEG4, NV_CODEC_TYPE_H264, NV_CODEC_TYPE_VC1 - }; - err = NVSetCodec(encoder_, codecs_id[codec_]); - if (err) - CV_Error(cv::Error::StsNotImplemented, "Codec format is not supported"); + public: + VideoWriterImpl(const Ptr& callback, Size frameSize, double fps, SurfaceFormat format, CodecType codec = H264); + VideoWriterImpl(const Ptr& callback, Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format, CodecType codec = H264); - // Set default params + void write(InputArray frame, bool lastFrame = false); - err = NVSetDefaultParam(encoder_); - CV_Assert( err == 0 ); + EncoderParams getEncoderParams() const; - // Set some common params + private: + void initEncoder(double fps); + void setEncodeParams(const EncoderParams& params); + void initGpuMemory(); + void initCallBacks(); + void createHWEncoder(); - int inputSize[] = { frameSize_.width, frameSize_.height }; - err = NVSetParamValue(encoder_, NVVE_IN_SIZE, &inputSize); - CV_Assert( err == 0 ); - err = NVSetParamValue(encoder_, NVVE_OUT_SIZE, &inputSize); - CV_Assert( err == 0 ); + Ptr callback_; + Size frameSize_; - int aspectRatio[] = { frameSize_.width, frameSize_.height, ASPECT_RATIO_DAR }; - err = NVSetParamValue(encoder_, NVVE_ASPECT_RATIO, &aspectRatio); - CV_Assert( err == 0 ); + CodecType codec_; + SurfaceFormat inputFormat_; + NVVE_SurfaceFormat surfaceFormat_; - // FPS + NVEncoderWrapper encoder_; - int frame_rate = static_cast(fps + 0.5); - int frame_rate_base = 1; - while (fabs(static_cast(frame_rate) / frame_rate_base) - fps > 0.001) - { - frame_rate_base *= 10; - frame_rate = 
static_cast(fps*frame_rate_base + 0.5); - } - int FrameRate[] = { frame_rate, frame_rate_base }; - err = NVSetParamValue(encoder_, NVVE_FRAME_RATE, &FrameRate); - CV_Assert( err == 0 ); + GpuMat videoFrame_; + CUvideoctxlock cuCtxLock_; - // Select device for encoding + // CallBacks - int gpuID = cv::gpu::getDevice(); - err = NVSetParamValue(encoder_, NVVE_FORCE_GPU_SELECTION, &gpuID); - CV_Assert( err == 0 ); -} - -void cv::gpu::VideoWriter_GPU::Impl::setEncodeParams(const EncoderParams& params) -{ - int err; - - int P_Interval = params.P_Interval; - err = NVSetParamValue(encoder_, NVVE_P_INTERVAL, &P_Interval); - CV_Assert( err == 0 ); - - int IDR_Period = params.IDR_Period; - err = NVSetParamValue(encoder_, NVVE_IDR_PERIOD, &IDR_Period); - CV_Assert( err == 0 ); - - int DynamicGOP = params.DynamicGOP; - err = NVSetParamValue(encoder_, NVVE_DYNAMIC_GOP, &DynamicGOP); - CV_Assert( err == 0 ); - - NVVE_RateCtrlType RCType = static_cast(params.RCType); - err = NVSetParamValue(encoder_, NVVE_RC_TYPE, &RCType); - CV_Assert( err == 0 ); - - int AvgBitrate = params.AvgBitrate; - err = NVSetParamValue(encoder_, NVVE_AVG_BITRATE, &AvgBitrate); - CV_Assert( err == 0 ); - - int PeakBitrate = params.PeakBitrate; - err = NVSetParamValue(encoder_, NVVE_PEAK_BITRATE, &PeakBitrate); - CV_Assert( err == 0 ); - - int QP_Level_Intra = params.QP_Level_Intra; - err = NVSetParamValue(encoder_, NVVE_QP_LEVEL_INTRA, &QP_Level_Intra); - CV_Assert( err == 0 ); - - int QP_Level_InterP = params.QP_Level_InterP; - err = NVSetParamValue(encoder_, NVVE_QP_LEVEL_INTER_P, &QP_Level_InterP); - CV_Assert( err == 0 ); - - int QP_Level_InterB = params.QP_Level_InterB; - err = NVSetParamValue(encoder_, NVVE_QP_LEVEL_INTER_B, &QP_Level_InterB); - CV_Assert( err == 0 ); - - int DeblockMode = params.DeblockMode; - err = NVSetParamValue(encoder_, NVVE_DEBLOCK_MODE, &DeblockMode); - CV_Assert( err == 0 ); - - int ProfileLevel = params.ProfileLevel; - err = NVSetParamValue(encoder_, NVVE_PROFILE_LEVEL, 
&ProfileLevel); - CV_Assert( err == 0 ); - - int ForceIntra = params.ForceIntra; - err = NVSetParamValue(encoder_, NVVE_FORCE_INTRA, &ForceIntra); - CV_Assert( err == 0 ); - - int ForceIDR = params.ForceIDR; - err = NVSetParamValue(encoder_, NVVE_FORCE_IDR, &ForceIDR); - CV_Assert( err == 0 ); - - int ClearStat = params.ClearStat; - err = NVSetParamValue(encoder_, NVVE_CLEAR_STAT, &ClearStat); - CV_Assert( err == 0 ); - - NVVE_DI_MODE DIMode = static_cast(params.DIMode); - err = NVSetParamValue(encoder_, NVVE_SET_DEINTERLACE, &DIMode); - CV_Assert( err == 0 ); - - if (params.Presets != -1) - { - NVVE_PRESETS_TARGET Presets = static_cast(params.Presets); - err = NVSetParamValue(encoder_, NVVE_PRESETS, &Presets); - CV_Assert ( err == 0 ); - } - - int DisableCabac = params.DisableCabac; - err = NVSetParamValue(encoder_, NVVE_DISABLE_CABAC, &DisableCabac); - CV_Assert ( err == 0 ); - - int NaluFramingType = params.NaluFramingType; - err = NVSetParamValue(encoder_, NVVE_CONFIGURE_NALU_FRAMING_TYPE, &NaluFramingType); - CV_Assert ( err == 0 ); - - int DisableSPSPPS = params.DisableSPSPPS; - err = NVSetParamValue(encoder_, NVVE_DISABLE_SPS_PPS, &DisableSPSPPS); - CV_Assert ( err == 0 ); -} - -cv::gpu::VideoWriter_GPU::EncoderParams cv::gpu::VideoWriter_GPU::Impl::getParams() const -{ - int err; - - EncoderParams params; - - int P_Interval; - err = NVGetParamValue(encoder_, NVVE_P_INTERVAL, &P_Interval); - CV_Assert( err == 0 ); - params.P_Interval = P_Interval; - - int IDR_Period; - err = NVGetParamValue(encoder_, NVVE_IDR_PERIOD, &IDR_Period); - CV_Assert( err == 0 ); - params.IDR_Period = IDR_Period; - - int DynamicGOP; - err = NVGetParamValue(encoder_, NVVE_DYNAMIC_GOP, &DynamicGOP); - CV_Assert( err == 0 ); - params.DynamicGOP = DynamicGOP; - - NVVE_RateCtrlType RCType; - err = NVGetParamValue(encoder_, NVVE_RC_TYPE, &RCType); - CV_Assert( err == 0 ); - params.RCType = RCType; - - int AvgBitrate; - err = NVGetParamValue(encoder_, NVVE_AVG_BITRATE, &AvgBitrate); - 
CV_Assert( err == 0 ); - params.AvgBitrate = AvgBitrate; - - int PeakBitrate; - err = NVGetParamValue(encoder_, NVVE_PEAK_BITRATE, &PeakBitrate); - CV_Assert( err == 0 ); - params.PeakBitrate = PeakBitrate; - - int QP_Level_Intra; - err = NVGetParamValue(encoder_, NVVE_QP_LEVEL_INTRA, &QP_Level_Intra); - CV_Assert( err == 0 ); - params.QP_Level_Intra = QP_Level_Intra; - - int QP_Level_InterP; - err = NVGetParamValue(encoder_, NVVE_QP_LEVEL_INTER_P, &QP_Level_InterP); - CV_Assert( err == 0 ); - params.QP_Level_InterP = QP_Level_InterP; - - int QP_Level_InterB; - err = NVGetParamValue(encoder_, NVVE_QP_LEVEL_INTER_B, &QP_Level_InterB); - CV_Assert( err == 0 ); - params.QP_Level_InterB = QP_Level_InterB; - - int DeblockMode; - err = NVGetParamValue(encoder_, NVVE_DEBLOCK_MODE, &DeblockMode); - CV_Assert( err == 0 ); - params.DeblockMode = DeblockMode; - - int ProfileLevel; - err = NVGetParamValue(encoder_, NVVE_PROFILE_LEVEL, &ProfileLevel); - CV_Assert( err == 0 ); - params.ProfileLevel = ProfileLevel; - - int ForceIntra; - err = NVGetParamValue(encoder_, NVVE_FORCE_INTRA, &ForceIntra); - CV_Assert( err == 0 ); - params.ForceIntra = ForceIntra; - - int ForceIDR; - err = NVGetParamValue(encoder_, NVVE_FORCE_IDR, &ForceIDR); - CV_Assert( err == 0 ); - params.ForceIDR = ForceIDR; - - int ClearStat; - err = NVGetParamValue(encoder_, NVVE_CLEAR_STAT, &ClearStat); - CV_Assert( err == 0 ); - params.ClearStat = ClearStat; - - NVVE_DI_MODE DIMode; - err = NVGetParamValue(encoder_, NVVE_SET_DEINTERLACE, &DIMode); - CV_Assert( err == 0 ); - params.DIMode = DIMode; - - params.Presets = -1; - - int DisableCabac; - err = NVGetParamValue(encoder_, NVVE_DISABLE_CABAC, &DisableCabac); - CV_Assert ( err == 0 ); - params.DisableCabac = DisableCabac; - - int NaluFramingType; - err = NVGetParamValue(encoder_, NVVE_CONFIGURE_NALU_FRAMING_TYPE, &NaluFramingType); - CV_Assert ( err == 0 ); - params.NaluFramingType = NaluFramingType; - - int DisableSPSPPS; - err = NVGetParamValue(encoder_, 
NVVE_DISABLE_SPS_PPS, &DisableSPSPPS); - CV_Assert ( err == 0 ); - params.DisableSPSPPS = DisableSPSPPS; - - return params; -} - -void cv::gpu::VideoWriter_GPU::Impl::initGpuMemory() -{ - int err; - CUresult cuRes; - - // initialize context - cv::gpu::GpuMat temp(1, 1, CV_8U); - temp.release(); - - static const int bpp[] = - { - 16, // UYVY, 4:2:2 - 16, // YUY2, 4:2:2 - 12, // YV12, 4:2:0 - 12, // NV12, 4:2:0 - 12, // IYUV, 4:2:0 + static unsigned char* NVENCAPI HandleAcquireBitStream(int* pBufferSize, void* pUserdata); + static void NVENCAPI HandleReleaseBitStream(int nBytesInBuffer, unsigned char* cb, void* pUserdata); + static void NVENCAPI HandleOnBeginFrame(const NVVE_BeginFrameInfo* pbfi, void* pUserdata); + static void NVENCAPI HandleOnEndFrame(const NVVE_EndFrameInfo* pefi, void* pUserdata); }; - CUcontext cuContext; - cuRes = cuCtxGetCurrent(&cuContext); - CV_Assert( cuRes == CUDA_SUCCESS ); + VideoWriterImpl::VideoWriterImpl(const Ptr& callback, Size frameSize, double fps, SurfaceFormat format, CodecType codec) : + callback_(callback), + frameSize_(frameSize), + codec_(codec), + inputFormat_(format), + cuCtxLock_(0) + { + surfaceFormat_ = (inputFormat_ == SF_BGR ? YV12 : static_cast(inputFormat_)); - // Allocate the CUDA memory Pitched Surface - if (surfaceFormat_ == UYVY || surfaceFormat_ == YUY2) - videoFrame_.create(frameSize_.height, (frameSize_.width * bpp[surfaceFormat_]) / 8, CV_8UC1); - else - videoFrame_.create((frameSize_.height * bpp[surfaceFormat_]) / 8, frameSize_.width, CV_8UC1); + initEncoder(fps); - // Create the Video Context Lock (used for synchronization) - cuRes = cuvidCtxLockCreate(&cuCtxLock_, cuContext); - CV_Assert( cuRes == CUDA_SUCCESS ); + initGpuMemory(); - // If we are using GPU Device Memory with NVCUVENC, it is necessary to create a - // CUDA Context with a Context Lock cuvidCtxLock. 
The Context Lock needs to be passed to NVCUVENC + initCallBacks(); - int iUseDeviceMem = 1; - err = NVSetParamValue(encoder_, NVVE_DEVICE_MEMORY_INPUT, &iUseDeviceMem); - CV_Assert ( err == 0 ); + createHWEncoder(); + } - err = NVSetParamValue(encoder_, NVVE_DEVICE_CTX_LOCK, &cuCtxLock_); - CV_Assert ( err == 0 ); -} + VideoWriterImpl::VideoWriterImpl(const Ptr& callback, Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format, CodecType codec) : + callback_(callback), + frameSize_(frameSize), + codec_(codec), + inputFormat_(format), + cuCtxLock_(0) + { + surfaceFormat_ = (inputFormat_ == SF_BGR ? YV12 : static_cast(inputFormat_)); -void cv::gpu::VideoWriter_GPU::Impl::initCallBacks() -{ - NVVE_CallbackParams cb; - memset(&cb, 0, sizeof(NVVE_CallbackParams)); + initEncoder(fps); - cb.pfnacquirebitstream = HandleAcquireBitStream; - cb.pfnonbeginframe = HandleOnBeginFrame; - cb.pfnonendframe = HandleOnEndFrame; - cb.pfnreleasebitstream = HandleReleaseBitStream; + setEncodeParams(params); - NVRegisterCB(encoder_, cb, this); -} + initGpuMemory(); -void cv::gpu::VideoWriter_GPU::Impl::createHWEncoder() -{ - int err; + initCallBacks(); - // Create the NVIDIA HW resources for Encoding on NVIDIA hardware - err = NVCreateHWEncoder(encoder_); - CV_Assert( err == 0 ); -} + createHWEncoder(); + } + + void VideoWriterImpl::initEncoder(double fps) + { + int err; + + // Set codec + + static const unsigned long codecs_id[] = + { + NV_CODEC_TYPE_MPEG1, NV_CODEC_TYPE_MPEG2, NV_CODEC_TYPE_MPEG4, NV_CODEC_TYPE_H264, NV_CODEC_TYPE_VC1 + }; + err = NVSetCodec(encoder_, codecs_id[codec_]); + if (err) + CV_Error(Error::StsNotImplemented, "Codec format is not supported"); + + // Set default params + + err = NVSetDefaultParam(encoder_); + CV_Assert( err == 0 ); + + // Set some common params + + int inputSize[] = { frameSize_.width, frameSize_.height }; + err = NVSetParamValue(encoder_, NVVE_IN_SIZE, &inputSize); + CV_Assert( err == 0 ); + err = 
NVSetParamValue(encoder_, NVVE_OUT_SIZE, &inputSize); + CV_Assert( err == 0 ); + + int aspectRatio[] = { frameSize_.width, frameSize_.height, ASPECT_RATIO_DAR }; + err = NVSetParamValue(encoder_, NVVE_ASPECT_RATIO, &aspectRatio); + CV_Assert( err == 0 ); + + // FPS + + int frame_rate = static_cast(fps + 0.5); + int frame_rate_base = 1; + while (fabs(static_cast(frame_rate) / frame_rate_base) - fps > 0.001) + { + frame_rate_base *= 10; + frame_rate = static_cast(fps*frame_rate_base + 0.5); + } + int FrameRate[] = { frame_rate, frame_rate_base }; + err = NVSetParamValue(encoder_, NVVE_FRAME_RATE, &FrameRate); + CV_Assert( err == 0 ); + + // Select device for encoding + + int gpuID = getDevice(); + err = NVSetParamValue(encoder_, NVVE_FORCE_GPU_SELECTION, &gpuID); + CV_Assert( err == 0 ); + } + + void VideoWriterImpl::setEncodeParams(const EncoderParams& params) + { + int err; + + int P_Interval = params.P_Interval; + err = NVSetParamValue(encoder_, NVVE_P_INTERVAL, &P_Interval); + CV_Assert( err == 0 ); + + int IDR_Period = params.IDR_Period; + err = NVSetParamValue(encoder_, NVVE_IDR_PERIOD, &IDR_Period); + CV_Assert( err == 0 ); + + int DynamicGOP = params.DynamicGOP; + err = NVSetParamValue(encoder_, NVVE_DYNAMIC_GOP, &DynamicGOP); + CV_Assert( err == 0 ); + + NVVE_RateCtrlType RCType = static_cast(params.RCType); + err = NVSetParamValue(encoder_, NVVE_RC_TYPE, &RCType); + CV_Assert( err == 0 ); + + int AvgBitrate = params.AvgBitrate; + err = NVSetParamValue(encoder_, NVVE_AVG_BITRATE, &AvgBitrate); + CV_Assert( err == 0 ); + + int PeakBitrate = params.PeakBitrate; + err = NVSetParamValue(encoder_, NVVE_PEAK_BITRATE, &PeakBitrate); + CV_Assert( err == 0 ); + + int QP_Level_Intra = params.QP_Level_Intra; + err = NVSetParamValue(encoder_, NVVE_QP_LEVEL_INTRA, &QP_Level_Intra); + CV_Assert( err == 0 ); + + int QP_Level_InterP = params.QP_Level_InterP; + err = NVSetParamValue(encoder_, NVVE_QP_LEVEL_INTER_P, &QP_Level_InterP); + CV_Assert( err == 0 ); + + int 
QP_Level_InterB = params.QP_Level_InterB; + err = NVSetParamValue(encoder_, NVVE_QP_LEVEL_INTER_B, &QP_Level_InterB); + CV_Assert( err == 0 ); + + int DeblockMode = params.DeblockMode; + err = NVSetParamValue(encoder_, NVVE_DEBLOCK_MODE, &DeblockMode); + CV_Assert( err == 0 ); + + int ProfileLevel = params.ProfileLevel; + err = NVSetParamValue(encoder_, NVVE_PROFILE_LEVEL, &ProfileLevel); + CV_Assert( err == 0 ); + + int ForceIntra = params.ForceIntra; + err = NVSetParamValue(encoder_, NVVE_FORCE_INTRA, &ForceIntra); + CV_Assert( err == 0 ); + + int ForceIDR = params.ForceIDR; + err = NVSetParamValue(encoder_, NVVE_FORCE_IDR, &ForceIDR); + CV_Assert( err == 0 ); + + int ClearStat = params.ClearStat; + err = NVSetParamValue(encoder_, NVVE_CLEAR_STAT, &ClearStat); + CV_Assert( err == 0 ); + + NVVE_DI_MODE DIMode = static_cast(params.DIMode); + err = NVSetParamValue(encoder_, NVVE_SET_DEINTERLACE, &DIMode); + CV_Assert( err == 0 ); + + if (params.Presets != -1) + { + NVVE_PRESETS_TARGET Presets = static_cast(params.Presets); + err = NVSetParamValue(encoder_, NVVE_PRESETS, &Presets); + CV_Assert( err == 0 ); + } + + int DisableCabac = params.DisableCabac; + err = NVSetParamValue(encoder_, NVVE_DISABLE_CABAC, &DisableCabac); + CV_Assert( err == 0 ); + + int NaluFramingType = params.NaluFramingType; + err = NVSetParamValue(encoder_, NVVE_CONFIGURE_NALU_FRAMING_TYPE, &NaluFramingType); + CV_Assert( err == 0 ); + + int DisableSPSPPS = params.DisableSPSPPS; + err = NVSetParamValue(encoder_, NVVE_DISABLE_SPS_PPS, &DisableSPSPPS); + CV_Assert( err == 0 ); + } + + EncoderParams VideoWriterImpl::getEncoderParams() const + { + int err; + + EncoderParams params; + + int P_Interval; + err = NVGetParamValue(encoder_, NVVE_P_INTERVAL, &P_Interval); + CV_Assert( err == 0 ); + params.P_Interval = P_Interval; + + int IDR_Period; + err = NVGetParamValue(encoder_, NVVE_IDR_PERIOD, &IDR_Period); + CV_Assert( err == 0 ); + params.IDR_Period = IDR_Period; + + int DynamicGOP; + err = 
NVGetParamValue(encoder_, NVVE_DYNAMIC_GOP, &DynamicGOP); + CV_Assert( err == 0 ); + params.DynamicGOP = DynamicGOP; + + NVVE_RateCtrlType RCType; + err = NVGetParamValue(encoder_, NVVE_RC_TYPE, &RCType); + CV_Assert( err == 0 ); + params.RCType = RCType; + + int AvgBitrate; + err = NVGetParamValue(encoder_, NVVE_AVG_BITRATE, &AvgBitrate); + CV_Assert( err == 0 ); + params.AvgBitrate = AvgBitrate; + + int PeakBitrate; + err = NVGetParamValue(encoder_, NVVE_PEAK_BITRATE, &PeakBitrate); + CV_Assert( err == 0 ); + params.PeakBitrate = PeakBitrate; + + int QP_Level_Intra; + err = NVGetParamValue(encoder_, NVVE_QP_LEVEL_INTRA, &QP_Level_Intra); + CV_Assert( err == 0 ); + params.QP_Level_Intra = QP_Level_Intra; + + int QP_Level_InterP; + err = NVGetParamValue(encoder_, NVVE_QP_LEVEL_INTER_P, &QP_Level_InterP); + CV_Assert( err == 0 ); + params.QP_Level_InterP = QP_Level_InterP; + + int QP_Level_InterB; + err = NVGetParamValue(encoder_, NVVE_QP_LEVEL_INTER_B, &QP_Level_InterB); + CV_Assert( err == 0 ); + params.QP_Level_InterB = QP_Level_InterB; + + int DeblockMode; + err = NVGetParamValue(encoder_, NVVE_DEBLOCK_MODE, &DeblockMode); + CV_Assert( err == 0 ); + params.DeblockMode = DeblockMode; + + int ProfileLevel; + err = NVGetParamValue(encoder_, NVVE_PROFILE_LEVEL, &ProfileLevel); + CV_Assert( err == 0 ); + params.ProfileLevel = ProfileLevel; + + int ForceIntra; + err = NVGetParamValue(encoder_, NVVE_FORCE_INTRA, &ForceIntra); + CV_Assert( err == 0 ); + params.ForceIntra = ForceIntra; + + int ForceIDR; + err = NVGetParamValue(encoder_, NVVE_FORCE_IDR, &ForceIDR); + CV_Assert( err == 0 ); + params.ForceIDR = ForceIDR; + + int ClearStat; + err = NVGetParamValue(encoder_, NVVE_CLEAR_STAT, &ClearStat); + CV_Assert( err == 0 ); + params.ClearStat = ClearStat; + + NVVE_DI_MODE DIMode; + err = NVGetParamValue(encoder_, NVVE_SET_DEINTERLACE, &DIMode); + CV_Assert( err == 0 ); + params.DIMode = DIMode; + + params.Presets = -1; + + int DisableCabac; + err = 
NVGetParamValue(encoder_, NVVE_DISABLE_CABAC, &DisableCabac); + CV_Assert( err == 0 ); + params.DisableCabac = DisableCabac; + + int NaluFramingType; + err = NVGetParamValue(encoder_, NVVE_CONFIGURE_NALU_FRAMING_TYPE, &NaluFramingType); + CV_Assert( err == 0 ); + params.NaluFramingType = NaluFramingType; + + int DisableSPSPPS; + err = NVGetParamValue(encoder_, NVVE_DISABLE_SPS_PPS, &DisableSPSPPS); + CV_Assert( err == 0 ); + params.DisableSPSPPS = DisableSPSPPS; + + return params; + } + + void VideoWriterImpl::initGpuMemory() + { + int err; + + // initialize context + GpuMat temp(1, 1, CV_8U); + temp.release(); + + static const int bpp[] = + { + 16, // UYVY, 4:2:2 + 16, // YUY2, 4:2:2 + 12, // YV12, 4:2:0 + 12, // NV12, 4:2:0 + 12, // IYUV, 4:2:0 + }; + + CUcontext cuContext; + cuSafeCall( cuCtxGetCurrent(&cuContext) ); + + // Allocate the CUDA memory Pitched Surface + if (surfaceFormat_ == UYVY || surfaceFormat_ == YUY2) + videoFrame_.create(frameSize_.height, (frameSize_.width * bpp[surfaceFormat_]) / 8, CV_8UC1); + else + videoFrame_.create((frameSize_.height * bpp[surfaceFormat_]) / 8, frameSize_.width, CV_8UC1); + + // Create the Video Context Lock (used for synchronization) + cuSafeCall( cuvidCtxLockCreate(&cuCtxLock_, cuContext) ); + + // If we are using GPU Device Memory with NVCUVENC, it is necessary to create a + // CUDA Context with a Context Lock cuvidCtxLock. 
The Context Lock needs to be passed to NVCUVENC + + int iUseDeviceMem = 1; + err = NVSetParamValue(encoder_, NVVE_DEVICE_MEMORY_INPUT, &iUseDeviceMem); + CV_Assert( err == 0 ); + + err = NVSetParamValue(encoder_, NVVE_DEVICE_CTX_LOCK, &cuCtxLock_); + CV_Assert( err == 0 ); + } + + void VideoWriterImpl::initCallBacks() + { + NVVE_CallbackParams cb; + memset(&cb, 0, sizeof(NVVE_CallbackParams)); + + cb.pfnacquirebitstream = HandleAcquireBitStream; + cb.pfnonbeginframe = HandleOnBeginFrame; + cb.pfnonendframe = HandleOnEndFrame; + cb.pfnreleasebitstream = HandleReleaseBitStream; + + NVRegisterCB(encoder_, cb, this); + } + + void VideoWriterImpl::createHWEncoder() + { + int err; + + // Create the NVIDIA HW resources for Encoding on NVIDIA hardware + err = NVCreateHWEncoder(encoder_); + CV_Assert( err == 0 ); + } -namespace -{ // UYVY/YUY2 are both 4:2:2 formats (16bpc) // Luma, U, V are interleaved, chroma is subsampled (w/2,h) - void copyUYVYorYUY2Frame(cv::Size frameSize, const cv::gpu::GpuMat& src, cv::gpu::GpuMat& dst) + void copyUYVYorYUY2Frame(Size frameSize, const GpuMat& src, GpuMat& dst) { - CUresult res; - // Source is YUVY/YUY2 4:2:2, the YUV data in a packed and interleaved // YUV Copy setup CUDA_MEMCPY2D stCopyYUV422; - memset((void*)&stCopyYUV422, 0, sizeof(stCopyYUV422)); + memset(&stCopyYUV422, 0, sizeof(CUDA_MEMCPY2D)); + stCopyYUV422.srcXInBytes = 0; stCopyYUV422.srcY = 0; stCopyYUV422.srcMemoryType = CU_MEMORYTYPE_DEVICE; @@ -527,21 +514,19 @@ namespace stCopyYUV422.Height = frameSize.height; // DMA Luma/Chroma - res = cuMemcpy2D(&stCopyYUV422); - CV_Assert( res == CUDA_SUCCESS ); + cuSafeCall( cuMemcpy2D(&stCopyYUV422) ); } // YV12/IYUV are both 4:2:0 planar formats (12bpc) // Luma, U, V chroma planar (12bpc), chroma is subsampled (w/2,h/2) - void copyYV12orIYUVFrame(cv::Size frameSize, const cv::gpu::GpuMat& src, cv::gpu::GpuMat& dst) + void copyYV12orIYUVFrame(Size frameSize, const GpuMat& src, GpuMat& dst) { - CUresult res; - // Source is 
YV12/IYUV, this native format is converted to NV12 format by the video encoder // (1) luma copy setup CUDA_MEMCPY2D stCopyLuma; - memset((void*)&stCopyLuma, 0, sizeof(stCopyLuma)); + memset(&stCopyLuma, 0, sizeof(CUDA_MEMCPY2D)); + stCopyLuma.srcXInBytes = 0; stCopyLuma.srcY = 0; stCopyLuma.srcMemoryType = CU_MEMORYTYPE_DEVICE; @@ -563,7 +548,8 @@ namespace // (2) chroma copy setup, U/V can be done together CUDA_MEMCPY2D stCopyChroma; - memset((void*)&stCopyChroma, 0, sizeof(stCopyChroma)); + memset(&stCopyChroma, 0, sizeof(CUDA_MEMCPY2D)); + stCopyChroma.srcXInBytes = 0; stCopyChroma.srcY = frameSize.height << 1; // U/V chroma offset stCopyChroma.srcMemoryType = CU_MEMORYTYPE_DEVICE; @@ -584,26 +570,23 @@ namespace stCopyChroma.Height = frameSize.height; // U/V are sent together // DMA Luma - res = cuMemcpy2D(&stCopyLuma); - CV_Assert( res == CUDA_SUCCESS ); + cuSafeCall( cuMemcpy2D(&stCopyLuma) ); // DMA Chroma channels (UV side by side) - res = cuMemcpy2D(&stCopyChroma); - CV_Assert( res == CUDA_SUCCESS ); + cuSafeCall( cuMemcpy2D(&stCopyChroma) ); } // NV12 is 4:2:0 format (12bpc) // Luma followed by U/V chroma interleaved (12bpc), chroma is subsampled (w/2,h/2) - void copyNV12Frame(cv::Size frameSize, const cv::gpu::GpuMat& src, cv::gpu::GpuMat& dst) + void copyNV12Frame(Size frameSize, const GpuMat& src, GpuMat& dst) { - CUresult res; - // Source is NV12 in pitch linear memory // Because we are assume input is NV12 (if we take input in the native format), the encoder handles NV12 as a native format in pitch linear memory // Luma/Chroma can be done in a single transfer CUDA_MEMCPY2D stCopyNV12; - memset((void*)&stCopyNV12, 0, sizeof(stCopyNV12)); + memset(&stCopyNV12, 0, sizeof(CUDA_MEMCPY2D)); + stCopyNV12.srcXInBytes = 0; stCopyNV12.srcY = 0; stCopyNV12.srcMemoryType = CU_MEMORYTYPE_DEVICE; @@ -621,141 +604,137 @@ namespace stCopyNV12.dstPitch = dst.step; stCopyNV12.WidthInBytes = frameSize.width; - stCopyNV12.Height =(frameSize.height * 3) >> 1; + 
stCopyNV12.Height = (frameSize.height * 3) >> 1; // DMA Luma/Chroma - res = cuMemcpy2D(&stCopyNV12); - CV_Assert( res == CUDA_SUCCESS ); - } -} - -namespace cv { namespace gpu { namespace cudev -{ - void RGB_to_YV12(const PtrStepSzb src, int cn, PtrStepSzb dst, cudaStream_t stream = 0); -}}} - -void cv::gpu::VideoWriter_GPU::Impl::write(const cv::gpu::GpuMat& frame, bool lastFrame) -{ - if (inputFormat_ == SF_BGR) - { - CV_Assert( frame.size() == frameSize_ ); - CV_Assert( frame.type() == CV_8UC1 || frame.type() == CV_8UC3 || frame.type() == CV_8UC4 ); - } - else - { - CV_Assert( frame.size() == videoFrame_.size() ); - CV_Assert( frame.type() == videoFrame_.type() ); + cuSafeCall( cuMemcpy2D(&stCopyNV12) ); } - NVVE_EncodeFrameParams efparams; - efparams.Width = frameSize_.width; - efparams.Height = frameSize_.height; - efparams.Pitch = static_cast(videoFrame_.step); - efparams.SurfFmt = surfaceFormat_; - efparams.PictureStruc = FRAME_PICTURE; - efparams.topfieldfirst = 0; - efparams.repeatFirstField = 0; - efparams.progressiveFrame = (surfaceFormat_ == NV12) ? 
1 : 0; - efparams.bLast = lastFrame; - efparams.picBuf = 0; // Must be set to NULL in order to support device memory input - - // Don't forget we need to lock/unlock between memcopies - CUresult res = cuvidCtxLock(cuCtxLock_, 0); - CV_Assert( res == CUDA_SUCCESS ); - - if (inputFormat_ == SF_BGR) - cv::gpu::cudev::RGB_to_YV12(frame, frame.channels(), videoFrame_); - else + void VideoWriterImpl::write(InputArray _frame, bool lastFrame) { - switch (surfaceFormat_) + GpuMat frame = _frame.getGpuMat(); + + if (inputFormat_ == SF_BGR) { - case UYVY: // UYVY (4:2:2) - case YUY2: // YUY2 (4:2:2) - copyUYVYorYUY2Frame(frameSize_, frame, videoFrame_); - break; - - case YV12: // YV12 (4:2:0), Y V U - case IYUV: // IYUV (4:2:0), Y U V - copyYV12orIYUVFrame(frameSize_, frame, videoFrame_); - break; - - case NV12: // NV12 (4:2:0) - copyNV12Frame(frameSize_, frame, videoFrame_); - break; + CV_Assert( frame.size() == frameSize_ ); + CV_Assert( frame.type() == CV_8UC1 || frame.type() == CV_8UC3 || frame.type() == CV_8UC4 ); } + else + { + CV_Assert( frame.size() == videoFrame_.size() ); + CV_Assert( frame.type() == videoFrame_.type() ); + } + + NVVE_EncodeFrameParams efparams; + efparams.Width = frameSize_.width; + efparams.Height = frameSize_.height; + efparams.Pitch = static_cast(videoFrame_.step); + efparams.SurfFmt = surfaceFormat_; + efparams.PictureStruc = FRAME_PICTURE; + efparams.topfieldfirst = 0; + efparams.repeatFirstField = 0; + efparams.progressiveFrame = (surfaceFormat_ == NV12) ? 
1 : 0; + efparams.bLast = lastFrame; + efparams.picBuf = 0; // Must be set to NULL in order to support device memory input + + // Don't forget we need to lock/unlock between memcopies + cuSafeCall( cuvidCtxLock(cuCtxLock_, 0) ); + + if (inputFormat_ == SF_BGR) + { + cudev::RGB_to_YV12(frame, frame.channels(), videoFrame_); + } + else + { + switch (surfaceFormat_) + { + case UYVY: // UYVY (4:2:2) + case YUY2: // YUY2 (4:2:2) + copyUYVYorYUY2Frame(frameSize_, frame, videoFrame_); + break; + + case YV12: // YV12 (4:2:0), Y V U + case IYUV: // IYUV (4:2:0), Y U V + copyYV12orIYUVFrame(frameSize_, frame, videoFrame_); + break; + + case NV12: // NV12 (4:2:0) + copyNV12Frame(frameSize_, frame, videoFrame_); + break; + } + } + + cuSafeCall( cuvidCtxUnlock(cuCtxLock_, 0) ); + + int err = NVEncodeFrame(encoder_, &efparams, 0, videoFrame_.data); + CV_Assert( err == 0 ); } - res = cuvidCtxUnlock(cuCtxLock_, 0); - CV_Assert( res == CUDA_SUCCESS ); - - int err = NVEncodeFrame(encoder_, &efparams, 0, videoFrame_.data); - CV_Assert( err == 0 ); -} - -unsigned char* NVENCAPI cv::gpu::VideoWriter_GPU::Impl::HandleAcquireBitStream(int* pBufferSize, void* pUserdata) -{ - Impl* thiz = static_cast(pUserdata); - - return thiz->callback_->acquireBitStream(pBufferSize); -} - -void NVENCAPI cv::gpu::VideoWriter_GPU::Impl::HandleReleaseBitStream(int nBytesInBuffer, unsigned char* cb, void* pUserdata) -{ - Impl* thiz = static_cast(pUserdata); - - thiz->callback_->releaseBitStream(cb, nBytesInBuffer); -} - -void NVENCAPI cv::gpu::VideoWriter_GPU::Impl::HandleOnBeginFrame(const NVVE_BeginFrameInfo* pbfi, void* pUserdata) -{ - Impl* thiz = static_cast(pUserdata); - - thiz->callback_->onBeginFrame(pbfi->nFrameNumber, static_cast(pbfi->nPicType)); -} - -void NVENCAPI cv::gpu::VideoWriter_GPU::Impl::HandleOnEndFrame(const NVVE_EndFrameInfo* pefi, void* pUserdata) -{ - Impl* thiz = static_cast(pUserdata); - - thiz->callback_->onEndFrame(pefi->nFrameNumber, static_cast(pefi->nPicType)); -} - 
-/////////////////////////////////////////////////////////////////////////// -// FFMPEG - -class EncoderCallBackFFMPEG : public cv::gpu::VideoWriter_GPU::EncoderCallBack -{ -public: - EncoderCallBackFFMPEG(const cv::String& fileName, cv::Size frameSize, double fps); - ~EncoderCallBackFFMPEG(); - - unsigned char* acquireBitStream(int* bufferSize); - void releaseBitStream(unsigned char* data, int size); - void onBeginFrame(int frameNumber, PicType picType); - void onEndFrame(int frameNumber, PicType picType); - -private: - EncoderCallBackFFMPEG(const EncoderCallBackFFMPEG&); - EncoderCallBackFFMPEG& operator=(const EncoderCallBackFFMPEG&); - - struct OutputMediaStream_FFMPEG* stream_; - std::vector buf_; - bool isKeyFrame_; -}; - -namespace -{ - Create_OutputMediaStream_FFMPEG_Plugin create_OutputMediaStream_FFMPEG_p = 0; - Release_OutputMediaStream_FFMPEG_Plugin release_OutputMediaStream_FFMPEG_p = 0; - Write_OutputMediaStream_FFMPEG_Plugin write_OutputMediaStream_FFMPEG_p = 0; - - bool init_MediaStream_FFMPEG() + unsigned char* NVENCAPI VideoWriterImpl::HandleAcquireBitStream(int* pBufferSize, void* pUserdata) { - static bool initialized = 0; + VideoWriterImpl* thiz = static_cast(pUserdata); + + return thiz->callback_->acquireBitStream(pBufferSize); + } + + void NVENCAPI VideoWriterImpl::HandleReleaseBitStream(int nBytesInBuffer, unsigned char* cb, void* pUserdata) + { + VideoWriterImpl* thiz = static_cast(pUserdata); + + thiz->callback_->releaseBitStream(cb, nBytesInBuffer); + } + + void NVENCAPI VideoWriterImpl::HandleOnBeginFrame(const NVVE_BeginFrameInfo* pbfi, void* pUserdata) + { + VideoWriterImpl* thiz = static_cast(pUserdata); + + thiz->callback_->onBeginFrame(pbfi->nFrameNumber, static_cast(pbfi->nPicType)); + } + + void NVENCAPI VideoWriterImpl::HandleOnEndFrame(const NVVE_EndFrameInfo* pefi, void* pUserdata) + { + VideoWriterImpl* thiz = static_cast(pUserdata); + + thiz->callback_->onEndFrame(pefi->nFrameNumber, static_cast(pefi->nPicType)); + } + + 
/////////////////////////////////////////////////////////////////////////// + // FFMPEG + + class EncoderCallBackFFMPEG : public EncoderCallBack + { + public: + EncoderCallBackFFMPEG(const String& fileName, Size frameSize, double fps); + ~EncoderCallBackFFMPEG(); + + unsigned char* acquireBitStream(int* bufferSize); + void releaseBitStream(unsigned char* data, int size); + void onBeginFrame(int frameNumber, PicType picType); + void onEndFrame(int frameNumber, PicType picType); + + private: + static bool init_MediaStream_FFMPEG(); + + struct OutputMediaStream_FFMPEG* stream_; + std::vector buf_; + bool isKeyFrame_; + + static Create_OutputMediaStream_FFMPEG_Plugin create_OutputMediaStream_FFMPEG_p; + static Release_OutputMediaStream_FFMPEG_Plugin release_OutputMediaStream_FFMPEG_p; + static Write_OutputMediaStream_FFMPEG_Plugin write_OutputMediaStream_FFMPEG_p; + }; + + Create_OutputMediaStream_FFMPEG_Plugin EncoderCallBackFFMPEG::create_OutputMediaStream_FFMPEG_p = 0; + Release_OutputMediaStream_FFMPEG_Plugin EncoderCallBackFFMPEG::release_OutputMediaStream_FFMPEG_p = 0; + Write_OutputMediaStream_FFMPEG_Plugin EncoderCallBackFFMPEG::write_OutputMediaStream_FFMPEG_p = 0; + + bool EncoderCallBackFFMPEG::init_MediaStream_FFMPEG() + { + static bool initialized = false; if (!initialized) { - #if defined WIN32 || defined _WIN32 + #if defined(WIN32) || defined(_WIN32) const char* module_name = "opencv_ffmpeg" CVAUX_STR(CV_VERSION_EPOCH) CVAUX_STR(CV_VERSION_MAJOR) CVAUX_STR(CV_VERSION_MINOR) #if (defined _MSC_VER && defined _M_X64) || (defined __GNUC__ && defined __x86_64__) @@ -776,7 +755,7 @@ namespace initialized = create_OutputMediaStream_FFMPEG_p != 0 && release_OutputMediaStream_FFMPEG_p != 0 && write_OutputMediaStream_FFMPEG_p != 0; } - #elif defined HAVE_FFMPEG + #elif defined(HAVE_FFMPEG) create_OutputMediaStream_FFMPEG_p = create_OutputMediaStream_FFMPEG; release_OutputMediaStream_FFMPEG_p = release_OutputMediaStream_FFMPEG; write_OutputMediaStream_FFMPEG_p = 
write_OutputMediaStream_FFMPEG; @@ -787,134 +766,52 @@ namespace return initialized; } -} -EncoderCallBackFFMPEG::EncoderCallBackFFMPEG(const cv::String& fileName, cv::Size frameSize, double fps) : - stream_(0), isKeyFrame_(false) -{ - int buf_size = std::max(frameSize.area() * 4, 1024 * 1024); - buf_.resize(buf_size); + EncoderCallBackFFMPEG::EncoderCallBackFFMPEG(const String& fileName, Size frameSize, double fps) : + stream_(0), isKeyFrame_(false) + { + int buf_size = std::max(frameSize.area() * 4, 1024 * 1024); + buf_.resize(buf_size); - CV_Assert( init_MediaStream_FFMPEG() ); + CV_Assert( init_MediaStream_FFMPEG() ); - stream_ = create_OutputMediaStream_FFMPEG_p(fileName.c_str(), frameSize.width, frameSize.height, fps); - CV_Assert( stream_ != 0 ); -} + stream_ = create_OutputMediaStream_FFMPEG_p(fileName.c_str(), frameSize.width, frameSize.height, fps); + CV_Assert( stream_ != 0 ); + } -EncoderCallBackFFMPEG::~EncoderCallBackFFMPEG() -{ - release_OutputMediaStream_FFMPEG_p(stream_); -} + EncoderCallBackFFMPEG::~EncoderCallBackFFMPEG() + { + release_OutputMediaStream_FFMPEG_p(stream_); + } -unsigned char* EncoderCallBackFFMPEG::acquireBitStream(int* bufferSize) -{ - *bufferSize = static_cast(buf_.size()); - return &buf_[0]; -} + unsigned char* EncoderCallBackFFMPEG::acquireBitStream(int* bufferSize) + { + *bufferSize = static_cast(buf_.size()); + return &buf_[0]; + } -void EncoderCallBackFFMPEG::releaseBitStream(unsigned char* data, int size) -{ - write_OutputMediaStream_FFMPEG_p(stream_, data, size, isKeyFrame_); -} + void EncoderCallBackFFMPEG::releaseBitStream(unsigned char* data, int size) + { + write_OutputMediaStream_FFMPEG_p(stream_, data, size, isKeyFrame_); + } -void EncoderCallBackFFMPEG::onBeginFrame(int frameNumber, PicType picType) -{ - (void) frameNumber; - isKeyFrame_ = picType == IFRAME; -} + void EncoderCallBackFFMPEG::onBeginFrame(int frameNumber, PicType picType) + { + (void) frameNumber; + isKeyFrame_ = (picType == IFRAME); + } -void 
EncoderCallBackFFMPEG::onEndFrame(int frameNumber, PicType picType) -{ - (void) frameNumber; - (void) picType; + void EncoderCallBackFFMPEG::onEndFrame(int frameNumber, PicType picType) + { + (void) frameNumber; + (void) picType; + } } /////////////////////////////////////////////////////////////////////////// -// VideoWriter_GPU +// EncoderParams -cv::gpu::VideoWriter_GPU::VideoWriter_GPU() -{ -} - -cv::gpu::VideoWriter_GPU::VideoWriter_GPU(const String& fileName, cv::Size frameSize, double fps, SurfaceFormat format) -{ - open(fileName, frameSize, fps, format); -} - -cv::gpu::VideoWriter_GPU::VideoWriter_GPU(const String& fileName, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format) -{ - open(fileName, frameSize, fps, params, format); -} - -cv::gpu::VideoWriter_GPU::VideoWriter_GPU(const cv::Ptr& encoderCallback, cv::Size frameSize, double fps, SurfaceFormat format) -{ - open(encoderCallback, frameSize, fps, format); -} - -cv::gpu::VideoWriter_GPU::VideoWriter_GPU(const cv::Ptr& encoderCallback, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format) -{ - open(encoderCallback, frameSize, fps, params, format); -} - -cv::gpu::VideoWriter_GPU::~VideoWriter_GPU() -{ - close(); -} - -void cv::gpu::VideoWriter_GPU::open(const String& fileName, cv::Size frameSize, double fps, SurfaceFormat format) -{ - close(); - cv::Ptr encoderCallback(new EncoderCallBackFFMPEG(fileName, frameSize, fps)); - open(encoderCallback, frameSize, fps, format); -} - -void cv::gpu::VideoWriter_GPU::open(const String& fileName, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format) -{ - close(); - cv::Ptr encoderCallback(new EncoderCallBackFFMPEG(fileName, frameSize, fps)); - open(encoderCallback, frameSize, fps, params, format); -} - -void cv::gpu::VideoWriter_GPU::open(const cv::Ptr& encoderCallback, cv::Size frameSize, double fps, SurfaceFormat format) -{ - close(); - impl_ = new Impl(encoderCallback, 
frameSize, fps, format); -} - -void cv::gpu::VideoWriter_GPU::open(const cv::Ptr& encoderCallback, cv::Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format) -{ - close(); - impl_ = new Impl(encoderCallback, frameSize, fps, params, format); -} - -bool cv::gpu::VideoWriter_GPU::isOpened() const -{ - return !impl_.empty(); -} - -void cv::gpu::VideoWriter_GPU::close() -{ - impl_.release(); -} - -void cv::gpu::VideoWriter_GPU::write(const cv::gpu::GpuMat& image, bool lastFrame) -{ - CV_Assert( isOpened() ); - - impl_->write(image, lastFrame); -} - -cv::gpu::VideoWriter_GPU::EncoderParams cv::gpu::VideoWriter_GPU::getParams() const -{ - CV_Assert( isOpened() ); - - return impl_->getParams(); -} - -/////////////////////////////////////////////////////////////////////////// -// VideoWriter_GPU::EncoderParams - -cv::gpu::VideoWriter_GPU::EncoderParams::EncoderParams() +cv::gpucodec::EncoderParams::EncoderParams() { P_Interval = 3; IDR_Period = 15; @@ -937,66 +834,86 @@ cv::gpu::VideoWriter_GPU::EncoderParams::EncoderParams() DisableSPSPPS = 0; } -cv::gpu::VideoWriter_GPU::EncoderParams::EncoderParams(const String& configFile) +cv::gpucodec::EncoderParams::EncoderParams(const String& configFile) { load(configFile); } -void cv::gpu::VideoWriter_GPU::EncoderParams::load(const String& configFile) +void cv::gpucodec::EncoderParams::load(const String& configFile) { - cv::FileStorage fs(configFile, cv::FileStorage::READ); + FileStorage fs(configFile, FileStorage::READ); CV_Assert( fs.isOpened() ); - cv::read(fs["P_Interval" ], P_Interval, 3); - cv::read(fs["IDR_Period" ], IDR_Period, 15); - cv::read(fs["DynamicGOP" ], DynamicGOP, 0); - cv::read(fs["RCType" ], RCType, 1); - cv::read(fs["AvgBitrate" ], AvgBitrate, 4000000); - cv::read(fs["PeakBitrate" ], PeakBitrate, 10000000); - cv::read(fs["QP_Level_Intra" ], QP_Level_Intra, 25); - cv::read(fs["QP_Level_InterP"], QP_Level_InterP, 28); - cv::read(fs["QP_Level_InterB"], QP_Level_InterB, 31); - 
cv::read(fs["DeblockMode" ], DeblockMode, 1); - cv::read(fs["ProfileLevel" ], ProfileLevel, 65357); - cv::read(fs["ForceIntra" ], ForceIntra, 0); - cv::read(fs["ForceIDR" ], ForceIDR, 0); - cv::read(fs["ClearStat" ], ClearStat, 0); - cv::read(fs["DIMode" ], DIMode, 1); - cv::read(fs["Presets" ], Presets, 2); - cv::read(fs["DisableCabac" ], DisableCabac, 0); - cv::read(fs["NaluFramingType"], NaluFramingType, 0); - cv::read(fs["DisableSPSPPS" ], DisableSPSPPS, 0); + read(fs["P_Interval" ], P_Interval, 3); + read(fs["IDR_Period" ], IDR_Period, 15); + read(fs["DynamicGOP" ], DynamicGOP, 0); + read(fs["RCType" ], RCType, 1); + read(fs["AvgBitrate" ], AvgBitrate, 4000000); + read(fs["PeakBitrate" ], PeakBitrate, 10000000); + read(fs["QP_Level_Intra" ], QP_Level_Intra, 25); + read(fs["QP_Level_InterP"], QP_Level_InterP, 28); + read(fs["QP_Level_InterB"], QP_Level_InterB, 31); + read(fs["DeblockMode" ], DeblockMode, 1); + read(fs["ProfileLevel" ], ProfileLevel, 65357); + read(fs["ForceIntra" ], ForceIntra, 0); + read(fs["ForceIDR" ], ForceIDR, 0); + read(fs["ClearStat" ], ClearStat, 0); + read(fs["DIMode" ], DIMode, 1); + read(fs["Presets" ], Presets, 2); + read(fs["DisableCabac" ], DisableCabac, 0); + read(fs["NaluFramingType"], NaluFramingType, 0); + read(fs["DisableSPSPPS" ], DisableSPSPPS, 0); } -void cv::gpu::VideoWriter_GPU::EncoderParams::save(const String& configFile) const +void cv::gpucodec::EncoderParams::save(const String& configFile) const { - cv::FileStorage fs(configFile, cv::FileStorage::WRITE); + FileStorage fs(configFile, FileStorage::WRITE); CV_Assert( fs.isOpened() ); - cv::write(fs, "P_Interval" , P_Interval); - cv::write(fs, "IDR_Period" , IDR_Period); - cv::write(fs, "DynamicGOP" , DynamicGOP); - cv::write(fs, "RCType" , RCType); - cv::write(fs, "AvgBitrate" , AvgBitrate); - cv::write(fs, "PeakBitrate" , PeakBitrate); - cv::write(fs, "QP_Level_Intra" , QP_Level_Intra); - cv::write(fs, "QP_Level_InterP", QP_Level_InterP); - cv::write(fs, 
"QP_Level_InterB", QP_Level_InterB); - cv::write(fs, "DeblockMode" , DeblockMode); - cv::write(fs, "ProfileLevel" , ProfileLevel); - cv::write(fs, "ForceIntra" , ForceIntra); - cv::write(fs, "ForceIDR" , ForceIDR); - cv::write(fs, "ClearStat" , ClearStat); - cv::write(fs, "DIMode" , DIMode); - cv::write(fs, "Presets" , Presets); - cv::write(fs, "DisableCabac" , DisableCabac); - cv::write(fs, "NaluFramingType", NaluFramingType); - cv::write(fs, "DisableSPSPPS" , DisableSPSPPS); + write(fs, "P_Interval" , P_Interval); + write(fs, "IDR_Period" , IDR_Period); + write(fs, "DynamicGOP" , DynamicGOP); + write(fs, "RCType" , RCType); + write(fs, "AvgBitrate" , AvgBitrate); + write(fs, "PeakBitrate" , PeakBitrate); + write(fs, "QP_Level_Intra" , QP_Level_Intra); + write(fs, "QP_Level_InterP", QP_Level_InterP); + write(fs, "QP_Level_InterB", QP_Level_InterB); + write(fs, "DeblockMode" , DeblockMode); + write(fs, "ProfileLevel" , ProfileLevel); + write(fs, "ForceIntra" , ForceIntra); + write(fs, "ForceIDR" , ForceIDR); + write(fs, "ClearStat" , ClearStat); + write(fs, "DIMode" , DIMode); + write(fs, "Presets" , Presets); + write(fs, "DisableCabac" , DisableCabac); + write(fs, "NaluFramingType", NaluFramingType); + write(fs, "DisableSPSPPS" , DisableSPSPPS); +} + +/////////////////////////////////////////////////////////////////////////// +// createVideoWriter + +Ptr cv::gpucodec::createVideoWriter(const String& fileName, Size frameSize, double fps, SurfaceFormat format) +{ + Ptr encoderCallback(new EncoderCallBackFFMPEG(fileName, frameSize, fps)); + return createVideoWriter(encoderCallback, frameSize, fps, format); +} + +Ptr cv::gpucodec::createVideoWriter(const String& fileName, Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format) +{ + Ptr encoderCallback(new EncoderCallBackFFMPEG(fileName, frameSize, fps)); + return createVideoWriter(encoderCallback, frameSize, fps, params, format); +} + +Ptr cv::gpucodec::createVideoWriter(const Ptr& 
encoderCallback, Size frameSize, double fps, SurfaceFormat format) +{ + return new VideoWriterImpl(encoderCallback, frameSize, fps, format); +} + +Ptr cv::gpucodec::createVideoWriter(const Ptr& encoderCallback, Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format) +{ + return new VideoWriterImpl(encoderCallback, frameSize, fps, params, format); } #endif // !defined HAVE_CUDA || !defined WIN32 - -template <> void cv::Ptr::delete_obj() -{ - if (obj) delete obj; -} diff --git a/modules/gpucodec/test/test_video.cpp b/modules/gpucodec/test/test_video.cpp index 55fc3f87c..a073a969a 100644 --- a/modules/gpucodec/test/test_video.cpp +++ b/modules/gpucodec/test/test_video.cpp @@ -89,7 +89,7 @@ GPU_TEST_P(Video, Writer) cv::VideoCapture reader(inputFile); ASSERT_TRUE(reader.isOpened()); - cv::gpu::VideoWriter_GPU d_writer; + cv::Ptr d_writer; cv::Mat frame; cv::gpu::GpuMat d_frame; @@ -101,14 +101,14 @@ GPU_TEST_P(Video, Writer) d_frame.upload(frame); - if (!d_writer.isOpened()) - d_writer.open(outputFile, frame.size(), FPS); + if (d_writer.empty()) + d_writer = cv::gpucodec::createVideoWriter(outputFile, frame.size(), FPS); - d_writer.write(d_frame); + d_writer->write(d_frame); } reader.release(); - d_writer.close(); + d_writer.release(); reader.open(outputFile); ASSERT_TRUE(reader.isOpened()); diff --git a/samples/gpu/video_writer.cpp b/samples/gpu/video_writer.cpp index d540d0409..c1bcc5d36 100644 --- a/samples/gpu/video_writer.cpp +++ b/samples/gpu/video_writer.cpp @@ -33,7 +33,7 @@ int main(int argc, const char* argv[]) cv::gpu::printShortCudaDeviceInfo(cv::gpu::getDevice()); cv::VideoWriter writer; - cv::gpu::VideoWriter_GPU d_writer; + cv::Ptr d_writer; cv::Mat frame; cv::gpu::GpuMat d_frame; @@ -64,11 +64,11 @@ int main(int argc, const char* argv[]) return -1; } - if (!d_writer.isOpened()) + if (d_writer.empty()) { std::cout << "Open GPU Writer" << std::endl; - d_writer.open("output_gpu.avi", frame.size(), FPS); + d_writer = 
cv::gpucodec::createVideoWriter("output_gpu.avi", frame.size(), FPS); } d_frame.upload(frame); @@ -81,7 +81,7 @@ int main(int argc, const char* argv[]) cpu_times.push_back(tm.getTimeMilli()); tm.reset(); tm.start(); - d_writer.write(d_frame); + d_writer->write(d_frame); tm.stop(); gpu_times.push_back(tm.getTimeMilli()); } From 76211709a3ee38a7212340fb17b8d12ffbaef506 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Tue, 11 Jun 2013 13:05:02 +0400 Subject: [PATCH 024/121] refactored VideoReader (convert it to abstract interface) also refactored VideoSource interface (made it simplier, now it has only 2 abstract methods) --- modules/gpucodec/doc/videodec.rst | 201 +++------ modules/gpucodec/include/opencv2/gpucodec.hpp | 151 +++---- modules/gpucodec/perf/perf_video.cpp | 5 +- modules/gpucodec/src/cuda/nv12_to_rgb.cu | 7 +- modules/gpucodec/src/cuvid_video_source.cpp | 35 +- ..._video_source.h => cuvid_video_source.hpp} | 25 +- modules/gpucodec/src/ffmpeg_video_source.cpp | 81 +--- ...video_source.h => ffmpeg_video_source.hpp} | 37 +- modules/gpucodec/src/frame_queue.cpp | 13 +- .../src/{frame_queue.h => frame_queue.hpp} | 11 +- modules/gpucodec/src/precomp.hpp | 13 +- modules/gpucodec/src/thread.cpp | 17 +- modules/gpucodec/src/{thread.h => thread.hpp} | 13 +- modules/gpucodec/src/video_decoder.cpp | 7 +- .../{video_decoder.h => video_decoder.hpp} | 25 +- modules/gpucodec/src/video_parser.cpp | 21 +- .../src/{video_parser.h => video_parser.hpp} | 21 +- modules/gpucodec/src/video_reader.cpp | 418 ++++++------------ modules/gpucodec/src/video_source.cpp | 121 +++++ modules/gpucodec/src/video_source.hpp | 99 +++++ modules/gpucodec/test/test_video.cpp | 8 +- modules/superres/src/frame_source.cpp | 10 +- samples/gpu/video_reader.cpp | 5 +- 23 files changed, 616 insertions(+), 728 deletions(-) rename modules/gpucodec/src/{cuvid_video_source.h => cuvid_video_source.hpp} (88%) rename modules/gpucodec/src/{ffmpeg_video_source.h => ffmpeg_video_source.hpp} (75%) 
rename modules/gpucodec/src/{frame_queue.h => frame_queue.hpp} (93%) rename modules/gpucodec/src/{thread.h => thread.hpp} (87%) rename modules/gpucodec/src/{video_decoder.h => video_decoder.hpp} (85%) rename modules/gpucodec/src/{video_parser.h => video_parser.hpp} (92%) create mode 100644 modules/gpucodec/src/video_source.cpp create mode 100644 modules/gpucodec/src/video_source.hpp diff --git a/modules/gpucodec/doc/videodec.rst b/modules/gpucodec/doc/videodec.rst index 342203223..e2da30559 100644 --- a/modules/gpucodec/doc/videodec.rst +++ b/modules/gpucodec/doc/videodec.rst @@ -5,20 +5,37 @@ Video Decoding -gpu::VideoReader_GPU --------------------- -Video reader class. +gpucodec::VideoReader +--------------------- +Video reader interface. -.. ocv:class:: gpu::VideoReader_GPU +.. ocv:class:: gpucodec::VideoReader -gpu::VideoReader_GPU::Codec ---------------------------- +gpucodec::VideoReader::nextFrame +-------------------------------- +Grabs, decodes and returns the next video frame. -Video codecs supported by :ocv:class:`gpu::VideoReader_GPU` . +.. ocv:function:: bool gpucodec::VideoReader::nextFrame(OutputArray frame) -.. ocv:enum:: gpu::VideoReader_GPU::Codec +If no frames has been grabbed (there are no more frames in video file), the methods return ``false`` . The method throws :ocv:class:`Exception` if error occurs. + + + +gpucodec::VideoReader::format +----------------------------- +Returns information about video file format. + +.. ocv:function:: FormatInfo gpucodec::VideoReader::format() const + + + +gpucodec::Codec +--------------- +Video codecs supported by :ocv:class:`gpucodec::VideoReader` . + +.. ocv:enum:: gpucodec::Codec .. ocv:emember:: MPEG1 = 0 .. ocv:emember:: MPEG2 @@ -50,12 +67,12 @@ Video codecs supported by :ocv:class:`gpu::VideoReader_GPU` . UYVY (4:2:2) -gpu::VideoReader_GPU::ChromaFormat ----------------------------------- -Chroma formats supported by :ocv:class:`gpu::VideoReader_GPU` . 
+gpucodec::ChromaFormat +---------------------- +Chroma formats supported by :ocv:class:`gpucodec::VideoReader` . -.. ocv:enum:: gpu::VideoReader_GPU::ChromaFormat +.. ocv:enum:: gpucodec::ChromaFormat .. ocv:emember:: Monochrome = 0 .. ocv:emember:: YUV420 @@ -63,9 +80,10 @@ Chroma formats supported by :ocv:class:`gpu::VideoReader_GPU` . .. ocv:emember:: YUV444 -gpu::VideoReader_GPU::FormatInfo --------------------------------- -.. ocv:struct:: gpu::VideoReader_GPU::FormatInfo + +gpucodec::FormatInfo +-------------------- +.. ocv:struct:: gpucodec::FormatInfo Struct providing information about video file format. :: @@ -78,157 +96,58 @@ Struct providing information about video file format. :: }; -gpu::VideoReader_GPU::VideoReader_GPU -------------------------------------- -Constructors. -.. ocv:function:: gpu::VideoReader_GPU::VideoReader_GPU() -.. ocv:function:: gpu::VideoReader_GPU::VideoReader_GPU(const String& filename) -.. ocv:function:: gpu::VideoReader_GPU::VideoReader_GPU(const cv::Ptr& source) +gpucodec::createVideoReader +--------------------------- +Creates video reader. + +.. ocv:function:: Ptr gpucodec::createVideoReader(const String& filename) +.. ocv:function:: Ptr gpucodec::createVideoReader(const Ptr& source) :param filename: Name of the input video file. - :param source: Video file parser implemented by user. + :param source: RAW video source implemented by user. -The constructors initialize video reader. FFMPEG is used to read videos. User can implement own demultiplexing with :ocv:class:`gpu::VideoReader_GPU::VideoSource` . +FFMPEG is used to read videos. User can implement own demultiplexing with :ocv:class:`gpucodec::RawVideoSource` . -gpu::VideoReader_GPU::open --------------------------- -Initializes or reinitializes video reader. - -.. ocv:function:: void gpu::VideoReader_GPU::open(const String& filename) -.. ocv:function:: void gpu::VideoReader_GPU::open(const cv::Ptr& source) - -The method opens video reader. 
Parameters are the same as in the constructor :ocv:func:`gpu::VideoReader_GPU::VideoReader_GPU` . The method throws :ocv:class:`Exception` if error occurs. - - - -gpu::VideoReader_GPU::isOpened ------------------------------- -Returns true if video reader has been successfully initialized. - -.. ocv:function:: bool gpu::VideoReader_GPU::isOpened() const - - - -gpu::VideoReader_GPU::close ---------------------------- -Releases the video reader. - -.. ocv:function:: void gpu::VideoReader_GPU::close() - - - -gpu::VideoReader_GPU::read --------------------------- -Grabs, decodes and returns the next video frame. - -.. ocv:function:: bool gpu::VideoReader_GPU::read(GpuMat& image) - -If no frames has been grabbed (there are no more frames in video file), the methods return ``false`` . The method throws :ocv:class:`Exception` if error occurs. - - - -gpu::VideoReader_GPU::format ----------------------------- -Returns information about video file format. - -.. ocv:function:: FormatInfo gpu::VideoReader_GPU::format() const - -The method throws :ocv:class:`Exception` if video reader wasn't initialized. - - - -gpu::VideoReader_GPU::dumpFormat --------------------------------- -Dump information about video file format to specified stream. - -.. ocv:function:: void gpu::VideoReader_GPU::dumpFormat(std::ostream& st) - - :param st: Output stream. - -The method throws :ocv:class:`Exception` if video reader wasn't initialized. - - - -gpu::VideoReader_GPU::VideoSource ------------------------------------ -.. ocv:class:: gpu::VideoReader_GPU::VideoSource +gpucodec::RawVideoSource +------------------------ +.. ocv:class:: gpucodec::RawVideoSource Interface for video demultiplexing. 
:: - class VideoSource + class RawVideoSource { public: - VideoSource(); - virtual ~VideoSource() {} + virtual ~RawVideoSource() {} + + virtual bool getNextPacket(unsigned char** data, int* size, bool* endOfFile) = 0; virtual FormatInfo format() const = 0; - virtual void start() = 0; - virtual void stop() = 0; - virtual bool isStarted() const = 0; - virtual bool hasError() const = 0; - - protected: - bool parseVideoData(const unsigned char* data, size_t size, bool endOfStream = false); }; User can implement own demultiplexing by implementing this interface. -gpu::VideoReader_GPU::VideoSource::format ------------------------------------------ -Returns information about video file format. - -.. ocv:function:: virtual FormatInfo gpu::VideoReader_GPU::VideoSource::format() const = 0 - - - -gpu::VideoReader_GPU::VideoSource::start ----------------------------------------- -Starts processing. - -.. ocv:function:: virtual void gpu::VideoReader_GPU::VideoSource::start() = 0 - -Implementation must create own thread with video processing and call periodic :ocv:func:`gpu::VideoReader_GPU::VideoSource::parseVideoData` . - - - -gpu::VideoReader_GPU::VideoSource::stop +gpucodec::RawVideoSource::getNextPacket --------------------------------------- -Stops processing. +Returns next packet with RAW video frame. -.. ocv:function:: virtual void gpu::VideoReader_GPU::VideoSource::stop() = 0 +.. ocv:function:: bool gpucodec::VideoSource::getNextPacket(unsigned char** data, int* size, bool* endOfFile) = 0 - - -gpu::VideoReader_GPU::VideoSource::isStarted --------------------------------------------- -Returns ``true`` if processing was successfully started. - -.. ocv:function:: virtual bool gpu::VideoReader_GPU::VideoSource::isStarted() const = 0 - - - -gpu::VideoReader_GPU::VideoSource::hasError -------------------------------------------- -Returns ``true`` if error occured during processing. - -.. 
ocv:function:: virtual bool gpu::VideoReader_GPU::VideoSource::hasError() const = 0 - - - -gpu::VideoReader_GPU::VideoSource::parseVideoData -------------------------------------------------- -Parse next video frame. Implementation must call this method after new frame was grabbed. - -.. ocv:function:: bool gpu::VideoReader_GPU::VideoSource::parseVideoData(const uchar* data, size_t size, bool endOfStream = false) - - :param data: Pointer to frame data. Can be ``NULL`` if ``endOfStream`` if ``true`` . + :param data: Pointer to frame data. :param size: Size in bytes of current frame. :param endOfStream: Indicates that it is end of stream. + + + +gpucodec::RawVideoSource::format +-------------------------------- +Returns information about video file format. + +.. ocv:function:: virtual FormatInfo gpucodec::RawVideoSource::format() const = 0 diff --git a/modules/gpucodec/include/opencv2/gpucodec.hpp b/modules/gpucodec/include/opencv2/gpucodec.hpp index 8c3cd86c7..f2e298fd7 100644 --- a/modules/gpucodec/include/opencv2/gpucodec.hpp +++ b/modules/gpucodec/include/opencv2/gpucodec.hpp @@ -48,8 +48,6 @@ # error gpucodec.hpp header must be compiled as C++ #endif -#include - #include "opencv2/core/gpu.hpp" namespace cv { namespace gpucodec { @@ -144,112 +142,65 @@ CV_EXPORTS Ptr createVideoWriter(const String& fileName, Size frame CV_EXPORTS Ptr createVideoWriter(const Ptr& encoderCallback, Size frameSize, double fps, SurfaceFormat format = SF_BGR); CV_EXPORTS Ptr createVideoWriter(const Ptr& encoderCallback, Size frameSize, double fps, const EncoderParams& params, SurfaceFormat format = SF_BGR); -}} // namespace cv { namespace gpucodec { - -namespace cv { namespace gpu { - ////////////////////////////////// Video Decoding ////////////////////////////////////////// -namespace detail +enum Codec { - class FrameQueue; - class VideoParser; -} + MPEG1 = 0, + MPEG2, + MPEG4, + VC1, + H264, + JPEG, + H264_SVC, + H264_MVC, -class CV_EXPORTS VideoReader_GPU -{ -public: - enum Codec 
- { - MPEG1 = 0, - MPEG2, - MPEG4, - VC1, - H264, - JPEG, - H264_SVC, - H264_MVC, - - Uncompressed_YUV420 = (('I'<<24)|('Y'<<16)|('U'<<8)|('V')), // Y,U,V (4:2:0) - Uncompressed_YV12 = (('Y'<<24)|('V'<<16)|('1'<<8)|('2')), // Y,V,U (4:2:0) - Uncompressed_NV12 = (('N'<<24)|('V'<<16)|('1'<<8)|('2')), // Y,UV (4:2:0) - Uncompressed_YUYV = (('Y'<<24)|('U'<<16)|('Y'<<8)|('V')), // YUYV/YUY2 (4:2:2) - Uncompressed_UYVY = (('U'<<24)|('Y'<<16)|('V'<<8)|('Y')), // UYVY (4:2:2) - }; - - enum ChromaFormat - { - Monochrome=0, - YUV420, - YUV422, - YUV444, - }; - - struct FormatInfo - { - Codec codec; - ChromaFormat chromaFormat; - int width; - int height; - }; - - class VideoSource; - - VideoReader_GPU(); - explicit VideoReader_GPU(const String& filename); - explicit VideoReader_GPU(const cv::Ptr& source); - - ~VideoReader_GPU(); - - void open(const String& filename); - void open(const cv::Ptr& source); - bool isOpened() const; - - void close(); - - bool read(GpuMat& image); - - FormatInfo format() const; - void dumpFormat(std::ostream& st); - - class CV_EXPORTS VideoSource - { - public: - VideoSource() : frameQueue_(0), videoParser_(0) {} - virtual ~VideoSource() {} - - virtual FormatInfo format() const = 0; - virtual void start() = 0; - virtual void stop() = 0; - virtual bool isStarted() const = 0; - virtual bool hasError() const = 0; - - void setFrameQueue(detail::FrameQueue* frameQueue) { frameQueue_ = frameQueue; } - void setVideoParser(detail::VideoParser* videoParser) { videoParser_ = videoParser; } - - protected: - bool parseVideoData(const uchar* data, size_t size, bool endOfStream = false); - - private: - VideoSource(const VideoSource&); - VideoSource& operator =(const VideoSource&); - - detail::FrameQueue* frameQueue_; - detail::VideoParser* videoParser_; - }; - - class Impl; - -private: - cv::Ptr impl_; + Uncompressed_YUV420 = (('I'<<24)|('Y'<<16)|('U'<<8)|('V')), // Y,U,V (4:2:0) + Uncompressed_YV12 = (('Y'<<24)|('V'<<16)|('1'<<8)|('2')), // Y,V,U (4:2:0) + 
Uncompressed_NV12 = (('N'<<24)|('V'<<16)|('1'<<8)|('2')), // Y,UV (4:2:0) + Uncompressed_YUYV = (('Y'<<24)|('U'<<16)|('Y'<<8)|('V')), // YUYV/YUY2 (4:2:2) + Uncompressed_UYVY = (('U'<<24)|('Y'<<16)|('V'<<8)|('Y')) // UYVY (4:2:2) }; -}} // namespace cv { namespace gpu { +enum ChromaFormat +{ + Monochrome = 0, + YUV420, + YUV422, + YUV444 +}; -namespace cv { +struct FormatInfo +{ + Codec codec; + ChromaFormat chromaFormat; + int width; + int height; +}; -template <> CV_EXPORTS void Ptr::delete_obj(); +class CV_EXPORTS VideoReader +{ +public: + virtual ~VideoReader() {} -} + virtual bool nextFrame(OutputArray frame) = 0; + + virtual FormatInfo format() const = 0; +}; + +class CV_EXPORTS RawVideoSource +{ +public: + virtual ~RawVideoSource() {} + + virtual bool getNextPacket(unsigned char** data, int* size, bool* endOfFile) = 0; + + virtual FormatInfo format() const = 0; +}; + +CV_EXPORTS Ptr createVideoReader(const String& filename); +CV_EXPORTS Ptr createVideoReader(const Ptr& source); + +}} // namespace cv { namespace gpucodec { #endif /* __OPENCV_GPUCODEC_HPP__ */ diff --git a/modules/gpucodec/perf/perf_video.cpp b/modules/gpucodec/perf/perf_video.cpp index ec9237d37..f389605d0 100644 --- a/modules/gpucodec/perf/perf_video.cpp +++ b/modules/gpucodec/perf/perf_video.cpp @@ -74,12 +74,11 @@ PERF_TEST_P(FileName, VideoReader, Values("gpu/video/768x576.avi", "gpu/video/19 if (PERF_RUN_GPU()) { - cv::gpu::VideoReader_GPU d_reader(inputFile); - ASSERT_TRUE( d_reader.isOpened() ); + cv::Ptr d_reader = cv::gpucodec::createVideoReader(inputFile); cv::gpu::GpuMat frame; - TEST_CYCLE_N(10) d_reader.read(frame); + TEST_CYCLE_N(10) d_reader->nextFrame(frame); GPU_SANITY_CHECK(frame); } diff --git a/modules/gpucodec/src/cuda/nv12_to_rgb.cu b/modules/gpucodec/src/cuda/nv12_to_rgb.cu index 536ba2715..1de916e5a 100644 --- a/modules/gpucodec/src/cuda/nv12_to_rgb.cu +++ b/modules/gpucodec/src/cuda/nv12_to_rgb.cu @@ -51,12 +51,7 @@ namespace cv { namespace gpu { namespace cudev { - 
__constant__ float constHueColorSpaceMat[9]; - - void loadHueCSC(float hueCSC[9]) - { - cudaSafeCall( cudaMemcpyToSymbol(constHueColorSpaceMat, hueCSC, 9 * sizeof(float)) ); - } + __constant__ float constHueColorSpaceMat[9] = {1.1644f, 0.0f, 1.596f, 1.1644f, -0.3918f, -0.813f, 1.1644f, 2.0172f, 0.0f}; __device__ void YUV2RGB(const uint* yuvi, float* red, float* green, float* blue) { diff --git a/modules/gpucodec/src/cuvid_video_source.cpp b/modules/gpucodec/src/cuvid_video_source.cpp index 73d6d2426..477951e93 100644 --- a/modules/gpucodec/src/cuvid_video_source.cpp +++ b/modules/gpucodec/src/cuvid_video_source.cpp @@ -7,11 +7,12 @@ // copy or use the software. // // -// License Agreement +// License Agreement // For Open Source Computer Vision Library // // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. // Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. // Third party copyrights are property of their respective owners. 
// // Redistribution and use in source and binary forms, with or without modification, @@ -44,7 +45,11 @@ #ifdef HAVE_NVCUVID -cv::gpu::detail::CuvidVideoSource::CuvidVideoSource(const String& fname) +using namespace cv; +using namespace cv::gpucodec; +using namespace cv::gpucodec::detail; + +cv::gpucodec::detail::CuvidVideoSource::CuvidVideoSource(const String& fname) { CUVIDSOURCEPARAMS params; std::memset(&params, 0, sizeof(CUVIDSOURCEPARAMS)); @@ -55,51 +60,51 @@ cv::gpu::detail::CuvidVideoSource::CuvidVideoSource(const String& fname) params.pfnAudioDataHandler = 0; // now create the actual source - CUresult res = cuvidCreateVideoSource(&videoSource_, fname.c_str(), &params); - if (res == CUDA_ERROR_INVALID_SOURCE) - throw std::runtime_error("Unsupported video source"); - cuSafeCall( res ); + CUresult cuRes = cuvidCreateVideoSource(&videoSource_, fname.c_str(), &params); + if (cuRes == CUDA_ERROR_INVALID_SOURCE) + throw std::runtime_error(""); + cuSafeCall( cuRes ); CUVIDEOFORMAT vidfmt; cuSafeCall( cuvidGetSourceVideoFormat(videoSource_, &vidfmt, 0) ); - format_.codec = static_cast(vidfmt.codec); - format_.chromaFormat = static_cast(vidfmt.chroma_format); + format_.codec = static_cast(vidfmt.codec); + format_.chromaFormat = static_cast(vidfmt.chroma_format); format_.width = vidfmt.coded_width; format_.height = vidfmt.coded_height; } -cv::gpu::detail::CuvidVideoSource::~CuvidVideoSource() +cv::gpucodec::detail::CuvidVideoSource::~CuvidVideoSource() { cuvidDestroyVideoSource(videoSource_); } -cv::gpu::VideoReader_GPU::FormatInfo cv::gpu::detail::CuvidVideoSource::format() const +FormatInfo cv::gpucodec::detail::CuvidVideoSource::format() const { return format_; } -void cv::gpu::detail::CuvidVideoSource::start() +void cv::gpucodec::detail::CuvidVideoSource::start() { cuSafeCall( cuvidSetVideoSourceState(videoSource_, cudaVideoState_Started) ); } -void cv::gpu::detail::CuvidVideoSource::stop() +void cv::gpucodec::detail::CuvidVideoSource::stop() { cuSafeCall( 
cuvidSetVideoSourceState(videoSource_, cudaVideoState_Stopped) ); } -bool cv::gpu::detail::CuvidVideoSource::isStarted() const +bool cv::gpucodec::detail::CuvidVideoSource::isStarted() const { return (cuvidGetVideoSourceState(videoSource_) == cudaVideoState_Started); } -bool cv::gpu::detail::CuvidVideoSource::hasError() const +bool cv::gpucodec::detail::CuvidVideoSource::hasError() const { return (cuvidGetVideoSourceState(videoSource_) == cudaVideoState_Error); } -int CUDAAPI cv::gpu::detail::CuvidVideoSource::HandleVideoData(void* userData, CUVIDSOURCEDATAPACKET* packet) +int CUDAAPI cv::gpucodec::detail::CuvidVideoSource::HandleVideoData(void* userData, CUVIDSOURCEDATAPACKET* packet) { CuvidVideoSource* thiz = static_cast(userData); diff --git a/modules/gpucodec/src/cuvid_video_source.h b/modules/gpucodec/src/cuvid_video_source.hpp similarity index 88% rename from modules/gpucodec/src/cuvid_video_source.h rename to modules/gpucodec/src/cuvid_video_source.hpp index a4a0e8521..c2f0e2f57 100644 --- a/modules/gpucodec/src/cuvid_video_source.h +++ b/modules/gpucodec/src/cuvid_video_source.hpp @@ -7,11 +7,12 @@ // copy or use the software. // // -// License Agreement +// License Agreement // For Open Source Computer Vision Library // // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. // Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. // Third party copyrights are property of their respective owners. 
// // Redistribution and use in source and binary forms, with or without modification, @@ -40,25 +41,25 @@ // //M*/ -#ifndef __CUVUD_VIDEO_SOURCE_H__ -#define __CUVUD_VIDEO_SOURCE_H__ - -#include "opencv2/core/private.gpu.hpp" -#include "opencv2/gpucodec.hpp" -#include "thread.h" +#ifndef __CUVID_VIDEO_SOURCE_HPP__ +#define __CUVID_VIDEO_SOURCE_HPP__ #include -namespace cv { namespace gpu { namespace detail +#include "opencv2/core/private.gpu.hpp" +#include "opencv2/gpucodec.hpp" +#include "video_source.hpp" + +namespace cv { namespace gpucodec { namespace detail { -class CuvidVideoSource : public VideoReader_GPU::VideoSource +class CuvidVideoSource : public VideoSource { public: explicit CuvidVideoSource(const String& fname); ~CuvidVideoSource(); - VideoReader_GPU::FormatInfo format() const; + FormatInfo format() const; void start(); void stop(); bool isStarted() const; @@ -78,9 +79,9 @@ private: static int CUDAAPI HandleVideoData(void* pUserData, CUVIDSOURCEDATAPACKET* pPacket); CUvideosource videoSource_; - VideoReader_GPU::FormatInfo format_; + FormatInfo format_; }; }}} -#endif // __CUVUD_VIDEO_SOURCE_H__ +#endif // __CUVID_VIDEO_SOURCE_HPP__ diff --git a/modules/gpucodec/src/ffmpeg_video_source.cpp b/modules/gpucodec/src/ffmpeg_video_source.cpp index 6ba09284d..b5a73875b 100644 --- a/modules/gpucodec/src/ffmpeg_video_source.cpp +++ b/modules/gpucodec/src/ffmpeg_video_source.cpp @@ -7,11 +7,12 @@ // copy or use the software. // // -// License Agreement +// License Agreement // For Open Source Computer Vision Library // // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. // Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. // Third party copyrights are property of their respective owners. 
// // Redistribution and use in source and binary forms, with or without modification, @@ -48,6 +49,10 @@ #include "../src/cap_ffmpeg_impl.hpp" #endif +using namespace cv; +using namespace cv::gpucodec; +using namespace cv::gpucodec::detail; + namespace { Create_InputMediaStream_FFMPEG_Plugin create_InputMediaStream_FFMPEG_p = 0; @@ -94,7 +99,7 @@ namespace } } -cv::gpu::detail::FFmpegVideoSource::FFmpegVideoSource(const String& fname) : +cv::gpucodec::detail::FFmpegVideoSource::FFmpegVideoSource(const String& fname) : stream_(0) { CV_Assert( init_MediaStream_FFMPEG() ); @@ -106,75 +111,33 @@ cv::gpu::detail::FFmpegVideoSource::FFmpegVideoSource(const String& fname) : stream_ = create_InputMediaStream_FFMPEG_p(fname.c_str(), &codec, &chroma_format, &width, &height); if (!stream_) - CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported video source"); + CV_Error(Error::StsUnsupportedFormat, "Unsupported video source"); - format_.codec = static_cast(codec); - format_.chromaFormat = static_cast(chroma_format); + format_.codec = static_cast(codec); + format_.chromaFormat = static_cast(chroma_format); format_.width = width; format_.height = height; } -cv::gpu::VideoReader_GPU::FormatInfo cv::gpu::detail::FFmpegVideoSource::format() const +cv::gpucodec::detail::FFmpegVideoSource::~FFmpegVideoSource() +{ + if (stream_) + release_InputMediaStream_FFMPEG_p(stream_); +} + +FormatInfo cv::gpucodec::detail::FFmpegVideoSource::format() const { return format_; } -void cv::gpu::detail::FFmpegVideoSource::start() +bool cv::gpucodec::detail::FFmpegVideoSource::getNextPacket(unsigned char** data, int* size, bool* bEndOfFile) { - stop_ = false; - hasError_ = false; - thread_ = new Thread(readLoop, this); -} + int endOfFile; -void cv::gpu::detail::FFmpegVideoSource::stop() -{ - stop_ = true; - thread_->wait(); - thread_.release(); -} + int res = read_InputMediaStream_FFMPEG_p(stream_, data, size, &endOfFile); -bool cv::gpu::detail::FFmpegVideoSource::isStarted() const -{ - return 
!stop_; -} - -bool cv::gpu::detail::FFmpegVideoSource::hasError() const -{ - return hasError_; -} - -void cv::gpu::detail::FFmpegVideoSource::readLoop(void* userData) -{ - FFmpegVideoSource* thiz = static_cast(userData); - - for (;;) - { - unsigned char* data; - int size; - int endOfFile; - - if (!read_InputMediaStream_FFMPEG_p(thiz->stream_, &data, &size, &endOfFile)) - { - thiz->hasError_ = !endOfFile; - break; - } - - if (!thiz->parseVideoData(data, size)) - { - thiz->hasError_ = true; - break; - } - - if (thiz->stop_) - break; - } - - thiz->parseVideoData(0, 0, true); -} - -template <> void cv::Ptr::delete_obj() -{ - if (obj) release_InputMediaStream_FFMPEG_p(obj); + *bEndOfFile = (endOfFile != 0); + return res != 0; } #endif // HAVE_CUDA diff --git a/modules/gpucodec/src/ffmpeg_video_source.h b/modules/gpucodec/src/ffmpeg_video_source.hpp similarity index 75% rename from modules/gpucodec/src/ffmpeg_video_source.h rename to modules/gpucodec/src/ffmpeg_video_source.hpp index d097785d7..6ea59ddac 100644 --- a/modules/gpucodec/src/ffmpeg_video_source.h +++ b/modules/gpucodec/src/ffmpeg_video_source.hpp @@ -7,11 +7,12 @@ // copy or use the software. // // -// License Agreement +// License Agreement // For Open Source Computer Vision Library // // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. // Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. // Third party copyrights are property of their respective owners. 
// // Redistribution and use in source and binary forms, with or without modification, @@ -40,43 +41,31 @@ // //M*/ -#ifndef __FFMPEG_VIDEO_SOURCE_H__ -#define __FFMPEG_VIDEO_SOURCE_H__ +#ifndef __FFMPEG_VIDEO_SOURCE_HPP__ +#define __FFMPEG_VIDEO_SOURCE_HPP__ #include "opencv2/gpucodec.hpp" -#include "thread.h" struct InputMediaStream_FFMPEG; -namespace cv { namespace gpu { namespace detail { +namespace cv { namespace gpucodec { namespace detail { -class FFmpegVideoSource : public VideoReader_GPU::VideoSource +class FFmpegVideoSource : public RawVideoSource { public: FFmpegVideoSource(const String& fname); + ~FFmpegVideoSource(); - VideoReader_GPU::FormatInfo format() const; - void start(); - void stop(); - bool isStarted() const; - bool hasError() const; + bool getNextPacket(unsigned char** data, int* size, bool* endOfFile); + + FormatInfo format() const; private: - VideoReader_GPU::FormatInfo format_; + FormatInfo format_; - cv::Ptr stream_; - - cv::Ptr thread_; - volatile bool stop_; - volatile bool hasError_; - - static void readLoop(void* userData); + InputMediaStream_FFMPEG* stream_; }; }}} -namespace cv { - template <> void Ptr::delete_obj(); -} - -#endif // __FFMPEG_VIDEO_SOURCE_H__ +#endif // __FFMPEG_VIDEO_SOURCE_HPP__ diff --git a/modules/gpucodec/src/frame_queue.cpp b/modules/gpucodec/src/frame_queue.cpp index 2c5045500..f9141d84f 100644 --- a/modules/gpucodec/src/frame_queue.cpp +++ b/modules/gpucodec/src/frame_queue.cpp @@ -7,11 +7,12 @@ // copy or use the software. // // -// License Agreement +// License Agreement // For Open Source Computer Vision Library // // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. // Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. // Third party copyrights are property of their respective owners. 
// // Redistribution and use in source and binary forms, with or without modification, @@ -44,16 +45,16 @@ #ifdef HAVE_NVCUVID -cv::gpu::detail::FrameQueue::FrameQueue() : +cv::gpucodec::detail::FrameQueue::FrameQueue() : endOfDecode_(0), framesInQueue_(0), readPosition_(0) { std::memset(displayQueue_, 0, sizeof(displayQueue_)); - std::memset((void*)isFrameInUse_, 0, sizeof(isFrameInUse_)); + std::memset((void*) isFrameInUse_, 0, sizeof(isFrameInUse_)); } -bool cv::gpu::detail::FrameQueue::waitUntilFrameAvailable(int pictureIndex) +bool cv::gpucodec::detail::FrameQueue::waitUntilFrameAvailable(int pictureIndex) { while (isInUse(pictureIndex)) { @@ -67,7 +68,7 @@ bool cv::gpu::detail::FrameQueue::waitUntilFrameAvailable(int pictureIndex) return true; } -void cv::gpu::detail::FrameQueue::enqueue(const CUVIDPARSERDISPINFO* picParams) +void cv::gpucodec::detail::FrameQueue::enqueue(const CUVIDPARSERDISPINFO* picParams) { // Mark the frame as 'in-use' so we don't re-use it for decoding until it is no longer needed // for display @@ -98,7 +99,7 @@ void cv::gpu::detail::FrameQueue::enqueue(const CUVIDPARSERDISPINFO* picParams) } while (!isEndOfDecode()); } -bool cv::gpu::detail::FrameQueue::dequeue(CUVIDPARSERDISPINFO& displayInfo) +bool cv::gpucodec::detail::FrameQueue::dequeue(CUVIDPARSERDISPINFO& displayInfo) { AutoLock autoLock(mtx_); diff --git a/modules/gpucodec/src/frame_queue.h b/modules/gpucodec/src/frame_queue.hpp similarity index 93% rename from modules/gpucodec/src/frame_queue.h rename to modules/gpucodec/src/frame_queue.hpp index d9a4433b3..c3b427b74 100644 --- a/modules/gpucodec/src/frame_queue.h +++ b/modules/gpucodec/src/frame_queue.hpp @@ -7,11 +7,12 @@ // copy or use the software. // // -// License Agreement +// License Agreement // For Open Source Computer Vision Library // // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. // Copyright (C) 2009, Willow Garage Inc., all rights reserved. 
+// Copyright (C) 2013, OpenCV Foundation, all rights reserved. // Third party copyrights are property of their respective owners. // // Redistribution and use in source and binary forms, with or without modification, @@ -40,15 +41,15 @@ // //M*/ -#ifndef __FRAME_QUEUE_H__ -#define __FRAME_QUEUE_H__ +#ifndef __FRAME_QUEUE_HPP__ +#define __FRAME_QUEUE_HPP__ #include "opencv2/core/utility.hpp" #include "opencv2/core/private.gpu.hpp" #include -namespace cv { namespace gpu { namespace detail +namespace cv { namespace gpucodec { namespace detail { class FrameQueue @@ -94,4 +95,4 @@ private: }}} -#endif // __FRAME_QUEUE_H__ +#endif // __FRAME_QUEUE_HPP__ diff --git a/modules/gpucodec/src/precomp.hpp b/modules/gpucodec/src/precomp.hpp index 2afb0abb1..7cef1b7a9 100644 --- a/modules/gpucodec/src/precomp.hpp +++ b/modules/gpucodec/src/precomp.hpp @@ -67,12 +67,13 @@ #include #endif - #include "thread.h" - #include "ffmpeg_video_source.h" - #include "cuvid_video_source.h" - #include "frame_queue.h" - #include "video_decoder.h" - #include "video_parser.h" + #include "thread.hpp" + #include "video_source.hpp" + #include "ffmpeg_video_source.hpp" + #include "cuvid_video_source.hpp" + #include "frame_queue.hpp" + #include "video_decoder.hpp" + #include "video_parser.hpp" #include "../src/cap_ffmpeg_api.hpp" #endif diff --git a/modules/gpucodec/src/thread.cpp b/modules/gpucodec/src/thread.cpp index db9f3de39..b936d8e21 100644 --- a/modules/gpucodec/src/thread.cpp +++ b/modules/gpucodec/src/thread.cpp @@ -7,11 +7,12 @@ // copy or use the software. // // -// License Agreement +// License Agreement // For Open Source Computer Vision Library // // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. // Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. // Third party copyrights are property of their respective owners. 
// // Redistribution and use in source and binary forms, with or without modification, @@ -44,7 +45,7 @@ #ifdef HAVE_NVCUVID -using namespace cv::gpu::detail; +using namespace cv::gpucodec::detail; #ifdef WIN32 @@ -66,7 +67,7 @@ namespace } } -class cv::gpu::detail::Thread::Impl +class cv::gpucodec::detail::Thread::Impl { public: Impl(Thread::Func func, void* userData) @@ -119,7 +120,7 @@ namespace } } -class cv::gpu::detail::Thread::Impl +class cv::gpucodec::detail::Thread::Impl { public: Impl(Thread::Func func, void* userData) @@ -147,17 +148,17 @@ private: #endif -cv::gpu::detail::Thread::Thread(Func func, void* userData) : +cv::gpucodec::detail::Thread::Thread(Func func, void* userData) : impl_(new Impl(func, userData)) { } -void cv::gpu::detail::Thread::wait() +void cv::gpucodec::detail::Thread::wait() { impl_->wait(); } -void cv::gpu::detail::Thread::sleep(int ms) +void cv::gpucodec::detail::Thread::sleep(int ms) { #ifdef WIN32 ::Sleep(ms); @@ -166,7 +167,7 @@ void cv::gpu::detail::Thread::sleep(int ms) #endif } -template <> void cv::Ptr::delete_obj() +template <> void cv::Ptr::delete_obj() { if (obj) delete obj; } diff --git a/modules/gpucodec/src/thread.h b/modules/gpucodec/src/thread.hpp similarity index 87% rename from modules/gpucodec/src/thread.h rename to modules/gpucodec/src/thread.hpp index 1489f5830..ccda5b5c7 100644 --- a/modules/gpucodec/src/thread.h +++ b/modules/gpucodec/src/thread.hpp @@ -7,11 +7,12 @@ // copy or use the software. // // -// License Agreement +// License Agreement // For Open Source Computer Vision Library // // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. // Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. // Third party copyrights are property of their respective owners. 
// // Redistribution and use in source and binary forms, with or without modification, @@ -40,12 +41,12 @@ // //M*/ -#ifndef __THREAD_WRAPPERS_H__ -#define __THREAD_WRAPPERS_H__ +#ifndef __THREAD_WRAPPERS_HPP__ +#define __THREAD_WRAPPERS_HPP__ #include "opencv2/core.hpp" -namespace cv { namespace gpu { namespace detail { +namespace cv { namespace gpucodec { namespace detail { class Thread { @@ -67,7 +68,7 @@ private: }}} namespace cv { - template <> void Ptr::delete_obj(); + template <> void Ptr::delete_obj(); } -#endif // __THREAD_WRAPPERS_H__ +#endif // __THREAD_WRAPPERS_HPP__ diff --git a/modules/gpucodec/src/video_decoder.cpp b/modules/gpucodec/src/video_decoder.cpp index 7e28e872b..d734ef363 100644 --- a/modules/gpucodec/src/video_decoder.cpp +++ b/modules/gpucodec/src/video_decoder.cpp @@ -7,11 +7,12 @@ // copy or use the software. // // -// License Agreement +// License Agreement // For Open Source Computer Vision Library // // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. // Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. // Third party copyrights are property of their respective owners. 
// // Redistribution and use in source and binary forms, with or without modification, @@ -44,7 +45,7 @@ #ifdef HAVE_NVCUVID -void cv::gpu::detail::VideoDecoder::create(const VideoReader_GPU::FormatInfo& videoFormat) +void cv::gpucodec::detail::VideoDecoder::create(const FormatInfo& videoFormat) { release(); @@ -103,7 +104,7 @@ void cv::gpu::detail::VideoDecoder::create(const VideoReader_GPU::FormatInfo& vi cuSafeCall( cuvidCreateDecoder(&decoder_, &createInfo_) ); } -void cv::gpu::detail::VideoDecoder::release() +void cv::gpucodec::detail::VideoDecoder::release() { if (decoder_) { diff --git a/modules/gpucodec/src/video_decoder.h b/modules/gpucodec/src/video_decoder.hpp similarity index 85% rename from modules/gpucodec/src/video_decoder.h rename to modules/gpucodec/src/video_decoder.hpp index 7a36335cc..05a92f266 100644 --- a/modules/gpucodec/src/video_decoder.h +++ b/modules/gpucodec/src/video_decoder.hpp @@ -7,11 +7,12 @@ // copy or use the software. // // -// License Agreement +// License Agreement // For Open Source Computer Vision Library // // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. // Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. // Third party copyrights are property of their respective owners. 
// // Redistribution and use in source and binary forms, with or without modification, @@ -40,21 +41,21 @@ // //M*/ -#ifndef __VIDEO_DECODER_H__ -#define __VIDEO_DECODER_H__ +#ifndef __VIDEO_DECODER_HPP__ +#define __VIDEO_DECODER_HPP__ + +#include #include "opencv2/core/private.gpu.hpp" #include "opencv2/gpucodec.hpp" -#include - -namespace cv { namespace gpu { namespace detail +namespace cv { namespace gpucodec { namespace detail { class VideoDecoder { public: - VideoDecoder(const VideoReader_GPU::FormatInfo& videoFormat, CUvideoctxlock lock) : lock_(lock), decoder_(0) + VideoDecoder(const FormatInfo& videoFormat, CUvideoctxlock lock) : lock_(lock), decoder_(0) { create(videoFormat); } @@ -64,7 +65,7 @@ public: release(); } - void create(const VideoReader_GPU::FormatInfo& videoFormat); + void create(const FormatInfo& videoFormat); void release(); // Get the code-type currently used. @@ -84,17 +85,17 @@ public: return cuvidDecodePicture(decoder_, picParams) == CUDA_SUCCESS; } - cv::gpu::GpuMat mapFrame(int picIdx, CUVIDPROCPARAMS& videoProcParams) + gpu::GpuMat mapFrame(int picIdx, CUVIDPROCPARAMS& videoProcParams) { CUdeviceptr ptr; unsigned int pitch; cuSafeCall( cuvidMapVideoFrame(decoder_, picIdx, &ptr, &pitch, &videoProcParams) ); - return GpuMat(targetHeight() * 3 / 2, targetWidth(), CV_8UC1, (void*) ptr, pitch); + return gpu::GpuMat(targetHeight() * 3 / 2, targetWidth(), CV_8UC1, (void*) ptr, pitch); } - void unmapFrame(cv::gpu::GpuMat& frame) + void unmapFrame(gpu::GpuMat& frame) { cuSafeCall( cuvidUnmapVideoFrame(decoder_, (CUdeviceptr) frame.data) ); frame.release(); @@ -108,4 +109,4 @@ private: }}} -#endif // __VIDEO_DECODER_H__ +#endif // __VIDEO_DECODER_HPP__ diff --git a/modules/gpucodec/src/video_parser.cpp b/modules/gpucodec/src/video_parser.cpp index 620f85fe8..66aab62ad 100644 --- a/modules/gpucodec/src/video_parser.cpp +++ b/modules/gpucodec/src/video_parser.cpp @@ -7,11 +7,12 @@ // copy or use the software. 
// // -// License Agreement +// License Agreement // For Open Source Computer Vision Library // // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. // Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. // Third party copyrights are property of their respective owners. // // Redistribution and use in source and binary forms, with or without modification, @@ -44,11 +45,11 @@ #ifdef HAVE_NVCUVID -cv::gpu::detail::VideoParser::VideoParser(VideoDecoder* videoDecoder, FrameQueue* frameQueue) : +cv::gpucodec::detail::VideoParser::VideoParser(VideoDecoder* videoDecoder, FrameQueue* frameQueue) : videoDecoder_(videoDecoder), frameQueue_(frameQueue), unparsedPackets_(0), hasError_(false) { CUVIDPARSERPARAMS params; - memset(&params, 0, sizeof(CUVIDPARSERPARAMS)); + std::memset(&params, 0, sizeof(CUVIDPARSERPARAMS)); params.CodecType = videoDecoder->codec(); params.ulMaxNumDecodeSurfaces = videoDecoder->maxDecodeSurfaces(); @@ -61,7 +62,7 @@ cv::gpu::detail::VideoParser::VideoParser(VideoDecoder* videoDecoder, FrameQueue cuSafeCall( cuvidCreateVideoParser(&parser_, &params) ); } -bool cv::gpu::detail::VideoParser::parseVideoData(const unsigned char* data, size_t size, bool endOfStream) +bool cv::gpucodec::detail::VideoParser::parseVideoData(const unsigned char* data, size_t size, bool endOfStream) { CUVIDSOURCEDATAPACKET packet; std::memset(&packet, 0, sizeof(CUVIDSOURCEDATAPACKET)); @@ -95,7 +96,7 @@ bool cv::gpu::detail::VideoParser::parseVideoData(const unsigned char* data, siz return !frameQueue_->isEndOfDecode(); } -int CUDAAPI cv::gpu::detail::VideoParser::HandleVideoSequence(void* userData, CUVIDEOFORMAT* format) +int CUDAAPI cv::gpucodec::detail::VideoParser::HandleVideoSequence(void* userData, CUVIDEOFORMAT* format) { VideoParser* thiz = static_cast(userData); @@ -106,10 +107,10 @@ int CUDAAPI cv::gpu::detail::VideoParser::HandleVideoSequence(void* userData, CU format->coded_height != 
thiz->videoDecoder_->frameHeight() || format->chroma_format != thiz->videoDecoder_->chromaFormat()) { - VideoReader_GPU::FormatInfo newFormat; + FormatInfo newFormat; - newFormat.codec = static_cast(format->codec); - newFormat.chromaFormat = static_cast(format->chroma_format); + newFormat.codec = static_cast(format->codec); + newFormat.chromaFormat = static_cast(format->chroma_format); newFormat.width = format->coded_width; newFormat.height = format->coded_height; @@ -127,7 +128,7 @@ int CUDAAPI cv::gpu::detail::VideoParser::HandleVideoSequence(void* userData, CU return true; } -int CUDAAPI cv::gpu::detail::VideoParser::HandlePictureDecode(void* userData, CUVIDPICPARAMS* picParams) +int CUDAAPI cv::gpucodec::detail::VideoParser::HandlePictureDecode(void* userData, CUVIDPICPARAMS* picParams) { VideoParser* thiz = static_cast(userData); @@ -147,7 +148,7 @@ int CUDAAPI cv::gpu::detail::VideoParser::HandlePictureDecode(void* userData, CU return true; } -int CUDAAPI cv::gpu::detail::VideoParser::HandlePictureDisplay(void* userData, CUVIDPARSERDISPINFO* picParams) +int CUDAAPI cv::gpucodec::detail::VideoParser::HandlePictureDisplay(void* userData, CUVIDPARSERDISPINFO* picParams) { VideoParser* thiz = static_cast(userData); diff --git a/modules/gpucodec/src/video_parser.h b/modules/gpucodec/src/video_parser.hpp similarity index 92% rename from modules/gpucodec/src/video_parser.h rename to modules/gpucodec/src/video_parser.hpp index e11b7eff6..b4dddb389 100644 --- a/modules/gpucodec/src/video_parser.h +++ b/modules/gpucodec/src/video_parser.hpp @@ -7,11 +7,12 @@ // copy or use the software. // // -// License Agreement +// License Agreement // For Open Source Computer Vision Library // // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. // Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. // Third party copyrights are property of their respective owners. 
// // Redistribution and use in source and binary forms, with or without modification, @@ -40,17 +41,17 @@ // //M*/ -#ifndef __VIDEO_PARSER_H__ -#define __VIDEO_PARSER_H__ - -#include "opencv2/core/private.gpu.hpp" -#include "opencv2/gpucodec.hpp" -#include "frame_queue.h" -#include "video_decoder.h" +#ifndef __VIDEO_PARSER_HPP__ +#define __VIDEO_PARSER_HPP__ #include -namespace cv { namespace gpu { namespace detail +#include "opencv2/core/private.gpu.hpp" +#include "opencv2/gpucodec.hpp" +#include "frame_queue.hpp" +#include "video_decoder.hpp" + +namespace cv { namespace gpucodec { namespace detail { class VideoParser @@ -91,4 +92,4 @@ private: }}} -#endif // __VIDEO_PARSER_H__ +#endif // __VIDEO_PARSER_HPP__ diff --git a/modules/gpucodec/src/video_reader.cpp b/modules/gpucodec/src/video_reader.cpp index dbb4bbcf2..67e9cd107 100644 --- a/modules/gpucodec/src/video_reader.cpp +++ b/modules/gpucodec/src/video_reader.cpp @@ -42,88 +42,77 @@ #include "precomp.hpp" +using namespace cv; +using namespace cv::gpu; +using namespace cv::gpucodec; + #ifndef HAVE_NVCUVID -class cv::gpu::VideoReader_GPU::Impl -{ -}; - -cv::gpu::VideoReader_GPU::VideoReader_GPU() { throw_no_cuda(); } -cv::gpu::VideoReader_GPU::VideoReader_GPU(const String&) { throw_no_cuda(); } -cv::gpu::VideoReader_GPU::VideoReader_GPU(const cv::Ptr&) { throw_no_cuda(); } -cv::gpu::VideoReader_GPU::~VideoReader_GPU() { } -void cv::gpu::VideoReader_GPU::open(const String&) { throw_no_cuda(); } -void cv::gpu::VideoReader_GPU::open(const cv::Ptr&) { throw_no_cuda(); } -bool cv::gpu::VideoReader_GPU::isOpened() const { return false; } -void cv::gpu::VideoReader_GPU::close() { } -bool cv::gpu::VideoReader_GPU::read(GpuMat&) { throw_no_cuda(); return false; } -cv::gpu::VideoReader_GPU::FormatInfo cv::gpu::VideoReader_GPU::format() const { throw_no_cuda(); FormatInfo format_ = {MPEG1,Monochrome,0,0}; return format_; } -bool cv::gpu::VideoReader_GPU::VideoSource::parseVideoData(const unsigned char*, size_t, bool) { 
throw_no_cuda(); return false; } -void cv::gpu::VideoReader_GPU::dumpFormat(std::ostream&) { throw_no_cuda(); } +Ptr cv::gpucodec::createVideoReader(const String&) { throw_no_cuda(); return Ptr(); } +Ptr cv::gpucodec::createVideoReader(const Ptr&) { throw_no_cuda(); return Ptr(); } #else // HAVE_NVCUVID -class cv::gpu::VideoReader_GPU::Impl -{ -public: - explicit Impl(const cv::Ptr& source); - ~Impl(); - - bool grab(cv::gpu::GpuMat& frame); - - cv::gpu::VideoReader_GPU::FormatInfo format() const { return videoSource_->format(); } - -private: - cv::Ptr videoSource_; - - cv::Ptr frameQueue_; - cv::Ptr videoDecoder_; - cv::Ptr videoParser_; - - CUvideoctxlock lock_; - - std::deque< std::pair > frames_; -}; - -cv::gpu::VideoReader_GPU::Impl::Impl(const cv::Ptr& source) : - videoSource_(source), - lock_(0) -{ - // init context - GpuMat temp(1, 1, CV_8UC1); - temp.release(); - - DeviceInfo devInfo; - CV_Assert( devInfo.supports(FEATURE_SET_COMPUTE_11) ); - - CUcontext ctx; - cuSafeCall( cuCtxGetCurrent(&ctx) ); - cuSafeCall( cuvidCtxLockCreate(&lock_, ctx) ); - - frameQueue_ = new detail::FrameQueue; - videoDecoder_ = new detail::VideoDecoder(videoSource_->format(), lock_); - videoParser_ = new detail::VideoParser(videoDecoder_, frameQueue_); - - videoSource_->setFrameQueue(frameQueue_); - videoSource_->setVideoParser(videoParser_); - - videoSource_->start(); -} - -cv::gpu::VideoReader_GPU::Impl::~Impl() -{ - frameQueue_->endDecode(); - videoSource_->stop(); -} - namespace cv { namespace gpu { namespace cudev { - void loadHueCSC(float hueCSC[9]); void NV12_to_RGB(const PtrStepb decodedFrame, PtrStepSz interopFrame, cudaStream_t stream = 0); }}} namespace { + class VideoReaderImpl : public VideoReader + { + public: + explicit VideoReaderImpl(const Ptr& source); + ~VideoReaderImpl(); + + bool nextFrame(OutputArray frame); + + FormatInfo format() const; + + private: + Ptr videoSource_; + + Ptr frameQueue_; + Ptr videoDecoder_; + Ptr videoParser_; + + CUvideoctxlock lock_; + 
+ std::deque< std::pair > frames_; + }; + + FormatInfo VideoReaderImpl::format() const + { + return videoSource_->format(); + } + + VideoReaderImpl::VideoReaderImpl(const Ptr& source) : + videoSource_(source), + lock_(0) + { + // init context + GpuMat temp(1, 1, CV_8UC1); + temp.release(); + + CUcontext ctx; + cuSafeCall( cuCtxGetCurrent(&ctx) ); + cuSafeCall( cuvidCtxLockCreate(&lock_, ctx) ); + + frameQueue_ = new detail::FrameQueue; + videoDecoder_ = new detail::VideoDecoder(videoSource_->format(), lock_); + videoParser_ = new detail::VideoParser(videoDecoder_, frameQueue_); + + videoSource_->setVideoParser(videoParser_); + videoSource_->start(); + } + + VideoReaderImpl::~VideoReaderImpl() + { + frameQueue_->endDecode(); + videoSource_->stop(); + } + class VideoCtxAutoLock { public: @@ -134,259 +123,114 @@ namespace CUvideoctxlock m_lock; }; - enum ColorSpace - { - ITU601 = 1, - ITU709 = 2 - }; - - void setColorSpaceMatrix(ColorSpace CSC, float hueCSC[9], float hue) - { - float hueSin = std::sin(hue); - float hueCos = std::cos(hue); - - if (CSC == ITU601) - { - //CCIR 601 - hueCSC[0] = 1.1644f; - hueCSC[1] = hueSin * 1.5960f; - hueCSC[2] = hueCos * 1.5960f; - hueCSC[3] = 1.1644f; - hueCSC[4] = (hueCos * -0.3918f) - (hueSin * 0.8130f); - hueCSC[5] = (hueSin * 0.3918f) - (hueCos * 0.8130f); - hueCSC[6] = 1.1644f; - hueCSC[7] = hueCos * 2.0172f; - hueCSC[8] = hueSin * -2.0172f; - } - else if (CSC == ITU709) - { - //CCIR 709 - hueCSC[0] = 1.0f; - hueCSC[1] = hueSin * 1.57480f; - hueCSC[2] = hueCos * 1.57480f; - hueCSC[3] = 1.0; - hueCSC[4] = (hueCos * -0.18732f) - (hueSin * 0.46812f); - hueCSC[5] = (hueSin * 0.18732f) - (hueCos * 0.46812f); - hueCSC[6] = 1.0f; - hueCSC[7] = hueCos * 1.85560f; - hueCSC[8] = hueSin * -1.85560f; - } - } - - void cudaPostProcessFrame(const cv::gpu::GpuMat& decodedFrame, cv::gpu::GpuMat& interopFrame, int width, int height) + void cudaPostProcessFrame(const GpuMat& decodedFrame, OutputArray _outFrame, int width, int height) { using 
namespace cv::gpu::cudev; - static bool updateCSC = true; - static float hueColorSpaceMat[9]; - - // Upload the Color Space Conversion Matrices - if (updateCSC) - { - const ColorSpace colorSpace = ITU601; - const float hue = 0.0f; - - // CCIR 601/709 - setColorSpaceMatrix(colorSpace, hueColorSpaceMat, hue); - - updateCSC = false; - } - // Final Stage: NV12toARGB color space conversion - interopFrame.create(height, width, CV_8UC4); + _outFrame.create(height, width, CV_8UC4); + GpuMat outFrame = _outFrame.getGpuMat(); - loadHueCSC(hueColorSpaceMat); - - NV12_to_RGB(decodedFrame, interopFrame); + NV12_to_RGB(decodedFrame, outFrame); } -} -bool cv::gpu::VideoReader_GPU::Impl::grab(GpuMat& frame) -{ - if (videoSource_->hasError() || videoParser_->hasError()) - CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported video source"); - - if (!videoSource_->isStarted() || frameQueue_->isEndOfDecode()) - return false; - - if (frames_.empty()) + bool VideoReaderImpl::nextFrame(OutputArray frame) { - CUVIDPARSERDISPINFO displayInfo; + if (videoSource_->hasError() || videoParser_->hasError()) + CV_Error(Error::StsUnsupportedFormat, "Unsupported video source"); - for (;;) + if (!videoSource_->isStarted() || frameQueue_->isEndOfDecode()) + return false; + + if (frames_.empty()) { - if (frameQueue_->dequeue(displayInfo)) - break; + CUVIDPARSERDISPINFO displayInfo; - if (videoSource_->hasError() || videoParser_->hasError()) - CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported video source"); + for (;;) + { + if (frameQueue_->dequeue(displayInfo)) + break; - if (frameQueue_->isEndOfDecode()) - return false; + if (videoSource_->hasError() || videoParser_->hasError()) + CV_Error(Error::StsUnsupportedFormat, "Unsupported video source"); - // Wait a bit - detail::Thread::sleep(1); + if (frameQueue_->isEndOfDecode()) + return false; + + // Wait a bit + detail::Thread::sleep(1); + } + + bool isProgressive = displayInfo.progressive_frame != 0; + const int num_fields = isProgressive ? 
1 : 2 + displayInfo.repeat_first_field; + + for (int active_field = 0; active_field < num_fields; ++active_field) + { + CUVIDPROCPARAMS videoProcParams; + std::memset(&videoProcParams, 0, sizeof(CUVIDPROCPARAMS)); + + videoProcParams.progressive_frame = displayInfo.progressive_frame; + videoProcParams.second_field = active_field; + videoProcParams.top_field_first = displayInfo.top_field_first; + videoProcParams.unpaired_field = (num_fields == 1); + + frames_.push_back(std::make_pair(displayInfo, videoProcParams)); + } } - bool isProgressive = displayInfo.progressive_frame != 0; - const int num_fields = isProgressive ? 1 : 2 + displayInfo.repeat_first_field; + if (frames_.empty()) + return false; + + std::pair frameInfo = frames_.front(); + frames_.pop_front(); - for (int active_field = 0; active_field < num_fields; ++active_field) { - CUVIDPROCPARAMS videoProcParams; - std::memset(&videoProcParams, 0, sizeof(CUVIDPROCPARAMS)); + VideoCtxAutoLock autoLock(lock_); - videoProcParams.progressive_frame = displayInfo.progressive_frame; - videoProcParams.second_field = active_field; - videoProcParams.top_field_first = displayInfo.top_field_first; - videoProcParams.unpaired_field = (num_fields == 1); + // map decoded video frame to CUDA surface + GpuMat decodedFrame = videoDecoder_->mapFrame(frameInfo.first.picture_index, frameInfo.second); - frames_.push_back(std::make_pair(displayInfo, videoProcParams)); + // perform post processing on the CUDA surface (performs colors space conversion and post processing) + // comment this out if we inclue the line of code seen above + cudaPostProcessFrame(decodedFrame, frame, videoDecoder_->targetWidth(), videoDecoder_->targetHeight()); + + // unmap video frame + // unmapFrame() synchronizes with the VideoDecode API (ensures the frame has finished decoding) + videoDecoder_->unmapFrame(decodedFrame); } + + // release the frame, so it can be re-used in decoder + if (frames_.empty()) + frameQueue_->releaseFrame(frameInfo.first); + + 
return true; } - - if (frames_.empty()) - return false; - - std::pair frameInfo = frames_.front(); - frames_.pop_front(); - - { - VideoCtxAutoLock autoLock(lock_); - - // map decoded video frame to CUDA surface - cv::gpu::GpuMat decodedFrame = videoDecoder_->mapFrame(frameInfo.first.picture_index, frameInfo.second); - - // perform post processing on the CUDA surface (performs colors space conversion and post processing) - // comment this out if we inclue the line of code seen above - cudaPostProcessFrame(decodedFrame, frame, videoDecoder_->targetWidth(), videoDecoder_->targetHeight()); - - // unmap video frame - // unmapFrame() synchronizes with the VideoDecode API (ensures the frame has finished decoding) - videoDecoder_->unmapFrame(decodedFrame); - } - - // release the frame, so it can be re-used in decoder - if (frames_.empty()) - frameQueue_->releaseFrame(frameInfo.first); - - return true; } -//////////////////////////////////////////////////////////////////////////// - -cv::gpu::VideoReader_GPU::VideoReader_GPU() -{ -} - -cv::gpu::VideoReader_GPU::VideoReader_GPU(const String& filename) -{ - open(filename); -} - -cv::gpu::VideoReader_GPU::VideoReader_GPU(const cv::Ptr& source) -{ - open(source); -} - -cv::gpu::VideoReader_GPU::~VideoReader_GPU() -{ - close(); -} - -void cv::gpu::VideoReader_GPU::open(const String& filename) +Ptr cv::gpucodec::createVideoReader(const String& filename) { CV_Assert( !filename.empty() ); -#ifndef __APPLE__ + Ptr videoSource; + try { - cv::Ptr source(new detail::CuvidVideoSource(filename)); - open(source); + videoSource = new detail::CuvidVideoSource(filename); } - catch (const std::runtime_error&) -#endif + catch (...) 
{ - cv::Ptr source(new cv::gpu::detail::FFmpegVideoSource(filename)); - open(source); - } -} - -void cv::gpu::VideoReader_GPU::open(const cv::Ptr& source) -{ - CV_Assert( !source.empty() ); - close(); - impl_ = new Impl(source); -} - -bool cv::gpu::VideoReader_GPU::isOpened() const -{ - return !impl_.empty(); -} - -void cv::gpu::VideoReader_GPU::close() -{ - impl_.release(); -} - -bool cv::gpu::VideoReader_GPU::read(GpuMat& image) -{ - if (!isOpened()) - return false; - - if (!impl_->grab(image)) - { - close(); - return false; + Ptr source(new detail::FFmpegVideoSource(filename)); + videoSource = new detail::RawVideoSourceWrapper(source); } - return true; + return new VideoReaderImpl(videoSource); } -cv::gpu::VideoReader_GPU::FormatInfo cv::gpu::VideoReader_GPU::format() const +Ptr cv::gpucodec::createVideoReader(const Ptr& source) { - CV_Assert( isOpened() ); - return impl_->format(); -} - -bool cv::gpu::VideoReader_GPU::VideoSource::parseVideoData(const unsigned char* data, size_t size, bool endOfStream) -{ - return videoParser_->parseVideoData(data, size, endOfStream); -} - -void cv::gpu::VideoReader_GPU::dumpFormat(std::ostream& st) -{ - static const char* codecs[] = - { - "MPEG1", - "MPEG2", - "MPEG4", - "VC1", - "H264", - "JPEG", - "H264_SVC", - "H264_MVC" - }; - - static const char* chromas[] = - { - "Monochrome", - "YUV420", - "YUV422", - "YUV444" - }; - - FormatInfo _format = this->format(); - - st << "Frame Size : " << _format.width << "x" << _format.height << std::endl; - st << "Codec : " << (_format.codec <= H264_MVC ? 
codecs[_format.codec] : "Uncompressed YUV") << std::endl; - st << "Chroma Format : " << chromas[_format.chromaFormat] << std::endl; + Ptr videoSource(new detail::RawVideoSourceWrapper(source)); + return new VideoReaderImpl(videoSource); } #endif // HAVE_NVCUVID - -template <> void cv::Ptr::delete_obj() -{ - if (obj) delete obj; -} diff --git a/modules/gpucodec/src/video_source.cpp b/modules/gpucodec/src/video_source.cpp new file mode 100644 index 000000000..ce6a1bd8c --- /dev/null +++ b/modules/gpucodec/src/video_source.cpp @@ -0,0 +1,121 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. 
+// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#include "precomp.hpp" + +#ifdef HAVE_NVCUVID + +using namespace cv; +using namespace cv::gpucodec; +using namespace cv::gpucodec::detail; + +bool cv::gpucodec::detail::VideoSource::parseVideoData(const unsigned char* data, size_t size, bool endOfStream) +{ + return videoParser_->parseVideoData(data, size, endOfStream); +} + +cv::gpucodec::detail::RawVideoSourceWrapper::RawVideoSourceWrapper(const Ptr& source) : + source_(source) +{ + CV_Assert( !source_.empty() ); +} + +cv::gpucodec::FormatInfo cv::gpucodec::detail::RawVideoSourceWrapper::format() const +{ + return source_->format(); +} + +void cv::gpucodec::detail::RawVideoSourceWrapper::start() +{ + stop_ = false; + hasError_ = false; + thread_ = new Thread(readLoop, this); +} + +void cv::gpucodec::detail::RawVideoSourceWrapper::stop() +{ + stop_ = true; + thread_->wait(); + thread_.release(); +} + +bool cv::gpucodec::detail::RawVideoSourceWrapper::isStarted() const +{ + return !stop_; +} + +bool cv::gpucodec::detail::RawVideoSourceWrapper::hasError() const +{ + return hasError_; +} + +void cv::gpucodec::detail::RawVideoSourceWrapper::readLoop(void* userData) +{ + RawVideoSourceWrapper* thiz = 
static_cast(userData); + + for (;;) + { + unsigned char* data; + int size; + bool endOfFile; + + if (!thiz->source_->getNextPacket(&data, &size, &endOfFile)) + { + thiz->hasError_ = !endOfFile; + break; + } + + if (!thiz->parseVideoData(data, size)) + { + thiz->hasError_ = true; + break; + } + + if (thiz->stop_) + break; + } + + thiz->parseVideoData(0, 0, true); +} + +#endif // HAVE_NVCUVID diff --git a/modules/gpucodec/src/video_source.hpp b/modules/gpucodec/src/video_source.hpp new file mode 100644 index 000000000..b4d930ee0 --- /dev/null +++ b/modules/gpucodec/src/video_source.hpp @@ -0,0 +1,99 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. 
+// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +#ifndef __GPUCODEC_VIDEO_SOURCE_H__ +#define __GPUCODEC_VIDEO_SOURCE_H__ + +#include "opencv2/core/private.gpu.hpp" +#include "opencv2/gpucodec.hpp" +#include "thread.hpp" + +namespace cv { namespace gpucodec { namespace detail +{ + +class VideoParser; + +class VideoSource +{ +public: + virtual ~VideoSource() {} + + virtual FormatInfo format() const = 0; + virtual void start() = 0; + virtual void stop() = 0; + virtual bool isStarted() const = 0; + virtual bool hasError() const = 0; + + void setVideoParser(detail::VideoParser* videoParser) { videoParser_ = videoParser; } + +protected: + bool parseVideoData(const uchar* data, size_t size, bool endOfStream = false); + +private: + detail::VideoParser* videoParser_; +}; + +class RawVideoSourceWrapper : public VideoSource +{ +public: + RawVideoSourceWrapper(const Ptr& source); + + FormatInfo format() const; + void start(); + void stop(); + bool isStarted() const; + bool hasError() const; + +private: + Ptr source_; + + Ptr thread_; + volatile bool stop_; + volatile bool hasError_; + + static void readLoop(void* userData); +}; + +}}} + +#endif // __GPUCODEC_VIDEO_SOURCE_H__ diff --git a/modules/gpucodec/test/test_video.cpp 
b/modules/gpucodec/test/test_video.cpp index a073a969a..26bcc02d5 100644 --- a/modules/gpucodec/test/test_video.cpp +++ b/modules/gpucodec/test/test_video.cpp @@ -57,19 +57,15 @@ GPU_TEST_P(Video, Reader) const std::string inputFile = std::string(cvtest::TS::ptr()->get_data_path()) + "video/" + GET_PARAM(1); - cv::gpu::VideoReader_GPU reader(inputFile); - ASSERT_TRUE(reader.isOpened()); + cv::Ptr reader = cv::gpucodec::createVideoReader(inputFile); cv::gpu::GpuMat frame; for (int i = 0; i < 10; ++i) { - ASSERT_TRUE(reader.read(frame)); + ASSERT_TRUE(reader->nextFrame(frame)); ASSERT_FALSE(frame.empty()); } - - reader.close(); - ASSERT_FALSE(reader.isOpened()); } ////////////////////////////////////////////////////// diff --git a/modules/superres/src/frame_source.cpp b/modules/superres/src/frame_source.cpp index cba2b14ea..7da817cfa 100644 --- a/modules/superres/src/frame_source.cpp +++ b/modules/superres/src/frame_source.cpp @@ -210,7 +210,7 @@ namespace private: String fileName_; - VideoReader_GPU reader_; + Ptr reader_; GpuMat frame_; }; @@ -223,13 +223,13 @@ namespace { if (_frame.kind() == _InputArray::GPU_MAT) { - bool res = reader_.read(_frame.getGpuMatRef()); + bool res = reader_->nextFrame(_frame.getGpuMatRef()); if (!res) _frame.release(); } else { - bool res = reader_.read(frame_); + bool res = reader_->nextFrame(frame_); if (!res) _frame.release(); else @@ -239,9 +239,7 @@ namespace void VideoFrameSource_GPU::reset() { - reader_.close(); - reader_.open(fileName_); - CV_Assert( reader_.isOpened() ); + reader_ = gpucodec::createVideoReader(fileName_); } } diff --git a/samples/gpu/video_reader.cpp b/samples/gpu/video_reader.cpp index 7eea72639..42f6f91db 100644 --- a/samples/gpu/video_reader.cpp +++ b/samples/gpu/video_reader.cpp @@ -30,8 +30,7 @@ int main(int argc, const char* argv[]) cv::VideoCapture reader(fname); cv::gpu::GpuMat d_frame; - cv::gpu::VideoReader_GPU d_reader(fname); - d_reader.dumpFormat(std::cout); + cv::Ptr d_reader = 
cv::gpucodec::createVideoReader(fname); cv::TickMeter tm; std::vector cpu_times; @@ -46,7 +45,7 @@ int main(int argc, const char* argv[]) cpu_times.push_back(tm.getTimeMilli()); tm.reset(); tm.start(); - if (!d_reader.read(d_frame)) + if (!d_reader->nextFrame(d_frame)) break; tm.stop(); gpu_times.push_back(tm.getTimeMilli()); From 4ebbf69134c1707b918e5e28bed6474f10c9ac26 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Thu, 25 Apr 2013 12:07:57 +0400 Subject: [PATCH 025/121] switched to Input/Output Array in gpu::add --- .../gpuarithm/include/opencv2/gpuarithm.hpp | 6 +- modules/gpuarithm/src/element_operations.cpp | 327 ++++++++++-------- .../test/test_element_operations.cpp | 88 +++++ 3 files changed, 267 insertions(+), 154 deletions(-) diff --git a/modules/gpuarithm/include/opencv2/gpuarithm.hpp b/modules/gpuarithm/include/opencv2/gpuarithm.hpp index 4edc29ba4..9634327af 100644 --- a/modules/gpuarithm/include/opencv2/gpuarithm.hpp +++ b/modules/gpuarithm/include/opencv2/gpuarithm.hpp @@ -51,10 +51,8 @@ namespace cv { namespace gpu { -//! adds one matrix to another (c = a + b) -CV_EXPORTS void add(const GpuMat& a, const GpuMat& b, GpuMat& c, const GpuMat& mask = GpuMat(), int dtype = -1, Stream& stream = Stream::Null()); -//! adds scalar to a matrix (c = a + s) -CV_EXPORTS void add(const GpuMat& a, const Scalar& sc, GpuMat& c, const GpuMat& mask = GpuMat(), int dtype = -1, Stream& stream = Stream::Null()); +//! adds one matrix to another (dst = src1 + src2) +CV_EXPORTS void add(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), int dtype = -1, Stream& stream = Stream::Null()); //! 
subtracts one matrix from another (c = a - b) CV_EXPORTS void subtract(const GpuMat& a, const GpuMat& b, GpuMat& c, const GpuMat& mask = GpuMat(), int dtype = -1, Stream& stream = Stream::Null()); diff --git a/modules/gpuarithm/src/element_operations.cpp b/modules/gpuarithm/src/element_operations.cpp index e81833106..19789891d 100644 --- a/modules/gpuarithm/src/element_operations.cpp +++ b/modules/gpuarithm/src/element_operations.cpp @@ -47,8 +47,7 @@ using namespace cv::gpu; #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) -void cv::gpu::add(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&, int, Stream&) { throw_no_cuda(); } -void cv::gpu::add(const GpuMat&, const Scalar&, GpuMat&, const GpuMat&, int, Stream&) { throw_no_cuda(); } +void cv::gpu::add(InputArray, InputArray, OutputArray, InputArray, int, Stream&) { throw_no_cuda(); } void cv::gpu::subtract(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&, int, Stream&) { throw_no_cuda(); } void cv::gpu::subtract(const GpuMat&, const Scalar&, GpuMat&, const GpuMat&, int, Stream&) { throw_no_cuda(); } @@ -302,98 +301,81 @@ namespace arithm void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); } -void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s) +static void addMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, Stream& _stream) { - using namespace arithm; - typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); static const func_t funcs[7][7] = { { - addMat, - addMat, - addMat, - addMat, - addMat, - addMat, - addMat + arithm::addMat, + arithm::addMat, + arithm::addMat, + arithm::addMat, + arithm::addMat, + arithm::addMat, + arithm::addMat }, { - addMat, - addMat, - addMat, - addMat, - addMat, - addMat, - addMat + arithm::addMat, + arithm::addMat, + arithm::addMat, + arithm::addMat, + arithm::addMat, + arithm::addMat, + 
arithm::addMat }, { - 0 /*addMat*/, - 0 /*addMat*/, - addMat, - addMat, - addMat, - addMat, - addMat + 0 /*arithm::addMat*/, + 0 /*arithm::addMat*/, + arithm::addMat, + arithm::addMat, + arithm::addMat, + arithm::addMat, + arithm::addMat }, { - 0 /*addMat*/, - 0 /*addMat*/, - addMat, - addMat, - addMat, - addMat, - addMat + 0 /*arithm::addMat*/, + 0 /*arithm::addMat*/, + arithm::addMat, + arithm::addMat, + arithm::addMat, + arithm::addMat, + arithm::addMat }, { - 0 /*addMat*/, - 0 /*addMat*/, - 0 /*addMat*/, - 0 /*addMat*/, - addMat, - addMat, - addMat + 0 /*arithm::addMat*/, + 0 /*arithm::addMat*/, + 0 /*arithm::addMat*/, + 0 /*arithm::addMat*/, + arithm::addMat, + arithm::addMat, + arithm::addMat }, { - 0 /*addMat*/, - 0 /*addMat*/, - 0 /*addMat*/, - 0 /*addMat*/, - 0 /*addMat*/, - addMat, - addMat + 0 /*arithm::addMat*/, + 0 /*arithm::addMat*/, + 0 /*arithm::addMat*/, + 0 /*arithm::addMat*/, + 0 /*arithm::addMat*/, + arithm::addMat, + arithm::addMat }, { - 0 /*addMat*/, - 0 /*addMat*/, - 0 /*addMat*/, - 0 /*addMat*/, - 0 /*addMat*/, - 0 /*addMat*/, - addMat + 0 /*arithm::addMat*/, + 0 /*arithm::addMat*/, + 0 /*arithm::addMat*/, + 0 /*arithm::addMat*/, + 0 /*arithm::addMat*/, + 0 /*arithm::addMat*/, + arithm::addMat } }; - if (dtype < 0) - dtype = src1.depth(); - const int sdepth = src1.depth(); - const int ddepth = CV_MAT_DEPTH(dtype); + const int ddepth = dst.depth(); const int cn = src1.channels(); - CV_Assert( sdepth <= CV_64F && ddepth <= CV_64F ); - CV_Assert( src2.type() == src1.type() && src2.size() == src1.size() ); - CV_Assert( mask.empty() || (cn == 1 && mask.size() == src1.size() && mask.type() == CV_8U) ); - - if (sdepth == CV_64F || ddepth == CV_64F) - { - if (!deviceSupports(NATIVE_DOUBLE)) - CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); - } - - dst.create(src1.size(), CV_MAKE_TYPE(ddepth, cn)); - - cudaStream_t stream = StreamAccessor::getStream(s); + cudaStream_t stream = StreamAccessor::getStream(_stream); 
PtrStepSzb src1_(src1.rows, src1.cols * cn, src1.data, src1.step); PtrStepSzb src2_(src1.rows, src1.cols * cn, src2.data, src2.step); @@ -413,10 +395,10 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu { const int vcols = src1_.cols >> 2; - addMat_v4(PtrStepSz(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step), - PtrStepSz(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step), - PtrStepSz(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step), - stream); + arithm::addMat_v4(PtrStepSz(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step), + PtrStepSz(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step), + PtrStepSz(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step), + stream); return; } @@ -424,10 +406,10 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu { const int vcols = src1_.cols >> 1; - addMat_v2(PtrStepSz(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step), - PtrStepSz(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step), - PtrStepSz(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step), - stream); + arithm::addMat_v2(PtrStepSz(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step), + PtrStepSz(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step), + PtrStepSz(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step), + stream); return; } @@ -448,75 +430,73 @@ namespace arithm void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); } -void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s) +static void addScalar(const GpuMat& src, Scalar val, GpuMat& dst, const GpuMat& mask, Stream& _stream) { - using namespace arithm; - typedef void (*func_t)(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); static const func_t funcs[7][7] = { { - addScalar, - addScalar, - addScalar, - addScalar, - addScalar, - addScalar, - 
addScalar + arithm::addScalar, + arithm::addScalar, + arithm::addScalar, + arithm::addScalar, + arithm::addScalar, + arithm::addScalar, + arithm::addScalar }, { - addScalar, - addScalar, - addScalar, - addScalar, - addScalar, - addScalar, - addScalar + arithm::addScalar, + arithm::addScalar, + arithm::addScalar, + arithm::addScalar, + arithm::addScalar, + arithm::addScalar, + arithm::addScalar }, { - 0 /*addScalar*/, - 0 /*addScalar*/, - addScalar, - addScalar, - addScalar, - addScalar, - addScalar + 0 /*arithm::addScalar*/, + 0 /*arithm::addScalar*/, + arithm::addScalar, + arithm::addScalar, + arithm::addScalar, + arithm::addScalar, + arithm::addScalar }, { - 0 /*addScalar*/, - 0 /*addScalar*/, - addScalar, - addScalar, - addScalar, - addScalar, - addScalar + 0 /*arithm::addScalar*/, + 0 /*arithm::addScalar*/, + arithm::addScalar, + arithm::addScalar, + arithm::addScalar, + arithm::addScalar, + arithm::addScalar }, { - 0 /*addScalar*/, - 0 /*addScalar*/, - 0 /*addScalar*/, - 0 /*addScalar*/, - addScalar, - addScalar, - addScalar + 0 /*arithm::addScalar*/, + 0 /*arithm::addScalar*/, + 0 /*arithm::addScalar*/, + 0 /*arithm::addScalar*/, + arithm::addScalar, + arithm::addScalar, + arithm::addScalar }, { - 0 /*addScalar*/, - 0 /*addScalar*/, - 0 /*addScalar*/, - 0 /*addScalar*/, - 0 /*addScalar*/, - addScalar, - addScalar + 0 /*arithm::addScalar*/, + 0 /*arithm::addScalar*/, + 0 /*arithm::addScalar*/, + 0 /*arithm::addScalar*/, + 0 /*arithm::addScalar*/, + arithm::addScalar, + arithm::addScalar }, { - 0 /*addScalar*/, - 0 /*addScalar*/, - 0 /*addScalar*/, - 0 /*addScalar*/, - 0 /*addScalar*/, - 0 /*addScalar*/, - addScalar + 0 /*arithm::addScalar*/, + 0 /*arithm::addScalar*/, + 0 /*arithm::addScalar*/, + 0 /*arithm::addScalar*/, + 0 /*arithm::addScalar*/, + 0 /*arithm::addScalar*/, + arithm::addScalar } }; @@ -532,31 +512,16 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat {0 , 0 , 0 , 0 } }; - if (dtype < 0) - dtype = src.depth(); - 
const int sdepth = src.depth(); - const int ddepth = CV_MAT_DEPTH(dtype); + const int ddepth = dst.depth(); const int cn = src.channels(); - CV_Assert( sdepth <= CV_64F && ddepth <= CV_64F ); - CV_Assert( cn <= 4 ); - CV_Assert( mask.empty() || (cn == 1 && mask.size() == src.size() && mask.type() == CV_8U) ); - - if (sdepth == CV_64F || ddepth == CV_64F) - { - if (!deviceSupports(NATIVE_DOUBLE)) - CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); - } - - dst.create(src.size(), CV_MAKE_TYPE(ddepth, cn)); - - cudaStream_t stream = StreamAccessor::getStream(s); + cudaStream_t stream = StreamAccessor::getStream(_stream); const npp_func_t npp_func = npp_funcs[sdepth][cn - 1]; if (ddepth == sdepth && cn > 1 && npp_func != 0) { - npp_func(src, sc, dst, stream); + npp_func(src, val, dst, stream); return; } @@ -567,7 +532,69 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat if (!func) CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types"); - func(src, sc.val[0], dst, mask, stream); + func(src, val[0], dst, mask, stream); +} + +void cv::gpu::add(InputArray _src1, InputArray _src2, OutputArray _dst, InputArray _mask, int dtype, Stream& stream) +{ + const int kind1 = _src1.kind(); + const int kind2 = _src2.kind(); + + const bool isScalar1 = (kind1 == _InputArray::MATX); + const bool isScalar2 = (kind2 == _InputArray::MATX); + CV_Assert( !isScalar1 || !isScalar2 ); + + GpuMat src1; + if (!isScalar1) + src1 = _src1.getGpuMat(); + + GpuMat src2; + if (!isScalar2) + src2 = _src2.getGpuMat(); + + Mat scalar; + if (isScalar1) + scalar = _src1.getMat(); + else if (isScalar2) + scalar = _src2.getMat(); + + Scalar val; + if (!scalar.empty()) + { + CV_Assert( scalar.total() <= 4 ); + scalar.convertTo(Mat_(scalar.rows, scalar.cols, &val[0]), CV_64F); + } + + GpuMat mask = _mask.getGpuMat(); + + const int sdepth = src1.empty() ? 
src2.depth() : src1.depth(); + const int cn = src1.empty() ? src2.channels() : src1.channels(); + const Size size = src1.empty() ? src2.size() : src1.size(); + + if (dtype < 0) + dtype = sdepth; + + const int ddepth = CV_MAT_DEPTH(dtype); + + CV_Assert( sdepth <= CV_64F && ddepth <= CV_64F ); + CV_Assert( !scalar.empty() || (src2.type() == src1.type() && src2.size() == src1.size()) ); + CV_Assert( mask.empty() || (cn == 1 && mask.size() == size && mask.type() == CV_8UC1) ); + + if (sdepth == CV_64F || ddepth == CV_64F) + { + if (!deviceSupports(NATIVE_DOUBLE)) + CV_Error(Error::StsUnsupportedFormat, "The device doesn't support double"); + } + + _dst.create(size, CV_MAKE_TYPE(ddepth, cn)); + GpuMat dst = _dst.getGpuMat(); + + if (isScalar1) + ::addScalar(src2, val, dst, mask, stream); + else if (isScalar2) + ::addScalar(src1, val, dst, mask, stream); + else + ::addMat(src1, src2, dst, mask, stream); } //////////////////////////////////////////////////////////////////////// diff --git a/modules/gpuarithm/test/test_element_operations.cpp b/modules/gpuarithm/test/test_element_operations.cpp index 89f578fdd..6a98a9733 100644 --- a/modules/gpuarithm/test/test_element_operations.cpp +++ b/modules/gpuarithm/test/test_element_operations.cpp @@ -261,6 +261,94 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, Add_Scalar, testing::Combine( DEPTH_PAIRS, WHOLE_SUBMAT)); +//////////////////////////////////////////////////////////////////////////////// +// Add_Scalar_First + +PARAM_TEST_CASE(Add_Scalar_First, cv::gpu::DeviceInfo, cv::Size, std::pair, UseRoi) +{ + cv::gpu::DeviceInfo devInfo; + cv::Size size; + std::pair depth; + bool useRoi; + + virtual void SetUp() + { + devInfo = GET_PARAM(0); + size = GET_PARAM(1); + depth = GET_PARAM(2); + useRoi = GET_PARAM(3); + + cv::gpu::setDevice(devInfo.deviceID()); + } +}; + +GPU_TEST_P(Add_Scalar_First, WithOutMask) +{ + cv::Mat mat = randomMat(size, depth.first); + cv::Scalar val = randomScalar(0, 255); + + if ((depth.first == CV_64F || 
depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE)) + { + try + { + cv::gpu::GpuMat dst; + cv::gpu::add(val, loadMat(mat), dst, cv::gpu::GpuMat(), depth.second); + } + catch (const cv::Exception& e) + { + ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code); + } + } + else + { + cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi); + dst.setTo(cv::Scalar::all(0)); + cv::gpu::add(val, loadMat(mat, useRoi), dst, cv::gpu::GpuMat(), depth.second); + + cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0)); + cv::add(val, mat, dst_gold, cv::noArray(), depth.second); + + EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0); + } +} + +GPU_TEST_P(Add_Scalar_First, WithMask) +{ + cv::Mat mat = randomMat(size, depth.first); + cv::Scalar val = randomScalar(0, 255); + cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0); + + if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE)) + { + try + { + cv::gpu::GpuMat dst; + cv::gpu::add(val, loadMat(mat), dst, cv::gpu::GpuMat(), depth.second); + } + catch (const cv::Exception& e) + { + ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code); + } + } + else + { + cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi); + dst.setTo(cv::Scalar::all(0)); + cv::gpu::add(val, loadMat(mat, useRoi), dst, loadMat(mask, useRoi), depth.second); + + cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0)); + cv::add(val, mat, dst_gold, mask, depth.second); + + EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 
1e-4 : 0.0); + } +} + +INSTANTIATE_TEST_CASE_P(GPU_Arithm, Add_Scalar_First, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + DEPTH_PAIRS, + WHOLE_SUBMAT)); + //////////////////////////////////////////////////////////////////////////////// // Subtract_Array From 5330faf5a0aefd2a135485f476926bce40c86737 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Thu, 25 Apr 2013 12:08:36 +0400 Subject: [PATCH 026/121] switched to Input/Output Array in gpu::subtract --- .../gpuarithm/include/opencv2/gpuarithm.hpp | 6 +- modules/gpuarithm/src/cuda/sub_scalar.cu | 107 +++--- modules/gpuarithm/src/element_operations.cpp | 333 ++++++++++-------- .../test/test_element_operations.cpp | 88 +++++ 4 files changed, 324 insertions(+), 210 deletions(-) diff --git a/modules/gpuarithm/include/opencv2/gpuarithm.hpp b/modules/gpuarithm/include/opencv2/gpuarithm.hpp index 9634327af..ea3593bdc 100644 --- a/modules/gpuarithm/include/opencv2/gpuarithm.hpp +++ b/modules/gpuarithm/include/opencv2/gpuarithm.hpp @@ -54,10 +54,8 @@ namespace cv { namespace gpu { //! adds one matrix to another (dst = src1 + src2) CV_EXPORTS void add(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), int dtype = -1, Stream& stream = Stream::Null()); -//! subtracts one matrix from another (c = a - b) -CV_EXPORTS void subtract(const GpuMat& a, const GpuMat& b, GpuMat& c, const GpuMat& mask = GpuMat(), int dtype = -1, Stream& stream = Stream::Null()); -//! subtracts scalar from a matrix (c = a - s) -CV_EXPORTS void subtract(const GpuMat& a, const Scalar& sc, GpuMat& c, const GpuMat& mask = GpuMat(), int dtype = -1, Stream& stream = Stream::Null()); +//! subtracts one matrix from another (dst = src1 - src2) +CV_EXPORTS void subtract(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), int dtype = -1, Stream& stream = Stream::Null()); //! 
computes element-wise weighted product of the two arrays (c = scale * a * b) CV_EXPORTS void multiply(const GpuMat& a, const GpuMat& b, GpuMat& c, double scale = 1, int dtype = -1, Stream& stream = Stream::Null()); diff --git a/modules/gpuarithm/src/cuda/sub_scalar.cu b/modules/gpuarithm/src/cuda/sub_scalar.cu index 05c0cc703..619ab4310 100644 --- a/modules/gpuarithm/src/cuda/sub_scalar.cu +++ b/modules/gpuarithm/src/cuda/sub_scalar.cu @@ -58,12 +58,13 @@ namespace arithm template struct SubScalar : unary_function { S val; + int scale; - __host__ explicit SubScalar(S val_) : val(val_) {} + __host__ SubScalar(S val_, int scale_) : val(val_), scale(scale_) {} __device__ __forceinline__ D operator ()(T a) const { - return saturate_cast(a - val); + return saturate_cast(scale * (a - val)); } }; } @@ -78,9 +79,9 @@ namespace cv { namespace gpu { namespace cudev namespace arithm { template - void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream) + void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream) { - SubScalar op(static_cast(val)); + SubScalar op(static_cast(val), inv ? 
-1 : 1); if (mask.data) cudev::transform((PtrStepSz) src1, (PtrStepSz) dst, op, mask, stream); @@ -88,61 +89,61 @@ namespace arithm cudev::transform((PtrStepSz) src1, (PtrStepSz) dst, op, WithOutMask(), stream); } - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, 
cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb 
dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + //template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + //template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + //template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + //template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + template void 
subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + //template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + //template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + //template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + //template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + template void subScalar(PtrStepSzb src1, double val, bool 
inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + //template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + //template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + //template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + //template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + //template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - 
//template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - //template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); - template void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + //template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + //template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + //template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + //template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + //template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + //template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + template void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); } #endif // CUDA_DISABLER diff --git a/modules/gpuarithm/src/element_operations.cpp b/modules/gpuarithm/src/element_operations.cpp index 19789891d..1e2feaadd 100644 --- a/modules/gpuarithm/src/element_operations.cpp +++ b/modules/gpuarithm/src/element_operations.cpp @@ -49,8 +49,7 @@ using namespace cv::gpu; void cv::gpu::add(InputArray, InputArray, OutputArray, InputArray, int, Stream&) { throw_no_cuda(); } -void cv::gpu::subtract(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&, int, Stream&) { throw_no_cuda(); } -void cv::gpu::subtract(const GpuMat&, const Scalar&, GpuMat&, const GpuMat&, int, 
Stream&) { throw_no_cuda(); } +void cv::gpu::subtract(InputArray, InputArray, OutputArray, InputArray, int, Stream&) { throw_no_cuda(); } void cv::gpu::multiply(const GpuMat&, const GpuMat&, GpuMat&, double, int, Stream&) { throw_no_cuda(); } void cv::gpu::multiply(const GpuMat&, const Scalar&, GpuMat&, double, int, Stream&) { throw_no_cuda(); } @@ -609,98 +608,81 @@ namespace arithm void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); } -void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s) +static void subMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, Stream& _stream) { - using namespace arithm; - typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); static const func_t funcs[7][7] = { { - subMat, - subMat, - subMat, - subMat, - subMat, - subMat, - subMat + arithm::subMat, + arithm::subMat, + arithm::subMat, + arithm::subMat, + arithm::subMat, + arithm::subMat, + arithm::subMat }, { - subMat, - subMat, - subMat, - subMat, - subMat, - subMat, - subMat + arithm::subMat, + arithm::subMat, + arithm::subMat, + arithm::subMat, + arithm::subMat, + arithm::subMat, + arithm::subMat }, { - 0 /*subMat*/, - 0 /*subMat*/, - subMat, - subMat, - subMat, - subMat, - subMat + 0 /*arithm::subMat*/, + 0 /*arithm::subMat*/, + arithm::subMat, + arithm::subMat, + arithm::subMat, + arithm::subMat, + arithm::subMat }, { - 0 /*subMat*/, - 0 /*subMat*/, - subMat, - subMat, - subMat, - subMat, - subMat + 0 /*arithm::subMat*/, + 0 /*arithm::subMat*/, + arithm::subMat, + arithm::subMat, + arithm::subMat, + arithm::subMat, + arithm::subMat }, { - 0 /*subMat*/, - 0 /*subMat*/, - 0 /*subMat*/, - 0 /*subMat*/, - subMat, - subMat, - subMat + 0 /*arithm::subMat*/, + 0 /*arithm::subMat*/, + 0 /*arithm::subMat*/, + 0 /*arithm::subMat*/, + arithm::subMat, + arithm::subMat, + arithm::subMat }, { - 0 
/*subMat*/, - 0 /*subMat*/, - 0 /*subMat*/, - 0 /*subMat*/, - 0 /*subMat*/, - subMat, - subMat + 0 /*arithm::subMat*/, + 0 /*arithm::subMat*/, + 0 /*arithm::subMat*/, + 0 /*arithm::subMat*/, + 0 /*arithm::subMat*/, + arithm::subMat, + arithm::subMat }, { - 0 /*subMat*/, - 0 /*subMat*/, - 0 /*subMat*/, - 0 /*subMat*/, - 0 /*subMat*/, - 0 /*subMat*/, - subMat + 0 /*arithm::subMat*/, + 0 /*arithm::subMat*/, + 0 /*arithm::subMat*/, + 0 /*arithm::subMat*/, + 0 /*arithm::subMat*/, + 0 /*arithm::subMat*/, + arithm::subMat } }; - if (dtype < 0) - dtype = src1.depth(); - const int sdepth = src1.depth(); - const int ddepth = CV_MAT_DEPTH(dtype); + const int ddepth = dst.depth(); const int cn = src1.channels(); - CV_Assert( sdepth <= CV_64F && ddepth <= CV_64F ); - CV_Assert( src2.type() == src1.type() && src2.size() == src1.size() ); - CV_Assert( mask.empty() || (cn == 1 && mask.size() == src1.size() && mask.type() == CV_8U) ); - - if (sdepth == CV_64F || ddepth == CV_64F) - { - if (!deviceSupports(NATIVE_DOUBLE)) - CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); - } - - dst.create(src1.size(), CV_MAKE_TYPE(ddepth, cn)); - - cudaStream_t stream = StreamAccessor::getStream(s); + cudaStream_t stream = StreamAccessor::getStream(_stream); PtrStepSzb src1_(src1.rows, src1.cols * cn, src1.data, src1.step); PtrStepSzb src2_(src1.rows, src1.cols * cn, src2.data, src2.step); @@ -720,10 +702,10 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons { const int vcols = src1_.cols >> 2; - subMat_v4(PtrStepSz(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step), - PtrStepSz(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step), - PtrStepSz(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step), - stream); + arithm::subMat_v4(PtrStepSz(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step), + PtrStepSz(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step), + PtrStepSz(src1_.rows, vcols, (unsigned int*) 
dst_.data, dst_.step), + stream); return; } @@ -731,10 +713,10 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons { const int vcols = src1_.cols >> 1; - subMat_v2(PtrStepSz(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step), - PtrStepSz(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step), - PtrStepSz(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step), - stream); + arithm::subMat_v2(PtrStepSz(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step), + PtrStepSz(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step), + PtrStepSz(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step), + stream); return; } @@ -752,78 +734,76 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons namespace arithm { template - void subScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); } -void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s) +static void subScalar(const GpuMat& src, Scalar val, bool inv, GpuMat& dst, const GpuMat& mask, Stream& _stream) { - using namespace arithm; - - typedef void (*func_t)(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + typedef void (*func_t)(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); static const func_t funcs[7][7] = { { - subScalar, - subScalar, - subScalar, - subScalar, - subScalar, - subScalar, - subScalar + arithm::subScalar, + arithm::subScalar, + arithm::subScalar, + arithm::subScalar, + arithm::subScalar, + arithm::subScalar, + arithm::subScalar }, { - subScalar, - subScalar, - subScalar, - subScalar, - subScalar, - subScalar, - subScalar + arithm::subScalar, + arithm::subScalar, + arithm::subScalar, + arithm::subScalar, + arithm::subScalar, + arithm::subScalar, + 
arithm::subScalar }, { - 0 /*subScalar*/, - 0 /*subScalar*/, - subScalar, - subScalar, - subScalar, - subScalar, - subScalar + 0 /*arithm::subScalar*/, + 0 /*arithm::subScalar*/, + arithm::subScalar, + arithm::subScalar, + arithm::subScalar, + arithm::subScalar, + arithm::subScalar }, { - 0 /*subScalar*/, - 0 /*subScalar*/, - subScalar, - subScalar, - subScalar, - subScalar, - subScalar + 0 /*arithm::subScalar*/, + 0 /*arithm::subScalar*/, + arithm::subScalar, + arithm::subScalar, + arithm::subScalar, + arithm::subScalar, + arithm::subScalar }, { - 0 /*subScalar*/, - 0 /*subScalar*/, - 0 /*subScalar*/, - 0 /*subScalar*/, - subScalar, - subScalar, - subScalar + 0 /*arithm::subScalar*/, + 0 /*arithm::subScalar*/, + 0 /*arithm::subScalar*/, + 0 /*arithm::subScalar*/, + arithm::subScalar, + arithm::subScalar, + arithm::subScalar }, { - 0 /*subScalar*/, - 0 /*subScalar*/, - 0 /*subScalar*/, - 0 /*subScalar*/, - 0 /*subScalar*/, - subScalar, - subScalar + 0 /*arithm::subScalar*/, + 0 /*arithm::subScalar*/, + 0 /*arithm::subScalar*/, + 0 /*arithm::subScalar*/, + 0 /*arithm::subScalar*/, + arithm::subScalar, + arithm::subScalar }, { - 0 /*subScalar*/, - 0 /*subScalar*/, - 0 /*subScalar*/, - 0 /*subScalar*/, - 0 /*subScalar*/, - 0 /*subScalar*/, - subScalar + 0 /*arithm::subScalar*/, + 0 /*arithm::subScalar*/, + 0 /*arithm::subScalar*/, + 0 /*arithm::subScalar*/, + 0 /*arithm::subScalar*/, + 0 /*arithm::subScalar*/, + arithm::subScalar } }; @@ -839,31 +819,16 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G {0 , 0 , 0 , 0 } }; - if (dtype < 0) - dtype = src.depth(); - const int sdepth = src.depth(); - const int ddepth = CV_MAT_DEPTH(dtype); + const int ddepth = dst.depth(); const int cn = src.channels(); - CV_Assert( sdepth <= CV_64F && ddepth <= CV_64F ); - CV_Assert( cn <= 4 ); - CV_Assert( mask.empty() || (cn == 1 && mask.size() == src.size() && mask.type() == CV_8U) ); - - if (sdepth == CV_64F || ddepth == CV_64F) - { - if 
(!deviceSupports(NATIVE_DOUBLE)) - CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); - } - - dst.create(src.size(), CV_MAKE_TYPE(ddepth, cn)); - - cudaStream_t stream = StreamAccessor::getStream(s); + cudaStream_t stream = StreamAccessor::getStream(_stream); const npp_func_t npp_func = npp_funcs[sdepth][cn - 1]; - if (ddepth == sdepth && cn > 1 && npp_func != 0) + if (ddepth == sdepth && cn > 1 && npp_func != 0 && !inv) { - npp_func(src, sc, dst, stream); + npp_func(src, val, dst, stream); return; } @@ -874,7 +839,69 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const G if (!func) CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types"); - func(src, sc.val[0], dst, mask, stream); + func(src, val[0], inv, dst, mask, stream); +} + +void cv::gpu::subtract(InputArray _src1, InputArray _src2, OutputArray _dst, InputArray _mask, int dtype, Stream& stream) +{ + const int kind1 = _src1.kind(); + const int kind2 = _src2.kind(); + + const bool isScalar1 = (kind1 == _InputArray::MATX); + const bool isScalar2 = (kind2 == _InputArray::MATX); + CV_Assert( !isScalar1 || !isScalar2 ); + + GpuMat src1; + if (!isScalar1) + src1 = _src1.getGpuMat(); + + GpuMat src2; + if (!isScalar2) + src2 = _src2.getGpuMat(); + + Mat scalar; + if (isScalar1) + scalar = _src1.getMat(); + else if (isScalar2) + scalar = _src2.getMat(); + + Scalar val; + if (!scalar.empty()) + { + CV_Assert( scalar.total() <= 4 ); + scalar.convertTo(Mat_(scalar.rows, scalar.cols, &val[0]), CV_64F); + } + + GpuMat mask = _mask.getGpuMat(); + + const int sdepth = src1.empty() ? src2.depth() : src1.depth(); + const int cn = src1.empty() ? src2.channels() : src1.channels(); + const Size size = src1.empty() ? 
src2.size() : src1.size(); + + if (dtype < 0) + dtype = sdepth; + + const int ddepth = CV_MAT_DEPTH(dtype); + + CV_Assert( sdepth <= CV_64F && ddepth <= CV_64F ); + CV_Assert( !scalar.empty() || (src2.type() == src1.type() && src2.size() == src1.size()) ); + CV_Assert( mask.empty() || (cn == 1 && mask.size() == size && mask.type() == CV_8UC1) ); + + if (sdepth == CV_64F || ddepth == CV_64F) + { + if (!deviceSupports(NATIVE_DOUBLE)) + CV_Error(Error::StsUnsupportedFormat, "The device doesn't support double"); + } + + _dst.create(size, CV_MAKE_TYPE(ddepth, cn)); + GpuMat dst = _dst.getGpuMat(); + + if (isScalar1) + ::subScalar(src2, val, true, dst, mask, stream); + else if (isScalar2) + ::subScalar(src1, val, false, dst, mask, stream); + else + ::subMat(src1, src2, dst, mask, stream); } //////////////////////////////////////////////////////////////////////// diff --git a/modules/gpuarithm/test/test_element_operations.cpp b/modules/gpuarithm/test/test_element_operations.cpp index 6a98a9733..73974d3ad 100644 --- a/modules/gpuarithm/test/test_element_operations.cpp +++ b/modules/gpuarithm/test/test_element_operations.cpp @@ -564,6 +564,94 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, Subtract_Scalar, testing::Combine( DEPTH_PAIRS, WHOLE_SUBMAT)); +//////////////////////////////////////////////////////////////////////////////// +// Subtract_Scalar_First + +PARAM_TEST_CASE(Subtract_Scalar_First, cv::gpu::DeviceInfo, cv::Size, std::pair, UseRoi) +{ + cv::gpu::DeviceInfo devInfo; + cv::Size size; + std::pair depth; + bool useRoi; + + virtual void SetUp() + { + devInfo = GET_PARAM(0); + size = GET_PARAM(1); + depth = GET_PARAM(2); + useRoi = GET_PARAM(3); + + cv::gpu::setDevice(devInfo.deviceID()); + } +}; + +GPU_TEST_P(Subtract_Scalar_First, WithOutMask) +{ + cv::Mat mat = randomMat(size, depth.first); + cv::Scalar val = randomScalar(0, 255); + + if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE)) + { + try + { + 
cv::gpu::GpuMat dst; + cv::gpu::subtract(val, loadMat(mat), dst, cv::gpu::GpuMat(), depth.second); + } + catch (const cv::Exception& e) + { + ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code); + } + } + else + { + cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi); + dst.setTo(cv::Scalar::all(0)); + cv::gpu::subtract(val, loadMat(mat, useRoi), dst, cv::gpu::GpuMat(), depth.second); + + cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0)); + cv::subtract(val, mat, dst_gold, cv::noArray(), depth.second); + + EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0); + } +} + +GPU_TEST_P(Subtract_Scalar_First, WithMask) +{ + cv::Mat mat = randomMat(size, depth.first); + cv::Scalar val = randomScalar(0, 255); + cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0); + + if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE)) + { + try + { + cv::gpu::GpuMat dst; + cv::gpu::subtract(val, loadMat(mat), dst, cv::gpu::GpuMat(), depth.second); + } + catch (const cv::Exception& e) + { + ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code); + } + } + else + { + cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi); + dst.setTo(cv::Scalar::all(0)); + cv::gpu::subtract(val, loadMat(mat, useRoi), dst, loadMat(mask, useRoi), depth.second); + + cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0)); + cv::subtract(val, mat, dst_gold, mask, depth.second); + + EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 
1e-4 : 0.0); + } +} + +INSTANTIATE_TEST_CASE_P(GPU_Arithm, Subtract_Scalar_First, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + DEPTH_PAIRS, + WHOLE_SUBMAT)); + //////////////////////////////////////////////////////////////////////////////// // Multiply_Array From ffa25be3d254778067abf0ea485964504b98f7d9 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Thu, 25 Apr 2013 12:21:26 +0400 Subject: [PATCH 027/121] added arithm_op function to reduce code duplication --- modules/gpuarithm/src/element_operations.cpp | 205 ++++++++----------- 1 file changed, 84 insertions(+), 121 deletions(-) diff --git a/modules/gpuarithm/src/element_operations.cpp b/modules/gpuarithm/src/element_operations.cpp index 1e2feaadd..400968453 100644 --- a/modules/gpuarithm/src/element_operations.cpp +++ b/modules/gpuarithm/src/element_operations.cpp @@ -115,6 +115,83 @@ void cv::gpu::polarToCart(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool, #else +//////////////////////////////////////////////////////////////////////// +// arithm_op + +namespace +{ + typedef void (*mat_mat_func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double scale, Stream& stream); + typedef void (*mat_scalar_func_t)(const GpuMat& src, Scalar val, bool inv, GpuMat& dst, const GpuMat& mask, Stream& stream); + + void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, InputArray _mask, double scale, int dtype, Stream& stream, + mat_mat_func_t mat_mat_func, mat_scalar_func_t mat_scalar_func) + { + const int kind1 = _src1.kind(); + const int kind2 = _src2.kind(); + + const bool isScalar1 = (kind1 == _InputArray::MATX); + const bool isScalar2 = (kind2 == _InputArray::MATX); + CV_Assert( !isScalar1 || !isScalar2 ); + + GpuMat src1; + if (!isScalar1) + src1 = _src1.getGpuMat(); + + GpuMat src2; + if (!isScalar2) + src2 = _src2.getGpuMat(); + + Mat scalar; + if (isScalar1) + scalar = _src1.getMat(); + else if (isScalar2) + scalar = _src2.getMat(); + + Scalar val; + 
if (!scalar.empty()) + { + CV_Assert( scalar.total() <= 4 ); + scalar.convertTo(Mat_(scalar.rows, scalar.cols, &val[0]), CV_64F); + val[0] *= scale; + val[1] *= scale; + val[2] *= scale; + val[3] *= scale; + } + + GpuMat mask = _mask.getGpuMat(); + + const int sdepth = src1.empty() ? src2.depth() : src1.depth(); + const int cn = src1.empty() ? src2.channels() : src1.channels(); + const Size size = src1.empty() ? src2.size() : src1.size(); + + if (dtype < 0) + dtype = sdepth; + + const int ddepth = CV_MAT_DEPTH(dtype); + + CV_Assert( sdepth <= CV_64F && ddepth <= CV_64F ); + CV_Assert( !scalar.empty() || (src2.type() == src1.type() && src2.size() == src1.size()) ); + CV_Assert( mask.empty() || (cn == 1 && mask.size() == size && mask.type() == CV_8UC1) ); + + if (sdepth == CV_64F || ddepth == CV_64F) + { + if (!deviceSupports(NATIVE_DOUBLE)) + CV_Error(Error::StsUnsupportedFormat, "The device doesn't support double"); + } + + _dst.create(size, CV_MAKE_TYPE(ddepth, cn)); + GpuMat dst = _dst.getGpuMat(); + + if (isScalar1) + mat_scalar_func(src2, val, true, dst, mask, stream); + else if (isScalar2) + mat_scalar_func(src1, val, false, dst, mask, stream); + else + mat_mat_func(src1, src2, dst, mask, scale, stream); + } +} + + //////////////////////////////////////////////////////////////////////// // Basic arithmetical operations (add subtract multiply divide) @@ -300,7 +377,7 @@ namespace arithm void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); } -static void addMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, Stream& _stream) +static void addMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& _stream) { typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); static const func_t funcs[7][7] = @@ -429,7 +506,7 @@ namespace arithm void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb 
mask, cudaStream_t stream); } -static void addScalar(const GpuMat& src, Scalar val, GpuMat& dst, const GpuMat& mask, Stream& _stream) +static void addScalar(const GpuMat& src, Scalar val, bool, GpuMat& dst, const GpuMat& mask, Stream& _stream) { typedef void (*func_t)(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); static const func_t funcs[7][7] = @@ -534,66 +611,9 @@ static void addScalar(const GpuMat& src, Scalar val, GpuMat& dst, const GpuMat& func(src, val[0], dst, mask, stream); } -void cv::gpu::add(InputArray _src1, InputArray _src2, OutputArray _dst, InputArray _mask, int dtype, Stream& stream) +void cv::gpu::add(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, int dtype, Stream& stream) { - const int kind1 = _src1.kind(); - const int kind2 = _src2.kind(); - - const bool isScalar1 = (kind1 == _InputArray::MATX); - const bool isScalar2 = (kind2 == _InputArray::MATX); - CV_Assert( !isScalar1 || !isScalar2 ); - - GpuMat src1; - if (!isScalar1) - src1 = _src1.getGpuMat(); - - GpuMat src2; - if (!isScalar2) - src2 = _src2.getGpuMat(); - - Mat scalar; - if (isScalar1) - scalar = _src1.getMat(); - else if (isScalar2) - scalar = _src2.getMat(); - - Scalar val; - if (!scalar.empty()) - { - CV_Assert( scalar.total() <= 4 ); - scalar.convertTo(Mat_(scalar.rows, scalar.cols, &val[0]), CV_64F); - } - - GpuMat mask = _mask.getGpuMat(); - - const int sdepth = src1.empty() ? src2.depth() : src1.depth(); - const int cn = src1.empty() ? src2.channels() : src1.channels(); - const Size size = src1.empty() ? 
src2.size() : src1.size(); - - if (dtype < 0) - dtype = sdepth; - - const int ddepth = CV_MAT_DEPTH(dtype); - - CV_Assert( sdepth <= CV_64F && ddepth <= CV_64F ); - CV_Assert( !scalar.empty() || (src2.type() == src1.type() && src2.size() == src1.size()) ); - CV_Assert( mask.empty() || (cn == 1 && mask.size() == size && mask.type() == CV_8UC1) ); - - if (sdepth == CV_64F || ddepth == CV_64F) - { - if (!deviceSupports(NATIVE_DOUBLE)) - CV_Error(Error::StsUnsupportedFormat, "The device doesn't support double"); - } - - _dst.create(size, CV_MAKE_TYPE(ddepth, cn)); - GpuMat dst = _dst.getGpuMat(); - - if (isScalar1) - ::addScalar(src2, val, dst, mask, stream); - else if (isScalar2) - ::addScalar(src1, val, dst, mask, stream); - else - ::addMat(src1, src2, dst, mask, stream); + arithm_op(src1, src2, dst, mask, 1.0, dtype, stream, addMat, addScalar); } //////////////////////////////////////////////////////////////////////// @@ -608,7 +628,7 @@ namespace arithm void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); } -static void subMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, Stream& _stream) +static void subMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& _stream) { typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); static const func_t funcs[7][7] = @@ -842,66 +862,9 @@ static void subScalar(const GpuMat& src, Scalar val, bool inv, GpuMat& dst, cons func(src, val[0], inv, dst, mask, stream); } -void cv::gpu::subtract(InputArray _src1, InputArray _src2, OutputArray _dst, InputArray _mask, int dtype, Stream& stream) +void cv::gpu::subtract(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, int dtype, Stream& stream) { - const int kind1 = _src1.kind(); - const int kind2 = _src2.kind(); - - const bool isScalar1 = (kind1 == _InputArray::MATX); - const bool isScalar2 = (kind2 == 
_InputArray::MATX); - CV_Assert( !isScalar1 || !isScalar2 ); - - GpuMat src1; - if (!isScalar1) - src1 = _src1.getGpuMat(); - - GpuMat src2; - if (!isScalar2) - src2 = _src2.getGpuMat(); - - Mat scalar; - if (isScalar1) - scalar = _src1.getMat(); - else if (isScalar2) - scalar = _src2.getMat(); - - Scalar val; - if (!scalar.empty()) - { - CV_Assert( scalar.total() <= 4 ); - scalar.convertTo(Mat_(scalar.rows, scalar.cols, &val[0]), CV_64F); - } - - GpuMat mask = _mask.getGpuMat(); - - const int sdepth = src1.empty() ? src2.depth() : src1.depth(); - const int cn = src1.empty() ? src2.channels() : src1.channels(); - const Size size = src1.empty() ? src2.size() : src1.size(); - - if (dtype < 0) - dtype = sdepth; - - const int ddepth = CV_MAT_DEPTH(dtype); - - CV_Assert( sdepth <= CV_64F && ddepth <= CV_64F ); - CV_Assert( !scalar.empty() || (src2.type() == src1.type() && src2.size() == src1.size()) ); - CV_Assert( mask.empty() || (cn == 1 && mask.size() == size && mask.type() == CV_8UC1) ); - - if (sdepth == CV_64F || ddepth == CV_64F) - { - if (!deviceSupports(NATIVE_DOUBLE)) - CV_Error(Error::StsUnsupportedFormat, "The device doesn't support double"); - } - - _dst.create(size, CV_MAKE_TYPE(ddepth, cn)); - GpuMat dst = _dst.getGpuMat(); - - if (isScalar1) - ::subScalar(src2, val, true, dst, mask, stream); - else if (isScalar2) - ::subScalar(src1, val, false, dst, mask, stream); - else - ::subMat(src1, src2, dst, mask, stream); + arithm_op(src1, src2, dst, mask, 1.0, dtype, stream, subMat, subScalar); } //////////////////////////////////////////////////////////////////////// From 4595e3aa3ee3c2d331e5a86e880c360e3a0f4605 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Thu, 25 Apr 2013 12:55:38 +0400 Subject: [PATCH 028/121] switched to Input/Output Array in gpu::multiply --- .../gpuarithm/include/opencv2/gpuarithm.hpp | 6 +- modules/gpuarithm/src/element_operations.cpp | 348 ++++++++---------- .../test/test_element_operations.cpp | 87 +++++ 3 files changed, 
252 insertions(+), 189 deletions(-) diff --git a/modules/gpuarithm/include/opencv2/gpuarithm.hpp b/modules/gpuarithm/include/opencv2/gpuarithm.hpp index ea3593bdc..2b367fa1b 100644 --- a/modules/gpuarithm/include/opencv2/gpuarithm.hpp +++ b/modules/gpuarithm/include/opencv2/gpuarithm.hpp @@ -57,10 +57,8 @@ CV_EXPORTS void add(InputArray src1, InputArray src2, OutputArray dst, InputArra //! subtracts one matrix from another (dst = src1 - src2) CV_EXPORTS void subtract(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), int dtype = -1, Stream& stream = Stream::Null()); -//! computes element-wise weighted product of the two arrays (c = scale * a * b) -CV_EXPORTS void multiply(const GpuMat& a, const GpuMat& b, GpuMat& c, double scale = 1, int dtype = -1, Stream& stream = Stream::Null()); -//! weighted multiplies matrix to a scalar (c = scale * a * s) -CV_EXPORTS void multiply(const GpuMat& a, const Scalar& sc, GpuMat& c, double scale = 1, int dtype = -1, Stream& stream = Stream::Null()); +//! computes element-wise weighted product of the two arrays (dst = scale * src1 * src2) +CV_EXPORTS void multiply(InputArray src1, InputArray src2, OutputArray dst, double scale = 1, int dtype = -1, Stream& stream = Stream::Null()); //! 
computes element-wise weighted quotient of the two arrays (c = a / b) CV_EXPORTS void divide(const GpuMat& a, const GpuMat& b, GpuMat& c, double scale = 1, int dtype = -1, Stream& stream = Stream::Null()); diff --git a/modules/gpuarithm/src/element_operations.cpp b/modules/gpuarithm/src/element_operations.cpp index 400968453..76bfb9ba3 100644 --- a/modules/gpuarithm/src/element_operations.cpp +++ b/modules/gpuarithm/src/element_operations.cpp @@ -51,8 +51,7 @@ void cv::gpu::add(InputArray, InputArray, OutputArray, InputArray, int, Stream&) void cv::gpu::subtract(InputArray, InputArray, OutputArray, InputArray, int, Stream&) { throw_no_cuda(); } -void cv::gpu::multiply(const GpuMat&, const GpuMat&, GpuMat&, double, int, Stream&) { throw_no_cuda(); } -void cv::gpu::multiply(const GpuMat&, const Scalar&, GpuMat&, double, int, Stream&) { throw_no_cuda(); } +void cv::gpu::multiply(InputArray, InputArray, OutputArray, double, int, Stream&) { throw_no_cuda(); } void cv::gpu::divide(const GpuMat&, const GpuMat&, GpuMat&, double, int, Stream&) { throw_no_cuda(); } void cv::gpu::divide(const GpuMat&, const Scalar&, GpuMat&, double, int, Stream&) { throw_no_cuda(); } @@ -880,127 +879,92 @@ namespace arithm void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); } -void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double scale, int dtype, Stream& s) +static void mulMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double scale, Stream& _stream) { - using namespace arithm; - - cudaStream_t stream = StreamAccessor::getStream(s); - - if (src1.type() == CV_8UC4 && src2.type() == CV_32FC1) + typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); + static const func_t funcs[7][7] = { - CV_Assert( src1.size() == src2.size() ); - - dst.create(src1.size(), src1.type()); - - mulMat_8uc4_32f(src1, src2, dst, stream); - } - else if (src1.type() == 
CV_16SC4 && src2.type() == CV_32FC1) - { - CV_Assert( src1.size() == src2.size() ); - - dst.create(src1.size(), src1.type()); - - mulMat_16sc4_32f(src1, src2, dst, stream); - } - else - { - typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); - static const func_t funcs[7][7] = { - { - mulMat, - mulMat, - mulMat, - mulMat, - mulMat, - mulMat, - mulMat - }, - { - mulMat, - mulMat, - mulMat, - mulMat, - mulMat, - mulMat, - mulMat - }, - { - 0 /*mulMat*/, - 0 /*mulMat*/, - mulMat, - mulMat, - mulMat, - mulMat, - mulMat - }, - { - 0 /*mulMat*/, - 0 /*mulMat*/, - mulMat, - mulMat, - mulMat, - mulMat, - mulMat - }, - { - 0 /*mulMat*/, - 0 /*mulMat*/, - 0 /*mulMat*/, - 0 /*mulMat*/, - mulMat, - mulMat, - mulMat - }, - { - 0 /*mulMat*/, - 0 /*mulMat*/, - 0 /*mulMat*/, - 0 /*mulMat*/, - 0 /*mulMat*/, - mulMat, - mulMat - }, - { - 0 /*mulMat*/, - 0 /*mulMat*/, - 0 /*mulMat*/, - 0 /*mulMat*/, - 0 /*mulMat*/, - 0 /*mulMat*/, - mulMat - } - }; - - if (dtype < 0) - dtype = src1.depth(); - - const int sdepth = src1.depth(); - const int ddepth = CV_MAT_DEPTH(dtype); - const int cn = src1.channels(); - - CV_Assert( sdepth <= CV_64F && ddepth <= CV_64F ); - CV_Assert( src2.type() == src1.type() && src2.size() == src1.size() ); - - if (sdepth == CV_64F || ddepth == CV_64F) + arithm::mulMat, + arithm::mulMat, + arithm::mulMat, + arithm::mulMat, + arithm::mulMat, + arithm::mulMat, + arithm::mulMat + }, { - if (!deviceSupports(NATIVE_DOUBLE)) - CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); + arithm::mulMat, + arithm::mulMat, + arithm::mulMat, + arithm::mulMat, + arithm::mulMat, + arithm::mulMat, + arithm::mulMat + }, + { + 0 /*arithm::mulMat*/, + 0 /*arithm::mulMat*/, + arithm::mulMat, + arithm::mulMat, + arithm::mulMat, + arithm::mulMat, + arithm::mulMat + }, + { + 0 /*arithm::mulMat*/, + 0 /*arithm::mulMat*/, + arithm::mulMat, + arithm::mulMat, + arithm::mulMat, + arithm::mulMat, + arithm::mulMat 
+ }, + { + 0 /*arithm::mulMat*/, + 0 /*arithm::mulMat*/, + 0 /*arithm::mulMat*/, + 0 /*arithm::mulMat*/, + arithm::mulMat, + arithm::mulMat, + arithm::mulMat + }, + { + 0 /*arithm::mulMat*/, + 0 /*arithm::mulMat*/, + 0 /*arithm::mulMat*/, + 0 /*arithm::mulMat*/, + 0 /*arithm::mulMat*/, + arithm::mulMat, + arithm::mulMat + }, + { + 0 /*arithm::mulMat*/, + 0 /*arithm::mulMat*/, + 0 /*arithm::mulMat*/, + 0 /*arithm::mulMat*/, + 0 /*arithm::mulMat*/, + 0 /*arithm::mulMat*/, + arithm::mulMat } + }; - dst.create(src1.size(), CV_MAKE_TYPE(ddepth, cn)); + const int sdepth = src1.depth(); + const int ddepth = dst.depth(); + const int cn = src1.channels(); - PtrStepSzb src1_(src1.rows, src1.cols * cn, src1.data, src1.step); - PtrStepSzb src2_(src1.rows, src1.cols * cn, src2.data, src2.step); - PtrStepSzb dst_(src1.rows, src1.cols * cn, dst.data, dst.step); + cudaStream_t stream = StreamAccessor::getStream(_stream); - const func_t func = funcs[sdepth][ddepth]; + PtrStepSzb src1_(src1.rows, src1.cols * cn, src1.data, src1.step); + PtrStepSzb src2_(src1.rows, src1.cols * cn, src2.data, src2.step); + PtrStepSzb dst_(src1.rows, src1.cols * cn, dst.data, dst.step); - if (!func) - CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types"); + const func_t func = funcs[sdepth][ddepth]; - func(src1_, src2_, dst_, scale, stream); - } + if (!func) + CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types"); + + func(src1_, src2_, dst_, scale, stream); } namespace arithm @@ -1009,75 +973,73 @@ namespace arithm void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); } -void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double scale, int dtype, Stream& s) +static void mulScalar(const GpuMat& src, Scalar val, bool, GpuMat& dst, const GpuMat&, Stream& _stream) { - using namespace arithm; - typedef void (*func_t)(PtrStepSzb src1, double val, PtrStepSzb dst, 
cudaStream_t stream); static const func_t funcs[7][7] = { { - mulScalar, - mulScalar, - mulScalar, - mulScalar, - mulScalar, - mulScalar, - mulScalar + arithm::mulScalar, + arithm::mulScalar, + arithm::mulScalar, + arithm::mulScalar, + arithm::mulScalar, + arithm::mulScalar, + arithm::mulScalar }, { - mulScalar, - mulScalar, - mulScalar, - mulScalar, - mulScalar, - mulScalar, - mulScalar + arithm::mulScalar, + arithm::mulScalar, + arithm::mulScalar, + arithm::mulScalar, + arithm::mulScalar, + arithm::mulScalar, + arithm::mulScalar }, { - 0 /*mulScalar*/, - 0 /*mulScalar*/, - mulScalar, - mulScalar, - mulScalar, - mulScalar, - mulScalar + 0 /*arithm::mulScalar*/, + 0 /*arithm::mulScalar*/, + arithm::mulScalar, + arithm::mulScalar, + arithm::mulScalar, + arithm::mulScalar, + arithm::mulScalar }, { - 0 /*mulScalar*/, - 0 /*mulScalar*/, - mulScalar, - mulScalar, - mulScalar, - mulScalar, - mulScalar + 0 /*arithm::mulScalar*/, + 0 /*arithm::mulScalar*/, + arithm::mulScalar, + arithm::mulScalar, + arithm::mulScalar, + arithm::mulScalar, + arithm::mulScalar }, { - 0 /*mulScalar*/, - 0 /*mulScalar*/, - 0 /*mulScalar*/, - 0 /*mulScalar*/, - mulScalar, - mulScalar, - mulScalar + 0 /*arithm::mulScalar*/, + 0 /*arithm::mulScalar*/, + 0 /*arithm::mulScalar*/, + 0 /*arithm::mulScalar*/, + arithm::mulScalar, + arithm::mulScalar, + arithm::mulScalar }, { - 0 /*mulScalar*/, - 0 /*mulScalar*/, - 0 /*mulScalar*/, - 0 /*mulScalar*/, - 0 /*mulScalar*/, - mulScalar, - mulScalar + 0 /*arithm::mulScalar*/, + 0 /*arithm::mulScalar*/, + 0 /*arithm::mulScalar*/, + 0 /*arithm::mulScalar*/, + 0 /*arithm::mulScalar*/, + arithm::mulScalar, + arithm::mulScalar }, { - 0 /*mulScalar*/, - 0 /*mulScalar*/, - 0 /*mulScalar*/, - 0 /*mulScalar*/, - 0 /*mulScalar*/, - 0 /*mulScalar*/, - mulScalar + 0 /*arithm::mulScalar*/, + 0 /*arithm::mulScalar*/, + 0 /*arithm::mulScalar*/, + 0 /*arithm::mulScalar*/, + 0 /*arithm::mulScalar*/, + 0 /*arithm::mulScalar*/, + arithm::mulScalar } }; @@ -1093,32 +1055,16 @@ 
void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double {0 , 0, 0 , 0 } }; - if (dtype < 0) - dtype = src.depth(); - const int sdepth = src.depth(); - const int ddepth = CV_MAT_DEPTH(dtype); + const int ddepth = dst.depth(); const int cn = src.channels(); - CV_Assert( sdepth <= CV_64F && ddepth <= CV_64F ); - CV_Assert( cn <= 4 ); - - if (sdepth == CV_64F || ddepth == CV_64F) - { - if (!deviceSupports(NATIVE_DOUBLE)) - CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); - } - - dst.create(src.size(), CV_MAKE_TYPE(ddepth, cn)); - - cudaStream_t stream = StreamAccessor::getStream(s); - - const Scalar nsc(sc.val[0] * scale, sc.val[1] * scale, sc.val[2] * scale, sc.val[3] * scale); + cudaStream_t stream = StreamAccessor::getStream(_stream); const npp_func_t npp_func = npp_funcs[sdepth][cn - 1]; if (ddepth == sdepth && cn > 1 && npp_func != 0) { - npp_func(src, nsc, dst, stream); + npp_func(src, val, dst, stream); return; } @@ -1129,7 +1075,39 @@ void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double if (!func) CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types"); - func(src, nsc.val[0], dst, stream); + func(src, val[0], dst, stream); +} + +void cv::gpu::multiply(InputArray _src1, InputArray _src2, OutputArray _dst, double scale, int dtype, Stream& stream) +{ + if (_src1.type() == CV_8UC4 && _src2.type() == CV_32FC1) + { + GpuMat src1 = _src1.getGpuMat(); + GpuMat src2 = _src2.getGpuMat(); + + CV_Assert( src1.size() == src2.size() ); + + _dst.create(src1.size(), src1.type()); + GpuMat dst = _dst.getGpuMat(); + + arithm::mulMat_8uc4_32f(src1, src2, dst, StreamAccessor::getStream(stream)); + } + else if (_src1.type() == CV_16SC4 && _src2.type() == CV_32FC1) + { + GpuMat src1 = _src1.getGpuMat(); + GpuMat src2 = _src2.getGpuMat(); + + CV_Assert( src1.size() == src2.size() ); + + _dst.create(src1.size(), src1.type()); + GpuMat dst = _dst.getGpuMat(); 
+ + arithm::mulMat_16sc4_32f(src1, src2, dst, StreamAccessor::getStream(stream)); + } + else + { + arithm_op(_src1, _src2, _dst, GpuMat(), scale, dtype, stream, mulMat, mulScalar); + } } //////////////////////////////////////////////////////////////////////// diff --git a/modules/gpuarithm/test/test_element_operations.cpp b/modules/gpuarithm/test/test_element_operations.cpp index 73974d3ad..ece38311e 100644 --- a/modules/gpuarithm/test/test_element_operations.cpp +++ b/modules/gpuarithm/test/test_element_operations.cpp @@ -932,6 +932,93 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, Multiply_Scalar, testing::Combine( DEPTH_PAIRS, WHOLE_SUBMAT)); +//////////////////////////////////////////////////////////////////////////////// +// Multiply_Scalar_First + +PARAM_TEST_CASE(Multiply_Scalar_First, cv::gpu::DeviceInfo, cv::Size, std::pair, UseRoi) +{ + cv::gpu::DeviceInfo devInfo; + cv::Size size; + std::pair depth; + bool useRoi; + + virtual void SetUp() + { + devInfo = GET_PARAM(0); + size = GET_PARAM(1); + depth = GET_PARAM(2); + useRoi = GET_PARAM(3); + + cv::gpu::setDevice(devInfo.deviceID()); + } +}; + +GPU_TEST_P(Multiply_Scalar_First, WithOutScale) +{ + cv::Mat mat = randomMat(size, depth.first); + cv::Scalar val = randomScalar(0, 255); + + if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE)) + { + try + { + cv::gpu::GpuMat dst; + cv::gpu::multiply(val, loadMat(mat), dst, 1, depth.second); + } + catch (const cv::Exception& e) + { + ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code); + } + } + else + { + cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi); + cv::gpu::multiply(val, loadMat(mat, useRoi), dst, 1, depth.second); + + cv::Mat dst_gold; + cv::multiply(val, mat, dst_gold, 1, depth.second); + + EXPECT_MAT_NEAR(dst_gold, dst, 1.0); + } +} + + +GPU_TEST_P(Multiply_Scalar_First, WithScale) +{ + cv::Mat mat = randomMat(size, depth.first); + cv::Scalar val = randomScalar(0, 255); + double scale = 
randomDouble(0.0, 255.0); + + if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE)) + { + try + { + cv::gpu::GpuMat dst; + cv::gpu::multiply(val, loadMat(mat), dst, scale, depth.second); + } + catch (const cv::Exception& e) + { + ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code); + } + } + else + { + cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi); + cv::gpu::multiply(val, loadMat(mat, useRoi), dst, scale, depth.second); + + cv::Mat dst_gold; + cv::multiply(val, mat, dst_gold, scale, depth.second); + + EXPECT_MAT_NEAR(dst_gold, dst, 1.0); + } +} + +INSTANTIATE_TEST_CASE_P(GPU_Arithm, Multiply_Scalar_First, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + DEPTH_PAIRS, + WHOLE_SUBMAT)); + //////////////////////////////////////////////////////////////////////////////// // Divide_Array From 3ee12cbeb4ff8622af41983eff140534564a6d1c Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Fri, 26 Apr 2013 14:39:58 +0400 Subject: [PATCH 029/121] switched to Input/Output Array in gpu::divide --- .../gpuarithm/include/opencv2/gpuarithm.hpp | 12 +- modules/gpuarithm/src/cuda/div_inv.cu | 144 ------ modules/gpuarithm/src/cuda/div_scalar.cu | 128 +++-- modules/gpuarithm/src/element_operations.cpp | 469 +++++++----------- .../test/test_element_operations.cpp | 8 +- 5 files changed, 265 insertions(+), 496 deletions(-) delete mode 100644 modules/gpuarithm/src/cuda/div_inv.cu diff --git a/modules/gpuarithm/include/opencv2/gpuarithm.hpp b/modules/gpuarithm/include/opencv2/gpuarithm.hpp index 2b367fa1b..2fc6b48de 100644 --- a/modules/gpuarithm/include/opencv2/gpuarithm.hpp +++ b/modules/gpuarithm/include/opencv2/gpuarithm.hpp @@ -60,12 +60,14 @@ CV_EXPORTS void subtract(InputArray src1, InputArray src2, OutputArray dst, Inpu //! 
computes element-wise weighted product of the two arrays (dst = scale * src1 * src2) CV_EXPORTS void multiply(InputArray src1, InputArray src2, OutputArray dst, double scale = 1, int dtype = -1, Stream& stream = Stream::Null()); -//! computes element-wise weighted quotient of the two arrays (c = a / b) -CV_EXPORTS void divide(const GpuMat& a, const GpuMat& b, GpuMat& c, double scale = 1, int dtype = -1, Stream& stream = Stream::Null()); -//! computes element-wise weighted quotient of matrix and scalar (c = a / s) -CV_EXPORTS void divide(const GpuMat& a, const Scalar& sc, GpuMat& c, double scale = 1, int dtype = -1, Stream& stream = Stream::Null()); +//! computes element-wise weighted quotient of the two arrays (dst = scale * (src1 / src2)) +CV_EXPORTS void divide(InputArray src1, InputArray src2, OutputArray dst, double scale = 1, int dtype = -1, Stream& stream = Stream::Null()); + //! computes element-wise weighted reciprocal of an array (dst = scale/src2) -CV_EXPORTS void divide(double scale, const GpuMat& b, GpuMat& c, int dtype = -1, Stream& stream = Stream::Null()); +static inline void divide(double src1, InputArray src2, OutputArray dst, int dtype = -1, Stream& stream = Stream::Null()) +{ + divide(src1, src2, dst, 1.0, dtype, stream); +} //! computes the weighted sum of two arrays (dst = alpha*src1 + beta*src2 + gamma) CV_EXPORTS void addWeighted(const GpuMat& src1, double alpha, const GpuMat& src2, double beta, double gamma, GpuMat& dst, diff --git a/modules/gpuarithm/src/cuda/div_inv.cu b/modules/gpuarithm/src/cuda/div_inv.cu deleted file mode 100644 index 9cfda933c..000000000 --- a/modules/gpuarithm/src/cuda/div_inv.cu +++ /dev/null @@ -1,144 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. 
-// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2009, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. 
-// -//M*/ - -#if !defined CUDA_DISABLER - -#include "opencv2/core/cuda/common.hpp" -#include "opencv2/core/cuda/functional.hpp" -#include "opencv2/core/cuda/transform.hpp" -#include "opencv2/core/cuda/saturate_cast.hpp" -#include "opencv2/core/cuda/simd_functions.hpp" - -#include "arithm_func_traits.hpp" - -using namespace cv::gpu; -using namespace cv::gpu::cudev; - -namespace arithm -{ - template struct DivInv : unary_function - { - S val; - - __host__ explicit DivInv(S val_) : val(val_) {} - - __device__ __forceinline__ D operator ()(T a) const - { - return a != 0 ? saturate_cast(val / a) : 0; - } - }; -} - -namespace cv { namespace gpu { namespace cudev -{ - template struct TransformFunctorTraits< arithm::DivInv > : arithm::ArithmFuncTraits - { - }; -}}} - -namespace arithm -{ - template - void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream) - { - DivInv op(static_cast(val)); - cudev::transform((PtrStepSz) src1, (PtrStepSz) dst, op, WithOutMask(), stream); - } - - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - 
template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); 
- template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); -} - -#endif // CUDA_DISABLER diff --git a/modules/gpuarithm/src/cuda/div_scalar.cu b/modules/gpuarithm/src/cuda/div_scalar.cu index 42ba90cb0..464c4adf8 100644 --- a/modules/gpuarithm/src/cuda/div_scalar.cu +++ b/modules/gpuarithm/src/cuda/div_scalar.cu @@ -66,6 +66,18 @@ namespace arithm return saturate_cast(a / val); } }; + + template struct DivScalarInv : unary_function + { + S val; + + explicit DivScalarInv(S val_) : val(val_) {} + + __device__ __forceinline__ D operator ()(T a) const + { + return a != 0 ? 
saturate_cast(val / a) : 0; + } + }; } namespace cv { namespace gpu { namespace cudev @@ -73,72 +85,84 @@ namespace cv { namespace gpu { namespace cudev template struct TransformFunctorTraits< arithm::DivScalar > : arithm::ArithmFuncTraits { }; + + template struct TransformFunctorTraits< arithm::DivScalarInv > : arithm::ArithmFuncTraits + { + }; }}} namespace arithm { template - void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream) + void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream) { - DivScalar op(static_cast(val)); - cudev::transform((PtrStepSz) src1, (PtrStepSz) dst, op, WithOutMask(), stream); + if (inv) + { + DivScalarInv op(static_cast(val)); + cudev::transform((PtrStepSz) src1, (PtrStepSz) dst, op, WithOutMask(), stream); + } + else + { + DivScalar op(static_cast(val)); + cudev::transform((PtrStepSz) src1, (PtrStepSz) dst, op, WithOutMask(), stream); + } } - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t 
stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void 
divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); + //template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + //template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); + //template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + //template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double 
val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); + //template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + //template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + //template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + //template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double 
val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); + //template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + //template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + //template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + //template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + //template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - //template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - template void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); + //template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, 
cudaStream_t stream); + //template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + //template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + //template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + //template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + //template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); + template void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); } #endif // CUDA_DISABLER diff --git a/modules/gpuarithm/src/element_operations.cpp b/modules/gpuarithm/src/element_operations.cpp index 76bfb9ba3..24f67b7db 100644 --- a/modules/gpuarithm/src/element_operations.cpp +++ b/modules/gpuarithm/src/element_operations.cpp @@ -53,9 +53,7 @@ void cv::gpu::subtract(InputArray, InputArray, OutputArray, InputArray, int, Str void cv::gpu::multiply(InputArray, InputArray, OutputArray, double, int, Stream&) { throw_no_cuda(); } -void cv::gpu::divide(const GpuMat&, const GpuMat&, GpuMat&, double, int, Stream&) { throw_no_cuda(); } -void cv::gpu::divide(const GpuMat&, const Scalar&, GpuMat&, double, int, Stream&) { throw_no_cuda(); } -void cv::gpu::divide(double, const GpuMat&, GpuMat&, int, Stream&) { throw_no_cuda(); } +void cv::gpu::divide(InputArray, InputArray, OutputArray, double, int, Stream&) { throw_no_cuda(); } void cv::gpu::absdiff(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } void cv::gpu::absdiff(const GpuMat&, const Scalar&, GpuMat&, Stream&) { throw_no_cuda(); } @@ -120,7 +118,7 @@ void cv::gpu::polarToCart(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool, namespace { typedef void (*mat_mat_func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double scale, Stream& stream); - typedef void (*mat_scalar_func_t)(const GpuMat& 
src, Scalar val, bool inv, GpuMat& dst, const GpuMat& mask, Stream& stream); + typedef void (*mat_scalar_func_t)(const GpuMat& src, Scalar val, bool inv, GpuMat& dst, const GpuMat& mask, double scale, Stream& stream); void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, InputArray _mask, double scale, int dtype, Stream& stream, mat_mat_func_t mat_mat_func, mat_scalar_func_t mat_scalar_func) @@ -151,10 +149,6 @@ namespace { CV_Assert( scalar.total() <= 4 ); scalar.convertTo(Mat_(scalar.rows, scalar.cols, &val[0]), CV_64F); - val[0] *= scale; - val[1] *= scale; - val[2] *= scale; - val[3] *= scale; } GpuMat mask = _mask.getGpuMat(); @@ -182,9 +176,9 @@ namespace GpuMat dst = _dst.getGpuMat(); if (isScalar1) - mat_scalar_func(src2, val, true, dst, mask, stream); + mat_scalar_func(src2, val, true, dst, mask, scale, stream); else if (isScalar2) - mat_scalar_func(src1, val, false, dst, mask, stream); + mat_scalar_func(src1, val, false, dst, mask, scale, stream); else mat_mat_func(src1, src2, dst, mask, scale, stream); } @@ -505,7 +499,7 @@ namespace arithm void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); } -static void addScalar(const GpuMat& src, Scalar val, bool, GpuMat& dst, const GpuMat& mask, Stream& _stream) +static void addScalar(const GpuMat& src, Scalar val, bool, GpuMat& dst, const GpuMat& mask, double, Stream& _stream) { typedef void (*func_t)(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); static const func_t funcs[7][7] = @@ -756,7 +750,7 @@ namespace arithm void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); } -static void subScalar(const GpuMat& src, Scalar val, bool inv, GpuMat& dst, const GpuMat& mask, Stream& _stream) +static void subScalar(const GpuMat& src, Scalar val, bool inv, GpuMat& dst, const GpuMat& mask, double, Stream& _stream) { typedef void (*func_t)(PtrStepSzb src1, double val, bool inv, 
PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); static const func_t funcs[7][7] = @@ -973,7 +967,7 @@ namespace arithm void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); } -static void mulScalar(const GpuMat& src, Scalar val, bool, GpuMat& dst, const GpuMat&, Stream& _stream) +static void mulScalar(const GpuMat& src, Scalar val, bool, GpuMat& dst, const GpuMat&, double scale, Stream& _stream) { typedef void (*func_t)(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); static const func_t funcs[7][7] = @@ -1061,6 +1055,11 @@ static void mulScalar(const GpuMat& src, Scalar val, bool, GpuMat& dst, const Gp cudaStream_t stream = StreamAccessor::getStream(_stream); + val[0] *= scale; + val[1] *= scale; + val[2] *= scale; + val[3] *= scale; + const npp_func_t npp_func = npp_funcs[sdepth][cn - 1]; if (ddepth == sdepth && cn > 1 && npp_func != 0) { @@ -1123,204 +1122,167 @@ namespace arithm void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); } -void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double scale, int dtype, Stream& s) +static void divMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double scale, Stream& _stream) { - using namespace arithm; - - cudaStream_t stream = StreamAccessor::getStream(s); - - if (src1.type() == CV_8UC4 && src2.type() == CV_32FC1) + typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); + static const func_t funcs[7][7] = { - CV_Assert( src1.size() == src2.size() ); - - dst.create(src1.size(), src1.type()); - - divMat_8uc4_32f(src1, src2, dst, stream); - } - else if (src1.type() == CV_16SC4 && src2.type() == CV_32FC1) - { - CV_Assert( src1.size() == src2.size() ); - - dst.create(src1.size(), src1.type()); - - divMat_16sc4_32f(src1, src2, dst, stream); - } - else - { - typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double 
scale, cudaStream_t stream); - static const func_t funcs[7][7] = { - { - divMat, - divMat, - divMat, - divMat, - divMat, - divMat, - divMat - }, - { - divMat, - divMat, - divMat, - divMat, - divMat, - divMat, - divMat - }, - { - 0 /*divMat*/, - 0 /*divMat*/, - divMat, - divMat, - divMat, - divMat, - divMat - }, - { - 0 /*divMat*/, - 0 /*divMat*/, - divMat, - divMat, - divMat, - divMat, - divMat - }, - { - 0 /*divMat*/, - 0 /*divMat*/, - 0 /*divMat*/, - 0 /*divMat*/, - divMat, - divMat, - divMat - }, - { - 0 /*divMat*/, - 0 /*divMat*/, - 0 /*divMat*/, - 0 /*divMat*/, - 0 /*divMat*/, - divMat, - divMat - }, - { - 0 /*divMat*/, - 0 /*divMat*/, - 0 /*divMat*/, - 0 /*divMat*/, - 0 /*divMat*/, - 0 /*divMat*/, - divMat - } - }; - - if (dtype < 0) - dtype = src1.depth(); - - const int sdepth = src1.depth(); - const int ddepth = CV_MAT_DEPTH(dtype); - const int cn = src1.channels(); - - CV_Assert( sdepth <= CV_64F && ddepth <= CV_64F ); - CV_Assert( src2.type() == src1.type() && src2.size() == src1.size() ); - - if (sdepth == CV_64F || ddepth == CV_64F) + arithm::divMat, + arithm::divMat, + arithm::divMat, + arithm::divMat, + arithm::divMat, + arithm::divMat, + arithm::divMat + }, { - if (!deviceSupports(NATIVE_DOUBLE)) - CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); + arithm::divMat, + arithm::divMat, + arithm::divMat, + arithm::divMat, + arithm::divMat, + arithm::divMat, + arithm::divMat + }, + { + 0 /*arithm::divMat*/, + 0 /*arithm::divMat*/, + arithm::divMat, + arithm::divMat, + arithm::divMat, + arithm::divMat, + arithm::divMat + }, + { + 0 /*arithm::divMat*/, + 0 /*arithm::divMat*/, + arithm::divMat, + arithm::divMat, + arithm::divMat, + arithm::divMat, + arithm::divMat + }, + { + 0 /*arithm::divMat*/, + 0 /*arithm::divMat*/, + 0 /*arithm::divMat*/, + 0 /*arithm::divMat*/, + arithm::divMat, + arithm::divMat, + arithm::divMat + }, + { + 0 /*arithm::divMat*/, + 0 /*arithm::divMat*/, + 0 /*arithm::divMat*/, + 0 /*arithm::divMat*/, + 0 
/*arithm::divMat*/, + arithm::divMat, + arithm::divMat + }, + { + 0 /*arithm::divMat*/, + 0 /*arithm::divMat*/, + 0 /*arithm::divMat*/, + 0 /*arithm::divMat*/, + 0 /*arithm::divMat*/, + 0 /*arithm::divMat*/, + arithm::divMat } + }; - dst.create(src1.size(), CV_MAKE_TYPE(ddepth, cn)); + const int sdepth = src1.depth(); + const int ddepth = dst.depth(); + const int cn = src1.channels(); - PtrStepSzb src1_(src1.rows, src1.cols * cn, src1.data, src1.step); - PtrStepSzb src2_(src1.rows, src1.cols * cn, src2.data, src2.step); - PtrStepSzb dst_(src1.rows, src1.cols * cn, dst.data, dst.step); + cudaStream_t stream = StreamAccessor::getStream(_stream); - const func_t func = funcs[sdepth][ddepth]; + PtrStepSzb src1_(src1.rows, src1.cols * cn, src1.data, src1.step); + PtrStepSzb src2_(src1.rows, src1.cols * cn, src2.data, src2.step); + PtrStepSzb dst_(src1.rows, src1.cols * cn, dst.data, dst.step); - if (!func) - CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types"); + const func_t func = funcs[sdepth][ddepth]; - func(src1_, src2_, dst_, scale, stream); - } + if (!func) + CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types"); + + func(src1_, src2_, dst_, scale, stream); } namespace arithm { template - void divScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); + void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); } -void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double scale, int dtype, Stream& s) +static void divScalar(const GpuMat& src, Scalar val, bool inv, GpuMat& dst, const GpuMat&, double scale, Stream& _stream) { - using namespace arithm; - - typedef void (*func_t)(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); + typedef void (*func_t)(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); static const func_t funcs[7][7] = { { - divScalar, - 
divScalar, - divScalar, - divScalar, - divScalar, - divScalar, - divScalar + arithm::divScalar, + arithm::divScalar, + arithm::divScalar, + arithm::divScalar, + arithm::divScalar, + arithm::divScalar, + arithm::divScalar }, { - divScalar, - divScalar, - divScalar, - divScalar, - divScalar, - divScalar, - divScalar + arithm::divScalar, + arithm::divScalar, + arithm::divScalar, + arithm::divScalar, + arithm::divScalar, + arithm::divScalar, + arithm::divScalar }, { - 0 /*divScalar*/, - 0 /*divScalar*/, - divScalar, - divScalar, - divScalar, - divScalar, - divScalar + 0 /*arithm::divScalar*/, + 0 /*arithm::divScalar*/, + arithm::divScalar, + arithm::divScalar, + arithm::divScalar, + arithm::divScalar, + arithm::divScalar }, { - 0 /*divScalar*/, - 0 /*divScalar*/, - divScalar, - divScalar, - divScalar, - divScalar, - divScalar + 0 /*arithm::divScalar*/, + 0 /*arithm::divScalar*/, + arithm::divScalar, + arithm::divScalar, + arithm::divScalar, + arithm::divScalar, + arithm::divScalar }, { - 0 /*divScalar*/, - 0 /*divScalar*/, - 0 /*divScalar*/, - 0 /*divScalar*/, - divScalar, - divScalar, - divScalar + 0 /*arithm::divScalar*/, + 0 /*arithm::divScalar*/, + 0 /*arithm::divScalar*/, + 0 /*arithm::divScalar*/, + arithm::divScalar, + arithm::divScalar, + arithm::divScalar }, { - 0 /*divScalar*/, - 0 /*divScalar*/, - 0 /*divScalar*/, - 0 /*divScalar*/, - 0 /*divScalar*/, - divScalar, - divScalar + 0 /*arithm::divScalar*/, + 0 /*arithm::divScalar*/, + 0 /*arithm::divScalar*/, + 0 /*arithm::divScalar*/, + 0 /*arithm::divScalar*/, + arithm::divScalar, + arithm::divScalar }, { - 0 /*divScalar*/, - 0 /*divScalar*/, - 0 /*divScalar*/, - 0 /*divScalar*/, - 0 /*divScalar*/, - 0 /*divScalar*/, - divScalar + 0 /*arithm::divScalar*/, + 0 /*arithm::divScalar*/, + 0 /*arithm::divScalar*/, + 0 /*arithm::divScalar*/, + 0 /*arithm::divScalar*/, + 0 /*arithm::divScalar*/, + arithm::divScalar } }; @@ -1336,32 +1298,31 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, 
double sc {0 , 0, 0 , 0 } }; - if (dtype < 0) - dtype = src.depth(); - const int sdepth = src.depth(); - const int ddepth = CV_MAT_DEPTH(dtype); + const int ddepth = dst.depth(); const int cn = src.channels(); - CV_Assert( sdepth <= CV_64F && ddepth <= CV_64F ); - CV_Assert( cn <= 4 ); + cudaStream_t stream = StreamAccessor::getStream(_stream); - if (sdepth == CV_64F || ddepth == CV_64F) + if (inv) { - if (!deviceSupports(NATIVE_DOUBLE)) - CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); + val[0] *= scale; + val[1] *= scale; + val[2] *= scale; + val[3] *= scale; + } + else + { + val[0] /= scale; + val[1] /= scale; + val[2] /= scale; + val[3] /= scale; } - dst.create(src.size(), CV_MAKE_TYPE(ddepth, cn)); - - cudaStream_t stream = StreamAccessor::getStream(s); - - const Scalar nsc(sc.val[0] / scale, sc.val[1] / scale, sc.val[2] / scale, sc.val[3] / scale); - const npp_func_t npp_func = npp_funcs[sdepth][cn - 1]; - if (ddepth == sdepth && cn > 1 && npp_func != 0) + if (ddepth == sdepth && cn > 1 && npp_func != 0 && !inv) { - npp_func(src, nsc, dst, stream); + npp_func(src, val, dst, stream); return; } @@ -1372,113 +1333,39 @@ void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double sc if (!func) CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types"); - func(src, nsc.val[0], dst, stream); + func(src, val[0], inv, dst, stream); } -namespace arithm +void cv::gpu::divide(InputArray _src1, InputArray _src2, OutputArray _dst, double scale, int dtype, Stream& stream) { - template - void divInv(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); -} - -void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, Stream& s) -{ - using namespace arithm; - - typedef void (*func_t)(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); - static const func_t funcs[7][7] = + if (_src1.type() == CV_8UC4 && _src2.type() == CV_32FC1) { - { - 
divInv, - divInv, - divInv, - divInv, - divInv, - divInv, - divInv - }, - { - divInv, - divInv, - divInv, - divInv, - divInv, - divInv, - divInv - }, - { - 0 /*divInv*/, - 0 /*divInv*/, - divInv, - divInv, - divInv, - divInv, - divInv - }, - { - 0 /*divInv*/, - 0 /*divInv*/, - divInv, - divInv, - divInv, - divInv, - divInv - }, - { - 0 /*divInv*/, - 0 /*divInv*/, - 0 /*divInv*/, - 0 /*divInv*/, - divInv, - divInv, - divInv - }, - { - 0 /*divInv*/, - 0 /*divInv*/, - 0 /*divInv*/, - 0 /*divInv*/, - 0 /*divInv*/, - divInv, - divInv - }, - { - 0 /*divInv*/, - 0 /*divInv*/, - 0 /*divInv*/, - 0 /*divInv*/, - 0 /*divInv*/, - 0 /*divInv*/, - divInv - } - }; + GpuMat src1 = _src1.getGpuMat(); + GpuMat src2 = _src2.getGpuMat(); - if (dtype < 0) - dtype = src.depth(); + CV_Assert( src1.size() == src2.size() ); - const int sdepth = src.depth(); - const int ddepth = CV_MAT_DEPTH(dtype); - const int cn = src.channels(); + _dst.create(src1.size(), src1.type()); + GpuMat dst = _dst.getGpuMat(); - CV_Assert( sdepth <= CV_64F && ddepth <= CV_64F ); - CV_Assert( cn == 1 ); - - if (sdepth == CV_64F || ddepth == CV_64F) - { - if (!deviceSupports(NATIVE_DOUBLE)) - CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); + arithm::divMat_8uc4_32f(src1, src2, dst, StreamAccessor::getStream(stream)); } + else if (_src1.type() == CV_16SC4 && _src2.type() == CV_32FC1) + { + GpuMat src1 = _src1.getGpuMat(); + GpuMat src2 = _src2.getGpuMat(); - dst.create(src.size(), CV_MAKE_TYPE(ddepth, cn)); + CV_Assert( src1.size() == src2.size() ); - cudaStream_t stream = StreamAccessor::getStream(s); + _dst.create(src1.size(), src1.type()); + GpuMat dst = _dst.getGpuMat(); - const func_t func = funcs[sdepth][ddepth]; - - if (!func) - CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types"); - - func(src, scale, dst, stream); + arithm::divMat_16sc4_32f(src1, src2, dst, StreamAccessor::getStream(stream)); + } + else + { + arithm_op(_src1, 
_src2, _dst, GpuMat(), scale, dtype, stream, divMat, divScalar); + } } ////////////////////////////////////////////////////////////////////////////// diff --git a/modules/gpuarithm/test/test_element_operations.cpp b/modules/gpuarithm/test/test_element_operations.cpp index ece38311e..345c96015 100644 --- a/modules/gpuarithm/test/test_element_operations.cpp +++ b/modules/gpuarithm/test/test_element_operations.cpp @@ -1299,9 +1299,9 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, Divide_Scalar, testing::Combine( WHOLE_SUBMAT)); //////////////////////////////////////////////////////////////////////////////// -// Divide_Scalar_Inv +// Divide_Scalar_First -PARAM_TEST_CASE(Divide_Scalar_Inv, cv::gpu::DeviceInfo, cv::Size, std::pair, UseRoi) +PARAM_TEST_CASE(Divide_Scalar_First, cv::gpu::DeviceInfo, cv::Size, std::pair, UseRoi) { cv::gpu::DeviceInfo devInfo; cv::Size size; @@ -1319,7 +1319,7 @@ PARAM_TEST_CASE(Divide_Scalar_Inv, cv::gpu::DeviceInfo, cv::Size, std::pair Date: Thu, 25 Apr 2013 13:37:44 +0400 Subject: [PATCH 030/121] switched to Input/Output Array in gpu::absdiff --- .../gpuarithm/include/opencv2/gpuarithm.hpp | 8 +- modules/gpuarithm/src/element_operations.cpp | 86 +++++++------------ .../test/test_element_operations.cpp | 29 +++++++ 3 files changed, 64 insertions(+), 59 deletions(-) diff --git a/modules/gpuarithm/include/opencv2/gpuarithm.hpp b/modules/gpuarithm/include/opencv2/gpuarithm.hpp index 2fc6b48de..67fdc819e 100644 --- a/modules/gpuarithm/include/opencv2/gpuarithm.hpp +++ b/modules/gpuarithm/include/opencv2/gpuarithm.hpp @@ -69,6 +69,9 @@ static inline void divide(double src1, InputArray src2, OutputArray dst, int dty divide(src1, src2, dst, 1.0, dtype, stream); } +//! computes element-wise absolute difference of two arrays (dst = abs(src1 - src2)) +CV_EXPORTS void absdiff(InputArray src1, InputArray src2, OutputArray dst, Stream& stream = Stream::Null()); + //! 
computes the weighted sum of two arrays (dst = alpha*src1 + beta*src2 + gamma) CV_EXPORTS void addWeighted(const GpuMat& src1, double alpha, const GpuMat& src2, double beta, double gamma, GpuMat& dst, int dtype = -1, Stream& stream = Stream::Null()); @@ -79,11 +82,6 @@ static inline void scaleAdd(const GpuMat& src1, double alpha, const GpuMat& src2 addWeighted(src1, alpha, src2, 1.0, 0.0, dst, -1, stream); } -//! computes element-wise absolute difference of two arrays (c = abs(a - b)) -CV_EXPORTS void absdiff(const GpuMat& a, const GpuMat& b, GpuMat& c, Stream& stream = Stream::Null()); -//! computes element-wise absolute difference of array and scalar (c = abs(a - s)) -CV_EXPORTS void absdiff(const GpuMat& a, const Scalar& s, GpuMat& c, Stream& stream = Stream::Null()); - //! computes absolute value of each matrix element //! supports CV_16S and CV_32F depth CV_EXPORTS void abs(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()); diff --git a/modules/gpuarithm/src/element_operations.cpp b/modules/gpuarithm/src/element_operations.cpp index 24f67b7db..181f5161d 100644 --- a/modules/gpuarithm/src/element_operations.cpp +++ b/modules/gpuarithm/src/element_operations.cpp @@ -55,8 +55,7 @@ void cv::gpu::multiply(InputArray, InputArray, OutputArray, double, int, Stream& void cv::gpu::divide(InputArray, InputArray, OutputArray, double, int, Stream&) { throw_no_cuda(); } -void cv::gpu::absdiff(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } -void cv::gpu::absdiff(const GpuMat&, const Scalar&, GpuMat&, Stream&) { throw_no_cuda(); } +void cv::gpu::absdiff(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); } void cv::gpu::abs(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } @@ -1380,37 +1379,24 @@ namespace arithm void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); } -void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s) +static void absDiffMat(const 
GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double, Stream& _stream) { - using namespace arithm; - typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); static const func_t funcs[] = { - absDiffMat, - absDiffMat, - absDiffMat, - absDiffMat, - absDiffMat, - absDiffMat, - absDiffMat + arithm::absDiffMat, + arithm::absDiffMat, + arithm::absDiffMat, + arithm::absDiffMat, + arithm::absDiffMat, + arithm::absDiffMat, + arithm::absDiffMat }; const int depth = src1.depth(); const int cn = src1.channels(); - CV_Assert( depth <= CV_64F ); - CV_Assert( src2.type() == src1.type() && src2.size() == src1.size() ); - - if (depth == CV_64F) - { - if (!deviceSupports(NATIVE_DOUBLE)) - CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); - } - - dst.create(src1.size(), src1.type()); - - cudaStream_t stream = StreamAccessor::getStream(s); + cudaStream_t stream = StreamAccessor::getStream(_stream); PtrStepSzb src1_(src1.rows, src1.cols * cn, src1.data, src1.step); PtrStepSzb src2_(src1.rows, src1.cols * cn, src2.data, src2.step); @@ -1430,10 +1416,10 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea { const int vcols = src1_.cols >> 2; - absDiffMat_v4(PtrStepSz(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step), - PtrStepSz(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step), - PtrStepSz(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step), - stream); + arithm::absDiffMat_v4(PtrStepSz(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step), + PtrStepSz(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step), + PtrStepSz(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step), + stream); return; } @@ -1441,10 +1427,10 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea { const int vcols = src1_.cols >> 1; - absDiffMat_v2(PtrStepSz(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step), - 
PtrStepSz(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step), - PtrStepSz(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step), - stream); + arithm::absDiffMat_v2(PtrStepSz(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step), + PtrStepSz(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step), + PtrStepSz(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step), + stream); return; } @@ -1465,36 +1451,28 @@ namespace arithm void absDiffScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); } -void cv::gpu::absdiff(const GpuMat& src1, const Scalar& src2, GpuMat& dst, Stream& stream) +static void absDiffScalar(const GpuMat& src, Scalar val, bool, GpuMat& dst, const GpuMat&, double, Stream& stream) { - using namespace arithm; - typedef void (*func_t)(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); static const func_t funcs[] = { - absDiffScalar, - absDiffScalar, - absDiffScalar, - absDiffScalar, - absDiffScalar, - absDiffScalar, - absDiffScalar + arithm::absDiffScalar, + arithm::absDiffScalar, + arithm::absDiffScalar, + arithm::absDiffScalar, + arithm::absDiffScalar, + arithm::absDiffScalar, + arithm::absDiffScalar }; - const int depth = src1.depth(); + const int depth = src.depth(); - CV_Assert( depth <= CV_64F ); - CV_Assert( src1.channels() == 1 ); + funcs[depth](src, val[0], dst, StreamAccessor::getStream(stream)); +} - if (depth == CV_64F) - { - if (!deviceSupports(NATIVE_DOUBLE)) - CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); - } - - dst.create(src1.size(), src1.type()); - - funcs[depth](src1, src2.val[0], dst, StreamAccessor::getStream(stream)); +void cv::gpu::absdiff(InputArray src1, InputArray src2, OutputArray dst, Stream& stream) +{ + arithm_op(src1, src2, dst, noArray(), 1.0, -1, stream, absDiffMat, absDiffScalar); } ////////////////////////////////////////////////////////////////////////////// diff --git a/modules/gpuarithm/test/test_element_operations.cpp 
b/modules/gpuarithm/test/test_element_operations.cpp index 345c96015..0515a23ad 100644 --- a/modules/gpuarithm/test/test_element_operations.cpp +++ b/modules/gpuarithm/test/test_element_operations.cpp @@ -1433,6 +1433,35 @@ GPU_TEST_P(AbsDiff, Scalar) } } +GPU_TEST_P(AbsDiff, Scalar_First) +{ + cv::Mat src = randomMat(size, depth); + cv::Scalar val = randomScalar(0.0, 255.0); + + if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE)) + { + try + { + cv::gpu::GpuMat dst; + cv::gpu::absdiff(val, loadMat(src), dst); + } + catch (const cv::Exception& e) + { + ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code); + } + } + else + { + cv::gpu::GpuMat dst = createMat(size, depth, useRoi); + cv::gpu::absdiff(val, loadMat(src, useRoi), dst); + + cv::Mat dst_gold; + cv::absdiff(val, src, dst_gold); + + EXPECT_MAT_NEAR(dst_gold, dst, depth <= CV_32F ? 1.0 : 1e-5); + } +} + INSTANTIATE_TEST_CASE_P(GPU_Arithm, AbsDiff, testing::Combine( ALL_DEVICES, DIFFERENT_SIZES, From b866890b2768d1a9f56a7218d45b08bdebe70370 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Thu, 25 Apr 2013 13:49:45 +0400 Subject: [PATCH 031/121] switched to Input/Output Array in abs, sqr, sqrt, exp, log, pow --- .../gpuarithm/include/opencv2/gpuarithm.hpp | 46 ++-- modules/gpuarithm/src/element_operations.cpp | 208 ++++++++++-------- .../test/test_element_operations.cpp | 118 +++++----- 3 files changed, 192 insertions(+), 180 deletions(-) diff --git a/modules/gpuarithm/include/opencv2/gpuarithm.hpp b/modules/gpuarithm/include/opencv2/gpuarithm.hpp index 67fdc819e..67dab25ac 100644 --- a/modules/gpuarithm/include/opencv2/gpuarithm.hpp +++ b/modules/gpuarithm/include/opencv2/gpuarithm.hpp @@ -72,6 +72,26 @@ static inline void divide(double src1, InputArray src2, OutputArray dst, int dty //! computes element-wise absolute difference of two arrays (dst = abs(src1 - src2)) CV_EXPORTS void absdiff(InputArray src1, InputArray src2, OutputArray dst, Stream& stream = Stream::Null()); +//! 
computes absolute value of each matrix element +CV_EXPORTS void abs(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); + +//! computes square of each pixel in an image +CV_EXPORTS void sqr(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); + +//! computes square root of each pixel in an image +CV_EXPORTS void sqrt(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); + +//! computes exponent of each matrix element +CV_EXPORTS void exp(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); + +//! computes natural logarithm of absolute value of each matrix element +CV_EXPORTS void log(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); + +//! computes power of each matrix element: +//! (dst(i,j) = pow( src(i,j) , power), if src.type() is integer +//! (dst(i,j) = pow(fabs(src(i,j)), power), otherwise +CV_EXPORTS void pow(InputArray src, double power, OutputArray dst, Stream& stream = Stream::Null()); + //! computes the weighted sum of two arrays (dst = alpha*src1 + beta*src2 + gamma) CV_EXPORTS void addWeighted(const GpuMat& src1, double alpha, const GpuMat& src2, double beta, double gamma, GpuMat& dst, int dtype = -1, Stream& stream = Stream::Null()); @@ -82,32 +102,6 @@ static inline void scaleAdd(const GpuMat& src1, double alpha, const GpuMat& src2 addWeighted(src1, alpha, src2, 1.0, 0.0, dst, -1, stream); } -//! computes absolute value of each matrix element -//! supports CV_16S and CV_32F depth -CV_EXPORTS void abs(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()); - -//! computes square of each pixel in an image -//! supports CV_8U, CV_16U, CV_16S and CV_32F depth -CV_EXPORTS void sqr(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()); - -//! computes square root of each pixel in an image -//! supports CV_8U, CV_16U, CV_16S and CV_32F depth -CV_EXPORTS void sqrt(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()); - -//! 
computes exponent of each matrix element (b = e**a) -//! supports CV_8U, CV_16U, CV_16S and CV_32F depth -CV_EXPORTS void exp(const GpuMat& a, GpuMat& b, Stream& stream = Stream::Null()); - -//! computes natural logarithm of absolute value of each matrix element: b = log(abs(a)) -//! supports CV_8U, CV_16U, CV_16S and CV_32F depth -CV_EXPORTS void log(const GpuMat& a, GpuMat& b, Stream& stream = Stream::Null()); - -//! computes power of each matrix element: -// (dst(i,j) = pow( src(i,j) , power), if src.type() is integer -// (dst(i,j) = pow(fabs(src(i,j)), power), otherwise -//! supports all, except depth == CV_64F -CV_EXPORTS void pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream = Stream::Null()); - //! compares elements of two arrays (c = a b) CV_EXPORTS void compare(const GpuMat& a, const GpuMat& b, GpuMat& c, int cmpop, Stream& stream = Stream::Null()); CV_EXPORTS void compare(const GpuMat& a, Scalar sc, GpuMat& c, int cmpop, Stream& stream = Stream::Null()); diff --git a/modules/gpuarithm/src/element_operations.cpp b/modules/gpuarithm/src/element_operations.cpp index 181f5161d..b93f30052 100644 --- a/modules/gpuarithm/src/element_operations.cpp +++ b/modules/gpuarithm/src/element_operations.cpp @@ -57,17 +57,17 @@ void cv::gpu::divide(InputArray, InputArray, OutputArray, double, int, Stream&) void cv::gpu::absdiff(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); } -void cv::gpu::abs(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } +void cv::gpu::abs(InputArray, OutputArray, Stream&) { throw_no_cuda(); } -void cv::gpu::sqr(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } +void cv::gpu::sqr(InputArray, OutputArray, Stream&) { throw_no_cuda(); } -void cv::gpu::sqrt(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } +void cv::gpu::sqrt(InputArray, OutputArray, Stream&) { throw_no_cuda(); } -void cv::gpu::exp(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } +void cv::gpu::exp(InputArray, OutputArray, Stream&) { 
throw_no_cuda(); } -void cv::gpu::log(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } +void cv::gpu::log(InputArray, OutputArray, Stream&) { throw_no_cuda(); } -void cv::gpu::pow(const GpuMat&, double, GpuMat&, Stream&) { throw_no_cuda(); } +void cv::gpu::pow(InputArray, double, OutputArray, Stream&) { throw_no_cuda(); } void cv::gpu::compare(const GpuMat&, const GpuMat&, GpuMat&, int, Stream&) { throw_no_cuda(); } void cv::gpu::compare(const GpuMat&, Scalar, GpuMat&, int, Stream&) { throw_no_cuda(); } @@ -1484,7 +1484,7 @@ namespace arithm void absMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); } -void cv::gpu::abs(const GpuMat& src, GpuMat& dst, Stream& stream) +void cv::gpu::abs(InputArray _src, OutputArray _dst, Stream& stream) { using namespace arithm; @@ -1500,6 +1500,8 @@ void cv::gpu::abs(const GpuMat& src, GpuMat& dst, Stream& stream) absMat }; + GpuMat src = _src.getGpuMat(); + const int depth = src.depth(); CV_Assert( depth <= CV_64F ); @@ -1511,7 +1513,8 @@ void cv::gpu::abs(const GpuMat& src, GpuMat& dst, Stream& stream) CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); } - dst.create(src.size(), src.type()); + _dst.create(src.size(), src.type()); + GpuMat dst = _dst.getGpuMat(); funcs[depth](src, dst, StreamAccessor::getStream(stream)); } @@ -1525,7 +1528,7 @@ namespace arithm void sqrMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); } -void cv::gpu::sqr(const GpuMat& src, GpuMat& dst, Stream& stream) +void cv::gpu::sqr(InputArray _src, OutputArray _dst, Stream& stream) { using namespace arithm; @@ -1541,6 +1544,8 @@ void cv::gpu::sqr(const GpuMat& src, GpuMat& dst, Stream& stream) sqrMat }; + GpuMat src = _src.getGpuMat(); + const int depth = src.depth(); CV_Assert( depth <= CV_64F ); @@ -1552,7 +1557,8 @@ void cv::gpu::sqr(const GpuMat& src, GpuMat& dst, Stream& stream) CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); } - dst.create(src.size(), src.type()); + 
_dst.create(src.size(), src.type()); + GpuMat dst = _dst.getGpuMat(); funcs[depth](src, dst, StreamAccessor::getStream(stream)); } @@ -1566,7 +1572,7 @@ namespace arithm void sqrtMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); } -void cv::gpu::sqrt(const GpuMat& src, GpuMat& dst, Stream& stream) +void cv::gpu::sqrt(InputArray _src, OutputArray _dst, Stream& stream) { using namespace arithm; @@ -1582,46 +1588,7 @@ void cv::gpu::sqrt(const GpuMat& src, GpuMat& dst, Stream& stream) sqrtMat }; - const int depth = src.depth(); - - CV_Assert( depth <= CV_64F ); - CV_Assert( src.channels() == 1 ); - - if (depth == CV_64F) - { - if (!deviceSupports(NATIVE_DOUBLE)) - CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); - } - - dst.create(src.size(), src.type()); - - funcs[depth](src, dst, StreamAccessor::getStream(stream)); -} - -//////////////////////////////////////////////////////////////////////// -// log - -namespace arithm -{ - template - void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); -} - -void cv::gpu::log(const GpuMat& src, GpuMat& dst, Stream& stream) -{ - using namespace arithm; - - typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); - static const func_t funcs[] = - { - logMat, - logMat, - logMat, - logMat, - logMat, - logMat, - logMat - }; + GpuMat src = _src.getGpuMat(); const int depth = src.depth(); @@ -1634,7 +1601,8 @@ void cv::gpu::log(const GpuMat& src, GpuMat& dst, Stream& stream) CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); } - dst.create(src.size(), src.type()); + _dst.create(src.size(), src.type()); + GpuMat dst = _dst.getGpuMat(); funcs[depth](src, dst, StreamAccessor::getStream(stream)); } @@ -1648,7 +1616,7 @@ namespace arithm void expMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); } -void cv::gpu::exp(const GpuMat& src, GpuMat& dst, Stream& stream) +void cv::gpu::exp(InputArray _src, OutputArray _dst, Stream& stream) { 
using namespace arithm; @@ -1664,6 +1632,8 @@ void cv::gpu::exp(const GpuMat& src, GpuMat& dst, Stream& stream) expMat }; + GpuMat src = _src.getGpuMat(); + const int depth = src.depth(); CV_Assert( depth <= CV_64F ); @@ -1675,11 +1645,100 @@ void cv::gpu::exp(const GpuMat& src, GpuMat& dst, Stream& stream) CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); } - dst.create(src.size(), src.type()); + _dst.create(src.size(), src.type()); + GpuMat dst = _dst.getGpuMat(); funcs[depth](src, dst, StreamAccessor::getStream(stream)); } +//////////////////////////////////////////////////////////////////////// +// log + +namespace arithm +{ + template + void logMat(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); +} + +void cv::gpu::log(InputArray _src, OutputArray _dst, Stream& stream) +{ + using namespace arithm; + + typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream); + static const func_t funcs[] = + { + logMat, + logMat, + logMat, + logMat, + logMat, + logMat, + logMat + }; + + GpuMat src = _src.getGpuMat(); + + const int depth = src.depth(); + + CV_Assert( depth <= CV_64F ); + CV_Assert( src.channels() == 1 ); + + if (depth == CV_64F) + { + if (!deviceSupports(NATIVE_DOUBLE)) + CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); + } + + _dst.create(src.size(), src.type()); + GpuMat dst = _dst.getGpuMat(); + + funcs[depth](src, dst, StreamAccessor::getStream(stream)); +} + +//////////////////////////////////////////////////////////////////////// +// pow + +namespace arithm +{ + template void pow(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); +} + +void cv::gpu::pow(InputArray _src, double power, OutputArray _dst, Stream& stream) +{ + typedef void (*func_t)(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); + static const func_t funcs[] = + { + arithm::pow, + arithm::pow, + arithm::pow, + arithm::pow, + arithm::pow, + arithm::pow, + arithm::pow + }; 
+ + GpuMat src = _src.getGpuMat(); + + const int depth = src.depth(); + const int cn = src.channels(); + + CV_Assert(depth <= CV_64F); + + if (depth == CV_64F) + { + if (!deviceSupports(NATIVE_DOUBLE)) + CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); + } + + _dst.create(src.size(), src.type()); + GpuMat dst = _dst.getGpuMat(); + + PtrStepSzb src_(src.rows, src.cols * cn, src.data, src.step); + PtrStepSzb dst_(src.rows, src.cols * cn, dst.data, dst.step); + + funcs[depth](src_, power, dst_, StreamAccessor::getStream(stream)); +} + ////////////////////////////////////////////////////////////////////////////// // compare @@ -2562,47 +2621,6 @@ void cv::gpu::max(const GpuMat& src, double val, GpuMat& dst, Stream& stream) funcs[depth](src, cast_func[depth](val), dst, StreamAccessor::getStream(stream)); } -//////////////////////////////////////////////////////////////////////// -// pow - -namespace arithm -{ - template void pow(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); -} - -void cv::gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream) -{ - typedef void (*func_t)(PtrStepSzb src, double power, PtrStepSzb dst, cudaStream_t stream); - static const func_t funcs[] = - { - arithm::pow, - arithm::pow, - arithm::pow, - arithm::pow, - arithm::pow, - arithm::pow, - arithm::pow - }; - - const int depth = src.depth(); - const int cn = src.channels(); - - CV_Assert(depth <= CV_64F); - - if (depth == CV_64F) - { - if (!deviceSupports(NATIVE_DOUBLE)) - CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); - } - - dst.create(src.size(), src.type()); - - PtrStepSzb src_(src.rows, src.cols * cn, src.data, src.step); - PtrStepSzb dst_(src.rows, src.cols * cn, dst.data, dst.step); - - funcs[depth](src_, power, dst_, StreamAccessor::getStream(stream)); -} - //////////////////////////////////////////////////////////////////////// // addWeighted diff --git 
a/modules/gpuarithm/test/test_element_operations.cpp b/modules/gpuarithm/test/test_element_operations.cpp index 0515a23ad..d0098934c 100644 --- a/modules/gpuarithm/test/test_element_operations.cpp +++ b/modules/gpuarithm/test/test_element_operations.cpp @@ -1770,6 +1770,65 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, Exp, testing::Combine( MatDepth(CV_32F)), WHOLE_SUBMAT)); +//////////////////////////////////////////////////////////////////////////////// +// Pow + +PARAM_TEST_CASE(Pow, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi) +{ + cv::gpu::DeviceInfo devInfo; + cv::Size size; + int depth; + bool useRoi; + + virtual void SetUp() + { + devInfo = GET_PARAM(0); + size = GET_PARAM(1); + depth = GET_PARAM(2); + useRoi = GET_PARAM(3); + + cv::gpu::setDevice(devInfo.deviceID()); + } +}; + +GPU_TEST_P(Pow, Accuracy) +{ + cv::Mat src = randomMat(size, depth, 0.0, 10.0); + double power = randomDouble(2.0, 4.0); + + if (src.depth() < CV_32F) + power = static_cast(power); + + if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE)) + { + try + { + cv::gpu::GpuMat dst; + cv::gpu::pow(loadMat(src), power, dst); + } + catch (const cv::Exception& e) + { + ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code); + } + } + else + { + cv::gpu::GpuMat dst = createMat(size, depth, useRoi); + cv::gpu::pow(loadMat(src, useRoi), power, dst); + + cv::Mat dst_gold; + cv::pow(src, power, dst_gold); + + EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 
0.0 : 1e-1); + } +} + +INSTANTIATE_TEST_CASE_P(GPU_Arithm, Pow, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + ALL_DEPTH, + WHOLE_SUBMAT)); + //////////////////////////////////////////////////////////////////////////////// // Compare_Array @@ -2402,65 +2461,6 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, Max, testing::Combine( ALL_DEPTH, WHOLE_SUBMAT)); -//////////////////////////////////////////////////////////////////////////////// -// Pow - -PARAM_TEST_CASE(Pow, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi) -{ - cv::gpu::DeviceInfo devInfo; - cv::Size size; - int depth; - bool useRoi; - - virtual void SetUp() - { - devInfo = GET_PARAM(0); - size = GET_PARAM(1); - depth = GET_PARAM(2); - useRoi = GET_PARAM(3); - - cv::gpu::setDevice(devInfo.deviceID()); - } -}; - -GPU_TEST_P(Pow, Accuracy) -{ - cv::Mat src = randomMat(size, depth, 0.0, 10.0); - double power = randomDouble(2.0, 4.0); - - if (src.depth() < CV_32F) - power = static_cast(power); - - if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE)) - { - try - { - cv::gpu::GpuMat dst; - cv::gpu::pow(loadMat(src), power, dst); - } - catch (const cv::Exception& e) - { - ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code); - } - } - else - { - cv::gpu::GpuMat dst = createMat(size, depth, useRoi); - cv::gpu::pow(loadMat(src, useRoi), power, dst); - - cv::Mat dst_gold; - cv::pow(src, power, dst_gold); - - EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 
0.0 : 1e-1); - } -} - -INSTANTIATE_TEST_CASE_P(GPU_Arithm, Pow, testing::Combine( - ALL_DEVICES, - DIFFERENT_SIZES, - ALL_DEPTH, - WHOLE_SUBMAT)); - ////////////////////////////////////////////////////////////////////////////// // AddWeighted From 04a1a6dd8f1576605c3557d4eeeb935b8d1b0fbd Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Thu, 25 Apr 2013 14:20:57 +0400 Subject: [PATCH 032/121] switched to Input/Output Array in gpu::compare --- .../gpuarithm/include/opencv2/gpuarithm.hpp | 7 +- modules/gpuarithm/src/element_operations.cpp | 79 ++++++++----------- 2 files changed, 36 insertions(+), 50 deletions(-) diff --git a/modules/gpuarithm/include/opencv2/gpuarithm.hpp b/modules/gpuarithm/include/opencv2/gpuarithm.hpp index 67dab25ac..5a1859e42 100644 --- a/modules/gpuarithm/include/opencv2/gpuarithm.hpp +++ b/modules/gpuarithm/include/opencv2/gpuarithm.hpp @@ -92,6 +92,9 @@ CV_EXPORTS void log(InputArray src, OutputArray dst, Stream& stream = Stream::Nu //! (dst(i,j) = pow(fabs(src(i,j)), power), otherwise CV_EXPORTS void pow(InputArray src, double power, OutputArray dst, Stream& stream = Stream::Null()); +//! compares elements of two arrays (dst = src1 src2) +CV_EXPORTS void compare(InputArray src1, InputArray src2, OutputArray dst, int cmpop, Stream& stream = Stream::Null()); + //! computes the weighted sum of two arrays (dst = alpha*src1 + beta*src2 + gamma) CV_EXPORTS void addWeighted(const GpuMat& src1, double alpha, const GpuMat& src2, double beta, double gamma, GpuMat& dst, int dtype = -1, Stream& stream = Stream::Null()); @@ -102,10 +105,6 @@ static inline void scaleAdd(const GpuMat& src1, double alpha, const GpuMat& src2 addWeighted(src1, alpha, src2, 1.0, 0.0, dst, -1, stream); } -//! 
compares elements of two arrays (c = a b) -CV_EXPORTS void compare(const GpuMat& a, const GpuMat& b, GpuMat& c, int cmpop, Stream& stream = Stream::Null()); -CV_EXPORTS void compare(const GpuMat& a, Scalar sc, GpuMat& c, int cmpop, Stream& stream = Stream::Null()); - //! performs per-elements bit-wise inversion CV_EXPORTS void bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null()); diff --git a/modules/gpuarithm/src/element_operations.cpp b/modules/gpuarithm/src/element_operations.cpp index b93f30052..25e8b3001 100644 --- a/modules/gpuarithm/src/element_operations.cpp +++ b/modules/gpuarithm/src/element_operations.cpp @@ -69,8 +69,7 @@ void cv::gpu::log(InputArray, OutputArray, Stream&) { throw_no_cuda(); } void cv::gpu::pow(InputArray, double, OutputArray, Stream&) { throw_no_cuda(); } -void cv::gpu::compare(const GpuMat&, const GpuMat&, GpuMat&, int, Stream&) { throw_no_cuda(); } -void cv::gpu::compare(const GpuMat&, Scalar, GpuMat&, int, Stream&) { throw_no_cuda(); } +void cv::gpu::compare(InputArray, InputArray, OutputArray, int, Stream&) { throw_no_cuda(); } void cv::gpu::bitwise_not(const GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_no_cuda(); } @@ -116,11 +115,11 @@ void cv::gpu::polarToCart(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool, namespace { - typedef void (*mat_mat_func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double scale, Stream& stream); - typedef void (*mat_scalar_func_t)(const GpuMat& src, Scalar val, bool inv, GpuMat& dst, const GpuMat& mask, double scale, Stream& stream); + typedef void (*mat_mat_func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double scale, Stream& stream, int op); + typedef void (*mat_scalar_func_t)(const GpuMat& src, Scalar val, bool inv, GpuMat& dst, const GpuMat& mask, double scale, Stream& stream, int op); void arithm_op(InputArray _src1, InputArray _src2, OutputArray _dst, InputArray _mask, 
double scale, int dtype, Stream& stream, - mat_mat_func_t mat_mat_func, mat_scalar_func_t mat_scalar_func) + mat_mat_func_t mat_mat_func, mat_scalar_func_t mat_scalar_func, int op = 0) { const int kind1 = _src1.kind(); const int kind2 = _src2.kind(); @@ -175,11 +174,11 @@ namespace GpuMat dst = _dst.getGpuMat(); if (isScalar1) - mat_scalar_func(src2, val, true, dst, mask, scale, stream); + mat_scalar_func(src2, val, true, dst, mask, scale, stream, op); else if (isScalar2) - mat_scalar_func(src1, val, false, dst, mask, scale, stream); + mat_scalar_func(src1, val, false, dst, mask, scale, stream, op); else - mat_mat_func(src1, src2, dst, mask, scale, stream); + mat_mat_func(src1, src2, dst, mask, scale, stream, op); } } @@ -369,7 +368,7 @@ namespace arithm void addMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); } -static void addMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& _stream) +static void addMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& _stream, int) { typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); static const func_t funcs[7][7] = @@ -498,7 +497,7 @@ namespace arithm void addScalar(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); } -static void addScalar(const GpuMat& src, Scalar val, bool, GpuMat& dst, const GpuMat& mask, double, Stream& _stream) +static void addScalar(const GpuMat& src, Scalar val, bool, GpuMat& dst, const GpuMat& mask, double, Stream& _stream, int) { typedef void (*func_t)(PtrStepSzb src1, double val, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); static const func_t funcs[7][7] = @@ -620,7 +619,7 @@ namespace arithm void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); } -static void subMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& 
mask, double, Stream& _stream) +static void subMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& _stream, int) { typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); static const func_t funcs[7][7] = @@ -749,7 +748,7 @@ namespace arithm void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); } -static void subScalar(const GpuMat& src, Scalar val, bool inv, GpuMat& dst, const GpuMat& mask, double, Stream& _stream) +static void subScalar(const GpuMat& src, Scalar val, bool inv, GpuMat& dst, const GpuMat& mask, double, Stream& _stream, int) { typedef void (*func_t)(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); static const func_t funcs[7][7] = @@ -872,7 +871,7 @@ namespace arithm void mulMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); } -static void mulMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double scale, Stream& _stream) +static void mulMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double scale, Stream& _stream, int) { typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); static const func_t funcs[7][7] = @@ -966,7 +965,7 @@ namespace arithm void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); } -static void mulScalar(const GpuMat& src, Scalar val, bool, GpuMat& dst, const GpuMat&, double scale, Stream& _stream) +static void mulScalar(const GpuMat& src, Scalar val, bool, GpuMat& dst, const GpuMat&, double scale, Stream& _stream, int) { typedef void (*func_t)(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); static const func_t funcs[7][7] = @@ -1121,7 +1120,7 @@ namespace arithm void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); } 
-static void divMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double scale, Stream& _stream) +static void divMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double scale, Stream& _stream, int) { typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream); static const func_t funcs[7][7] = @@ -1215,7 +1214,7 @@ namespace arithm void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); } -static void divScalar(const GpuMat& src, Scalar val, bool inv, GpuMat& dst, const GpuMat&, double scale, Stream& _stream) +static void divScalar(const GpuMat& src, Scalar val, bool inv, GpuMat& dst, const GpuMat&, double scale, Stream& _stream, int) { typedef void (*func_t)(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream); static const func_t funcs[7][7] = @@ -1379,7 +1378,7 @@ namespace arithm void absDiffMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); } -static void absDiffMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double, Stream& _stream) +static void absDiffMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double, Stream& _stream, int) { typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); static const func_t funcs[] = @@ -1451,7 +1450,7 @@ namespace arithm void absDiffScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); } -static void absDiffScalar(const GpuMat& src, Scalar val, bool, GpuMat& dst, const GpuMat&, double, Stream& stream) +static void absDiffScalar(const GpuMat& src, Scalar val, bool, GpuMat& dst, const GpuMat&, double, Stream& stream, int) { typedef void (*func_t)(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream); static const func_t funcs[] = @@ -1755,7 +1754,7 @@ namespace arithm template void cmpMatLe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb 
dst, cudaStream_t stream); } -void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int cmpop, Stream& s) +static void cmpMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double, Stream& _stream, int cmpop) { using namespace arithm; @@ -1780,19 +1779,7 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c const int depth = src1.depth(); const int cn = src1.channels(); - CV_Assert( depth <= CV_64F ); - CV_Assert( src2.size() == src1.size() && src2.type() == src1.type() ); - CV_Assert( cmpop >= CMP_EQ && cmpop <= CMP_NE ); - - if (depth == CV_64F) - { - if (!deviceSupports(NATIVE_DOUBLE)) - CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); - } - - dst.create(src1.size(), CV_MAKE_TYPE(CV_8U, cn)); - - cudaStream_t stream = StreamAccessor::getStream(s); + cudaStream_t stream = StreamAccessor::getStream(_stream); static const int codes[] = { @@ -1859,7 +1846,7 @@ namespace } } -void cv::gpu::compare(const GpuMat& src, Scalar sc, GpuMat& dst, int cmpop, Stream& stream) +static void cmpScalar(const GpuMat& src, Scalar val, bool inv, GpuMat& dst, const GpuMat&, double, Stream& stream, int cmpop) { using namespace arithm; @@ -1881,24 +1868,24 @@ void cv::gpu::compare(const GpuMat& src, Scalar sc, GpuMat& dst, int cmpop, Stre castScalar, castScalar, castScalar, castScalar, castScalar, castScalar, castScalar }; + if (inv) + { + // src1 is a scalar; swap it with src2 + cmpop = cmpop == CMP_LT ? CMP_GT : cmpop == CMP_LE ? CMP_GE : + cmpop == CMP_GE ? CMP_LE : cmpop == CMP_GT ? 
CMP_LT : cmpop; + } + const int depth = src.depth(); const int cn = src.channels(); - CV_Assert( depth <= CV_64F ); - CV_Assert( cn <= 4 ); - CV_Assert( cmpop >= CMP_EQ && cmpop <= CMP_NE ); + cast_func[depth](val); - if (depth == CV_64F) - { - if (!deviceSupports(NATIVE_DOUBLE)) - CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); - } + funcs[depth][cmpop](src, cn, val.val, dst, StreamAccessor::getStream(stream)); +} - dst.create(src.size(), CV_MAKE_TYPE(CV_8U, cn)); - - cast_func[depth](sc); - - funcs[depth][cmpop](src, cn, sc.val, dst, StreamAccessor::getStream(stream)); +void cv::gpu::compare(InputArray src1, InputArray src2, OutputArray dst, int cmpop, Stream& stream) +{ + arithm_op(src1, src2, dst, noArray(), 1.0, CV_8U, stream, cmpMat, cmpScalar, cmpop); } ////////////////////////////////////////////////////////////////////////////// From d81f54db0b5d2268047bc26945bb53a45b2c1646 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Thu, 25 Apr 2013 15:02:22 +0400 Subject: [PATCH 033/121] switched to Input/Output Array in bitwise logical operations --- .../gpuarithm/include/opencv2/gpuarithm.hpp | 33 +-- modules/gpuarithm/src/element_operations.cpp | 261 ++++++------------ 2 files changed, 94 insertions(+), 200 deletions(-) diff --git a/modules/gpuarithm/include/opencv2/gpuarithm.hpp b/modules/gpuarithm/include/opencv2/gpuarithm.hpp index 5a1859e42..807285e98 100644 --- a/modules/gpuarithm/include/opencv2/gpuarithm.hpp +++ b/modules/gpuarithm/include/opencv2/gpuarithm.hpp @@ -95,6 +95,18 @@ CV_EXPORTS void pow(InputArray src, double power, OutputArray dst, Stream& strea //! compares elements of two arrays (dst = src1 src2) CV_EXPORTS void compare(InputArray src1, InputArray src2, OutputArray dst, int cmpop, Stream& stream = Stream::Null()); +//! performs per-elements bit-wise inversion +CV_EXPORTS void bitwise_not(InputArray src, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null()); + +//! 
calculates per-element bit-wise disjunction of two arrays +CV_EXPORTS void bitwise_or(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null()); + +//! calculates per-element bit-wise conjunction of two arrays +CV_EXPORTS void bitwise_and(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null()); + +//! calculates per-element bit-wise "exclusive or" operation +CV_EXPORTS void bitwise_xor(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null()); + //! computes the weighted sum of two arrays (dst = alpha*src1 + beta*src2 + gamma) CV_EXPORTS void addWeighted(const GpuMat& src1, double alpha, const GpuMat& src2, double beta, double gamma, GpuMat& dst, int dtype = -1, Stream& stream = Stream::Null()); @@ -105,27 +117,6 @@ static inline void scaleAdd(const GpuMat& src1, double alpha, const GpuMat& src2 addWeighted(src1, alpha, src2, 1.0, 0.0, dst, -1, stream); } -//! performs per-elements bit-wise inversion -CV_EXPORTS void bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null()); - -//! calculates per-element bit-wise disjunction of two arrays -CV_EXPORTS void bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null()); -//! calculates per-element bit-wise disjunction of array and scalar -//! supports 1, 3 and 4 channels images with CV_8U, CV_16U or CV_32S depth -CV_EXPORTS void bitwise_or(const GpuMat& src1, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null()); - -//! calculates per-element bit-wise conjunction of two arrays -CV_EXPORTS void bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null()); -//! calculates per-element bit-wise conjunction of array and scalar -//! 
supports 1, 3 and 4 channels images with CV_8U, CV_16U or CV_32S depth -CV_EXPORTS void bitwise_and(const GpuMat& src1, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null()); - -//! calculates per-element bit-wise "exclusive or" operation -CV_EXPORTS void bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null()); -//! calculates per-element bit-wise "exclusive or" of array and scalar -//! supports 1, 3 and 4 channels images with CV_8U, CV_16U or CV_32S depth -CV_EXPORTS void bitwise_xor(const GpuMat& src1, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null()); - //! pixel by pixel right shift of an image by a constant value //! supports 1, 3 and 4 channels images with integers elements CV_EXPORTS void rshift(const GpuMat& src, Scalar_ sc, GpuMat& dst, Stream& stream = Stream::Null()); diff --git a/modules/gpuarithm/src/element_operations.cpp b/modules/gpuarithm/src/element_operations.cpp index 25e8b3001..61c433852 100644 --- a/modules/gpuarithm/src/element_operations.cpp +++ b/modules/gpuarithm/src/element_operations.cpp @@ -71,16 +71,13 @@ void cv::gpu::pow(InputArray, double, OutputArray, Stream&) { throw_no_cuda(); } void cv::gpu::compare(InputArray, InputArray, OutputArray, int, Stream&) { throw_no_cuda(); } -void cv::gpu::bitwise_not(const GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_no_cuda(); } +void cv::gpu::bitwise_not(InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); } -void cv::gpu::bitwise_or(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_no_cuda(); } -void cv::gpu::bitwise_or(const GpuMat&, const Scalar&, GpuMat&, Stream&) { throw_no_cuda(); } +void cv::gpu::bitwise_or(InputArray, InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); } -void cv::gpu::bitwise_and(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_no_cuda(); } -void cv::gpu::bitwise_and(const GpuMat&, const Scalar&, GpuMat&, 
Stream&) { throw_no_cuda(); } +void cv::gpu::bitwise_and(InputArray, InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); } -void cv::gpu::bitwise_xor(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&, Stream&) { throw_no_cuda(); } -void cv::gpu::bitwise_xor(const GpuMat&, const Scalar&, GpuMat&, Stream&) { throw_no_cuda(); } +void cv::gpu::bitwise_xor(InputArray, InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); } void cv::gpu::rshift(const GpuMat&, Scalar_, GpuMat&, Stream&) { throw_no_cuda(); } @@ -1889,25 +1886,29 @@ void cv::gpu::compare(InputArray src1, InputArray src2, OutputArray dst, int cmp } ////////////////////////////////////////////////////////////////////////////// -// Unary bitwise logical operations +// bitwise_not namespace arithm { template void bitMatNot(PtrStepSzb src, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); } -void cv::gpu::bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask, Stream& s) +void cv::gpu::bitwise_not(InputArray _src, OutputArray _dst, InputArray _mask, Stream& _stream) { using namespace arithm; + GpuMat src = _src.getGpuMat(); + GpuMat mask = _mask.getGpuMat(); + const int depth = src.depth(); CV_Assert( depth <= CV_64F ); CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) ); - dst.create(src.size(), src.type()); + _dst.create(src.size(), src.type()); + GpuMat dst = _dst.getGpuMat(); - cudaStream_t stream = StreamAccessor::getStream(s); + cudaStream_t stream = StreamAccessor::getStream(_stream); const int bcols = (int) (src.cols * src.elemSize()); @@ -1941,6 +1942,16 @@ void cv::gpu::bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask, St ////////////////////////////////////////////////////////////////////////////// // Binary bitwise logical operations +namespace +{ + enum + { + BIT_OP_AND, + BIT_OP_OR, + BIT_OP_XOR + }; +} + namespace arithm { template void bitMatAnd(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, 
cudaStream_t stream); @@ -1948,19 +1959,31 @@ namespace arithm template void bitMatXor(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); } -void cv::gpu::bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, Stream& s) +static void bitMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& _stream, int op) { using namespace arithm; - const int depth = src1.depth(); + typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream); + static const func_t funcs32[] = + { + bitMatAnd, + bitMatOr, + bitMatXor + }; + static const func_t funcs16[] = + { + bitMatAnd, + bitMatOr, + bitMatXor + }; + static const func_t funcs8[] = + { + bitMatAnd, + bitMatOr, + bitMatXor + }; - CV_Assert( depth <= CV_64F ); - CV_Assert( src2.size() == src1.size() && src2.type() == src1.type() ); - CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src1.size()) ); - - dst.create(src1.size(), src1.type()); - - cudaStream_t stream = StreamAccessor::getStream(s); + cudaStream_t stream = StreamAccessor::getStream(_stream); const int bcols = (int) (src1.cols * src1.elemSize()); @@ -1968,8 +1991,7 @@ void cv::gpu::bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, c { const int vcols = bcols >> 2; - bitMatAnd( - PtrStepSzb(src1.rows, vcols, src1.data, src1.step), + funcs32[op](PtrStepSzb(src1.rows, vcols, src1.data, src1.step), PtrStepSzb(src1.rows, vcols, src2.data, src2.step), PtrStepSzb(src1.rows, vcols, dst.data, dst.step), mask, stream); @@ -1978,8 +2000,7 @@ void cv::gpu::bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, c { const int vcols = bcols >> 1; - bitMatAnd( - PtrStepSzb(src1.rows, vcols, src1.data, src1.step), + funcs16[op](PtrStepSzb(src1.rows, vcols, src1.data, src1.step), PtrStepSzb(src1.rows, vcols, src2.data, src2.step), PtrStepSzb(src1.rows, vcols, dst.data, dst.step), mask, stream); 
@@ -1987,111 +2008,13 @@ void cv::gpu::bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, c else { - bitMatAnd( - PtrStepSzb(src1.rows, bcols, src1.data, src1.step), - PtrStepSzb(src1.rows, bcols, src2.data, src2.step), - PtrStepSzb(src1.rows, bcols, dst.data, dst.step), - mask, stream); + funcs8[op](PtrStepSzb(src1.rows, bcols, src1.data, src1.step), + PtrStepSzb(src1.rows, bcols, src2.data, src2.step), + PtrStepSzb(src1.rows, bcols, dst.data, dst.step), + mask, stream); } } -void cv::gpu::bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, Stream& s) -{ - using namespace arithm; - - const int depth = src1.depth(); - - CV_Assert( depth <= CV_64F ); - CV_Assert( src2.size() == src1.size() && src2.type() == src1.type() ); - CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src1.size()) ); - - dst.create(src1.size(), src1.type()); - - cudaStream_t stream = StreamAccessor::getStream(s); - - const int bcols = (int) (src1.cols * src1.elemSize()); - - if ((bcols & 3) == 0) - { - const int vcols = bcols >> 2; - - bitMatOr( - PtrStepSzb(src1.rows, vcols, src1.data, src1.step), - PtrStepSzb(src1.rows, vcols, src2.data, src2.step), - PtrStepSzb(src1.rows, vcols, dst.data, dst.step), - mask, stream); - } - else if ((bcols & 1) == 0) - { - const int vcols = bcols >> 1; - - bitMatOr( - PtrStepSzb(src1.rows, vcols, src1.data, src1.step), - PtrStepSzb(src1.rows, vcols, src2.data, src2.step), - PtrStepSzb(src1.rows, vcols, dst.data, dst.step), - mask, stream); - } - else - { - - bitMatOr( - PtrStepSzb(src1.rows, bcols, src1.data, src1.step), - PtrStepSzb(src1.rows, bcols, src2.data, src2.step), - PtrStepSzb(src1.rows, bcols, dst.data, dst.step), - mask, stream); - } -} - -void cv::gpu::bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, Stream& s) -{ - using namespace arithm; - - const int depth = src1.depth(); - - CV_Assert( depth <= CV_64F ); - CV_Assert( src2.size() == src1.size() 
&& src2.type() == src1.type() ); - CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src1.size()) ); - - dst.create(src1.size(), src1.type()); - - cudaStream_t stream = StreamAccessor::getStream(s); - - const int bcols = (int) (src1.cols * src1.elemSize()); - - if ((bcols & 3) == 0) - { - const int vcols = bcols >> 2; - - bitMatXor( - PtrStepSzb(src1.rows, vcols, src1.data, src1.step), - PtrStepSzb(src1.rows, vcols, src2.data, src2.step), - PtrStepSzb(src1.rows, vcols, dst.data, dst.step), - mask, stream); - } - else if ((bcols & 1) == 0) - { - const int vcols = bcols >> 1; - - bitMatXor( - PtrStepSzb(src1.rows, vcols, src1.data, src1.step), - PtrStepSzb(src1.rows, vcols, src2.data, src2.step), - PtrStepSzb(src1.rows, vcols, dst.data, dst.step), - mask, stream); - } - else - { - - bitMatXor( - PtrStepSzb(src1.rows, bcols, src1.data, src1.step), - PtrStepSzb(src1.rows, bcols, src2.data, src2.step), - PtrStepSzb(src1.rows, bcols, dst.data, dst.step), - mask, stream); - } -} - -////////////////////////////////////////////////////////////////////////////// -// Binary bitwise logical operations with scalars - namespace arithm { template void bitScalarAnd(PtrStepSzb src1, unsigned int src2, PtrStepSzb dst, cudaStream_t stream); @@ -2179,18 +2102,34 @@ namespace }; } -void cv::gpu::bitwise_and(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream) +static void bitScalar(const GpuMat& src, Scalar val, bool, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int op) { using namespace arithm; typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream); - static const func_t funcs[5][4] = + static const func_t funcs[3][5][4] = { - {BitScalar >::call , 0, NppBitwiseC::call, BitScalar4< bitScalarAnd >::call}, - {0,0,0,0}, - {BitScalar >::call, 0, NppBitwiseC::call, NppBitwiseC::call}, - {0,0,0,0}, - {BitScalar >::call , 0, NppBitwiseC::call, NppBitwiseC::call} + { + {BitScalar >::call , 0, NppBitwiseC::call, 
BitScalar4< bitScalarAnd >::call}, + {0,0,0,0}, + {BitScalar >::call, 0, NppBitwiseC::call, NppBitwiseC::call}, + {0,0,0,0}, + {BitScalar >::call , 0, NppBitwiseC::call, NppBitwiseC::call} + }, + { + {BitScalar >::call , 0, NppBitwiseC::call, BitScalar4< bitScalarOr >::call}, + {0,0,0,0}, + {BitScalar >::call, 0, NppBitwiseC::call, NppBitwiseC::call}, + {0,0,0,0}, + {BitScalar >::call , 0, NppBitwiseC::call, NppBitwiseC::call} + }, + { + {BitScalar >::call , 0, NppBitwiseC::call, BitScalar4< bitScalarXor >::call}, + {0,0,0,0}, + {BitScalar >::call, 0, NppBitwiseC::call, NppBitwiseC::call}, + {0,0,0,0}, + {BitScalar >::call , 0, NppBitwiseC::call, NppBitwiseC::call} + } }; const int depth = src.depth(); @@ -2198,60 +2137,24 @@ void cv::gpu::bitwise_and(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stre CV_Assert( depth == CV_8U || depth == CV_16U || depth == CV_32S ); CV_Assert( cn == 1 || cn == 3 || cn == 4 ); + CV_Assert( mask.empty() ); - dst.create(src.size(), src.type()); - - funcs[depth][cn - 1](src, sc, dst, StreamAccessor::getStream(stream)); + funcs[op][depth][cn - 1](src, val, dst, StreamAccessor::getStream(stream)); } -void cv::gpu::bitwise_or(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream) +void cv::gpu::bitwise_or(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, Stream& stream) { - using namespace arithm; - - typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream); - static const func_t funcs[5][4] = - { - {BitScalar >::call , 0, NppBitwiseC::call, BitScalar4< bitScalarOr >::call}, - {0,0,0,0}, - {BitScalar >::call, 0, NppBitwiseC::call, NppBitwiseC::call}, - {0,0,0,0}, - {BitScalar >::call , 0, NppBitwiseC::call, NppBitwiseC::call} - }; - - const int depth = src.depth(); - const int cn = src.channels(); - - CV_Assert( depth == CV_8U || depth == CV_16U || depth == CV_32S ); - CV_Assert( cn == 1 || cn == 3 || cn == 4 ); - - dst.create(src.size(), src.type()); - - funcs[depth][cn - 
1](src, sc, dst, StreamAccessor::getStream(stream)); + arithm_op(src1, src2, dst, mask, 1.0, -1, stream, bitMat, bitScalar, BIT_OP_OR); } -void cv::gpu::bitwise_xor(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream) +void cv::gpu::bitwise_and(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, Stream& stream) { - using namespace arithm; + arithm_op(src1, src2, dst, mask, 1.0, -1, stream, bitMat, bitScalar, BIT_OP_AND); +} - typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream); - static const func_t funcs[5][4] = - { - {BitScalar >::call , 0, NppBitwiseC::call, BitScalar4< bitScalarXor >::call}, - {0,0,0,0}, - {BitScalar >::call, 0, NppBitwiseC::call, NppBitwiseC::call}, - {0,0,0,0}, - {BitScalar >::call , 0, NppBitwiseC::call, NppBitwiseC::call} - }; - - const int depth = src.depth(); - const int cn = src.channels(); - - CV_Assert( depth == CV_8U || depth == CV_16U || depth == CV_32S ); - CV_Assert( cn == 1 || cn == 3 || cn == 4 ); - - dst.create(src.size(), src.type()); - - funcs[depth][cn - 1](src, sc, dst, StreamAccessor::getStream(stream)); +void cv::gpu::bitwise_xor(InputArray src1, InputArray src2, OutputArray dst, InputArray mask, Stream& stream) +{ + arithm_op(src1, src2, dst, mask, 1.0, -1, stream, bitMat, bitScalar, BIT_OP_XOR); } ////////////////////////////////////////////////////////////////////////////// From f2aa6ebe158ed59407e166586cec239286abe881 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Thu, 25 Apr 2013 15:10:13 +0400 Subject: [PATCH 034/121] switched to Input/Output Array in shift operations --- .../gpuarithm/include/opencv2/gpuarithm.hpp | 16 +++++----- modules/gpuarithm/src/element_operations.cpp | 30 +++++++++++-------- 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/modules/gpuarithm/include/opencv2/gpuarithm.hpp b/modules/gpuarithm/include/opencv2/gpuarithm.hpp index 807285e98..943b3a1d8 100644 --- 
a/modules/gpuarithm/include/opencv2/gpuarithm.hpp +++ b/modules/gpuarithm/include/opencv2/gpuarithm.hpp @@ -107,6 +107,14 @@ CV_EXPORTS void bitwise_and(InputArray src1, InputArray src2, OutputArray dst, I //! calculates per-element bit-wise "exclusive or" operation CV_EXPORTS void bitwise_xor(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null()); +//! pixel by pixel right shift of an image by a constant value +//! supports 1, 3 and 4 channels images with integers elements +CV_EXPORTS void rshift(InputArray src, Scalar_ val, OutputArray dst, Stream& stream = Stream::Null()); + +//! pixel by pixel left shift of an image by a constant value +//! supports 1, 3 and 4 channels images with CV_8U, CV_16U or CV_32S depth +CV_EXPORTS void lshift(InputArray src, Scalar_ val, OutputArray dst, Stream& stream = Stream::Null()); + //! computes the weighted sum of two arrays (dst = alpha*src1 + beta*src2 + gamma) CV_EXPORTS void addWeighted(const GpuMat& src1, double alpha, const GpuMat& src2, double beta, double gamma, GpuMat& dst, int dtype = -1, Stream& stream = Stream::Null()); @@ -117,14 +125,6 @@ static inline void scaleAdd(const GpuMat& src1, double alpha, const GpuMat& src2 addWeighted(src1, alpha, src2, 1.0, 0.0, dst, -1, stream); } -//! pixel by pixel right shift of an image by a constant value -//! supports 1, 3 and 4 channels images with integers elements -CV_EXPORTS void rshift(const GpuMat& src, Scalar_ sc, GpuMat& dst, Stream& stream = Stream::Null()); - -//! pixel by pixel left shift of an image by a constant value -//! supports 1, 3 and 4 channels images with CV_8U, CV_16U or CV_32S depth -CV_EXPORTS void lshift(const GpuMat& src, Scalar_ sc, GpuMat& dst, Stream& stream = Stream::Null()); - //! 
computes per-element minimum of two arrays (dst = min(src1, src2)) CV_EXPORTS void min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream = Stream::Null()); diff --git a/modules/gpuarithm/src/element_operations.cpp b/modules/gpuarithm/src/element_operations.cpp index 61c433852..5a0f206ae 100644 --- a/modules/gpuarithm/src/element_operations.cpp +++ b/modules/gpuarithm/src/element_operations.cpp @@ -79,9 +79,9 @@ void cv::gpu::bitwise_and(InputArray, InputArray, OutputArray, InputArray, Strea void cv::gpu::bitwise_xor(InputArray, InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); } -void cv::gpu::rshift(const GpuMat&, Scalar_, GpuMat&, Stream&) { throw_no_cuda(); } +void cv::gpu::rshift(InputArray, Scalar_, OutputArray, Stream&) { throw_no_cuda(); } -void cv::gpu::lshift(const GpuMat&, Scalar_, GpuMat&, Stream&) { throw_no_cuda(); } +void cv::gpu::lshift(InputArray, Scalar_, OutputArray, Stream&) { throw_no_cuda(); } void cv::gpu::min(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } void cv::gpu::min(const GpuMat&, double, GpuMat&, Stream&) { throw_no_cuda(); } @@ -2213,7 +2213,7 @@ namespace }; } -void cv::gpu::rshift(const GpuMat& src, Scalar_ sc, GpuMat& dst, Stream& stream) +void cv::gpu::rshift(InputArray _src, Scalar_ val, OutputArray _dst, Stream& stream) { typedef void (*func_t)(const GpuMat& src, Scalar_ sc, GpuMat& dst, cudaStream_t stream); static const func_t funcs[5][4] = @@ -2225,15 +2225,18 @@ void cv::gpu::rshift(const GpuMat& src, Scalar_ sc, GpuMat& dst, Stream& st {NppShift::call, 0, NppShift::call, NppShift::call}, }; - CV_Assert(src.depth() < CV_32F); - CV_Assert(src.channels() == 1 || src.channels() == 3 || src.channels() == 4); + GpuMat src = _src.getGpuMat(); - dst.create(src.size(), src.type()); + CV_Assert( src.depth() < CV_32F ); + CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 ); - funcs[src.depth()][src.channels() - 1](src, sc, dst, 
StreamAccessor::getStream(stream)); + _dst.create(src.size(), src.type()); + GpuMat dst = _dst.getGpuMat(); + + funcs[src.depth()][src.channels() - 1](src, val, dst, StreamAccessor::getStream(stream)); } -void cv::gpu::lshift(const GpuMat& src, Scalar_ sc, GpuMat& dst, Stream& stream) +void cv::gpu::lshift(InputArray _src, Scalar_ val, OutputArray _dst, Stream& stream) { typedef void (*func_t)(const GpuMat& src, Scalar_ sc, GpuMat& dst, cudaStream_t stream); static const func_t funcs[5][4] = @@ -2245,12 +2248,15 @@ void cv::gpu::lshift(const GpuMat& src, Scalar_ sc, GpuMat& dst, Stream& st {NppShift::call, 0, NppShift::call, NppShift::call}, }; - CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32S); - CV_Assert(src.channels() == 1 || src.channels() == 3 || src.channels() == 4); + GpuMat src = _src.getGpuMat(); - dst.create(src.size(), src.type()); + CV_Assert( src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32S ); + CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 ); - funcs[src.depth()][src.channels() - 1](src, sc, dst, StreamAccessor::getStream(stream)); + _dst.create(src.size(), src.type()); + GpuMat dst = _dst.getGpuMat(); + + funcs[src.depth()][src.channels() - 1](src, val, dst, StreamAccessor::getStream(stream)); } ////////////////////////////////////////////////////////////////////////////// From ec70282bf75341db1d60c9f4a19328d716709044 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Thu, 25 Apr 2013 15:23:44 +0400 Subject: [PATCH 035/121] switched to Input/Output Array in min/max operations --- .../gpuarithm/include/opencv2/gpuarithm.hpp | 18 +- modules/gpuarithm/src/element_operations.cpp | 255 ++++++------------ modules/gpuimgproc/src/hough.cpp | 2 +- modules/nonfree/src/surf_gpu.cpp | 2 +- 4 files changed, 86 insertions(+), 191 deletions(-) diff --git a/modules/gpuarithm/include/opencv2/gpuarithm.hpp b/modules/gpuarithm/include/opencv2/gpuarithm.hpp index 
943b3a1d8..42d69ef94 100644 --- a/modules/gpuarithm/include/opencv2/gpuarithm.hpp +++ b/modules/gpuarithm/include/opencv2/gpuarithm.hpp @@ -115,6 +115,12 @@ CV_EXPORTS void rshift(InputArray src, Scalar_ val, OutputArray dst, Stream //! supports 1, 3 and 4 channels images with CV_8U, CV_16U or CV_32S depth CV_EXPORTS void lshift(InputArray src, Scalar_ val, OutputArray dst, Stream& stream = Stream::Null()); +//! computes per-element minimum of two arrays (dst = min(src1, src2)) +CV_EXPORTS void min(InputArray src1, InputArray src2, OutputArray dst, Stream& stream = Stream::Null()); + +//! computes per-element maximum of two arrays (dst = max(src1, src2)) +CV_EXPORTS void max(InputArray src1, InputArray src2, OutputArray dst, Stream& stream = Stream::Null()); + //! computes the weighted sum of two arrays (dst = alpha*src1 + beta*src2 + gamma) CV_EXPORTS void addWeighted(const GpuMat& src1, double alpha, const GpuMat& src2, double beta, double gamma, GpuMat& dst, int dtype = -1, Stream& stream = Stream::Null()); @@ -125,18 +131,6 @@ static inline void scaleAdd(const GpuMat& src1, double alpha, const GpuMat& src2 addWeighted(src1, alpha, src2, 1.0, 0.0, dst, -1, stream); } -//! computes per-element minimum of two arrays (dst = min(src1, src2)) -CV_EXPORTS void min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream = Stream::Null()); - -//! computes per-element minimum of array and scalar (dst = min(src1, src2)) -CV_EXPORTS void min(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream = Stream::Null()); - -//! computes per-element maximum of two arrays (dst = max(src1, src2)) -CV_EXPORTS void max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream = Stream::Null()); - -//! computes per-element maximum of array and scalar (dst = max(src1, src2)) -CV_EXPORTS void max(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream = Stream::Null()); - //! 
implements generalized matrix product algorithm GEMM from BLAS CV_EXPORTS void gemm(const GpuMat& src1, const GpuMat& src2, double alpha, const GpuMat& src3, double beta, GpuMat& dst, int flags = 0, Stream& stream = Stream::Null()); diff --git a/modules/gpuarithm/src/element_operations.cpp b/modules/gpuarithm/src/element_operations.cpp index 5a0f206ae..425b699a0 100644 --- a/modules/gpuarithm/src/element_operations.cpp +++ b/modules/gpuarithm/src/element_operations.cpp @@ -48,46 +48,30 @@ using namespace cv::gpu; #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) void cv::gpu::add(InputArray, InputArray, OutputArray, InputArray, int, Stream&) { throw_no_cuda(); } - void cv::gpu::subtract(InputArray, InputArray, OutputArray, InputArray, int, Stream&) { throw_no_cuda(); } - void cv::gpu::multiply(InputArray, InputArray, OutputArray, double, int, Stream&) { throw_no_cuda(); } - void cv::gpu::divide(InputArray, InputArray, OutputArray, double, int, Stream&) { throw_no_cuda(); } - void cv::gpu::absdiff(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); } void cv::gpu::abs(InputArray, OutputArray, Stream&) { throw_no_cuda(); } - void cv::gpu::sqr(InputArray, OutputArray, Stream&) { throw_no_cuda(); } - void cv::gpu::sqrt(InputArray, OutputArray, Stream&) { throw_no_cuda(); } - void cv::gpu::exp(InputArray, OutputArray, Stream&) { throw_no_cuda(); } - void cv::gpu::log(InputArray, OutputArray, Stream&) { throw_no_cuda(); } - void cv::gpu::pow(InputArray, double, OutputArray, Stream&) { throw_no_cuda(); } void cv::gpu::compare(InputArray, InputArray, OutputArray, int, Stream&) { throw_no_cuda(); } void cv::gpu::bitwise_not(InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); } - void cv::gpu::bitwise_or(InputArray, InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); } - void cv::gpu::bitwise_and(InputArray, InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); } - void cv::gpu::bitwise_xor(InputArray, InputArray, 
OutputArray, InputArray, Stream&) { throw_no_cuda(); } void cv::gpu::rshift(InputArray, Scalar_, OutputArray, Stream&) { throw_no_cuda(); } - void cv::gpu::lshift(InputArray, Scalar_, OutputArray, Stream&) { throw_no_cuda(); } -void cv::gpu::min(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } -void cv::gpu::min(const GpuMat&, double, GpuMat&, Stream&) { throw_no_cuda(); } - -void cv::gpu::max(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } -void cv::gpu::max(const GpuMat&, double, GpuMat&, Stream&) { throw_no_cuda(); } +void cv::gpu::min(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); } +void cv::gpu::max(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); } void cv::gpu::addWeighted(const GpuMat&, double, const GpuMat&, double, double, GpuMat&, int, Stream&) { throw_no_cuda(); } @@ -2262,6 +2246,15 @@ void cv::gpu::lshift(InputArray _src, Scalar_ val, OutputArray _dst, Stream ////////////////////////////////////////////////////////////////////////////// // Minimum and maximum operations +namespace +{ + enum + { + MIN_OP, + MAX_OP + }; +} + namespace arithm { void minMat_v4(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, cudaStream_t stream); @@ -2275,37 +2268,49 @@ namespace arithm template void maxScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); } -void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s) +void minMaxMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double, Stream& _stream, int op) { using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); - static const func_t funcs[] = + static const func_t funcs[2][7] = { - minMat, - minMat, - minMat, - minMat, - minMat, - minMat, - minMat + { + minMat, + minMat, + minMat, + minMat, + minMat, + minMat, + minMat + }, + { + maxMat, + maxMat, + maxMat, + maxMat, + maxMat, + maxMat, + maxMat + } + }; + + typedef 
void (*opt_func_t)(PtrStepSz src1, PtrStepSz src2, PtrStepSz dst, cudaStream_t stream); + static const opt_func_t funcs_v4[2] = + { + minMat_v4, maxMat_v4 + }; + static const opt_func_t funcs_v2[2] = + { + minMat_v2, maxMat_v2 }; const int depth = src1.depth(); const int cn = src1.channels(); CV_Assert( depth <= CV_64F ); - CV_Assert( src2.type() == src1.type() && src2.size() == src1.size() ); - if (depth == CV_64F) - { - if (!deviceSupports(NATIVE_DOUBLE)) - CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); - } - - dst.create(src1.size(), src1.type()); - - cudaStream_t stream = StreamAccessor::getStream(s); + cudaStream_t stream = StreamAccessor::getStream(_stream); PtrStepSzb src1_(src1.rows, src1.cols * cn, src1.data, src1.step); PtrStepSzb src2_(src1.rows, src1.cols * cn, src2.data, src2.step); @@ -2325,10 +2330,10 @@ void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s { const int vcols = src1_.cols >> 2; - minMat_v4(PtrStepSz(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step), - PtrStepSz(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step), - PtrStepSz(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step), - stream); + funcs_v4[op](PtrStepSz(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step), + PtrStepSz(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step), + PtrStepSz(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step), + stream); return; } @@ -2336,96 +2341,17 @@ void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s { const int vcols = src1_.cols >> 1; - minMat_v2(PtrStepSz(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step), - PtrStepSz(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step), - PtrStepSz(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step), - stream); + funcs_v2[op](PtrStepSz(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step), + PtrStepSz(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step), 
+ PtrStepSz(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step), + stream); return; } } } - const func_t func = funcs[depth]; - - if (!func) - CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types"); - - func(src1_, src2_, dst_, stream); -} - -void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s) -{ - using namespace arithm; - - typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream); - static const func_t funcs[] = - { - maxMat, - maxMat, - maxMat, - maxMat, - maxMat, - maxMat, - maxMat - }; - - const int depth = src1.depth(); - const int cn = src1.channels(); - - CV_Assert( depth <= CV_64F ); - CV_Assert( src2.type() == src1.type() && src2.size() == src1.size() ); - - if (depth == CV_64F) - { - if (!deviceSupports(NATIVE_DOUBLE)) - CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); - } - - dst.create(src1.size(), src1.type()); - - cudaStream_t stream = StreamAccessor::getStream(s); - - PtrStepSzb src1_(src1.rows, src1.cols * cn, src1.data, src1.step); - PtrStepSzb src2_(src1.rows, src1.cols * cn, src2.data, src2.step); - PtrStepSzb dst_(src1.rows, src1.cols * cn, dst.data, dst.step); - - if (depth == CV_8U || depth == CV_16U) - { - const intptr_t src1ptr = reinterpret_cast(src1_.data); - const intptr_t src2ptr = reinterpret_cast(src2_.data); - const intptr_t dstptr = reinterpret_cast(dst_.data); - - const bool isAllAligned = (src1ptr & 31) == 0 && (src2ptr & 31) == 0 && (dstptr & 31) == 0; - - if (isAllAligned) - { - if (depth == CV_8U && (src1_.cols & 3) == 0) - { - const int vcols = src1_.cols >> 2; - - maxMat_v4(PtrStepSz(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step), - PtrStepSz(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step), - PtrStepSz(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step), - stream); - - return; - } - else if (depth == CV_16U && (src1_.cols & 1) == 0) - { - const int 
vcols = src1_.cols >> 1; - - maxMat_v2(PtrStepSz(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step), - PtrStepSz(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step), - PtrStepSz(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step), - stream); - - return; - } - } - } - - const func_t func = funcs[depth]; + const func_t func = funcs[op][depth]; if (!func) CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types"); @@ -2441,20 +2367,31 @@ namespace } } -void cv::gpu::min(const GpuMat& src, double val, GpuMat& dst, Stream& stream) +void minMaxScalar(const GpuMat& src, Scalar val, bool, GpuMat& dst, const GpuMat&, double, Stream& stream, int op) { using namespace arithm; typedef void (*func_t)(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); - static const func_t funcs[] = + static const func_t funcs[2][7] = { - minScalar, - minScalar, - minScalar, - minScalar, - minScalar, - minScalar, - minScalar + { + minScalar, + minScalar, + minScalar, + minScalar, + minScalar, + minScalar, + minScalar + }, + { + maxScalar, + maxScalar, + maxScalar, + maxScalar, + maxScalar, + maxScalar, + maxScalar + } }; typedef double (*cast_func_t)(double sc); @@ -2468,53 +2405,17 @@ void cv::gpu::min(const GpuMat& src, double val, GpuMat& dst, Stream& stream) CV_Assert( depth <= CV_64F ); CV_Assert( src.channels() == 1 ); - if (depth == CV_64F) - { - if (!deviceSupports(NATIVE_DOUBLE)) - CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); - } - - dst.create(src.size(), src.type()); - - funcs[depth](src, cast_func[depth](val), dst, StreamAccessor::getStream(stream)); + funcs[op][depth](src, cast_func[depth](val[0]), dst, StreamAccessor::getStream(stream)); } -void cv::gpu::max(const GpuMat& src, double val, GpuMat& dst, Stream& stream) +void cv::gpu::min(InputArray src1, InputArray src2, OutputArray dst, Stream& stream) { - using namespace arithm; + arithm_op(src1, src2, dst, noArray(), 
1.0, -1, stream, minMaxMat, minMaxScalar, MIN_OP); +} - typedef void (*func_t)(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream); - static const func_t funcs[] = - { - maxScalar, - maxScalar, - maxScalar, - maxScalar, - maxScalar, - maxScalar, - maxScalar - }; - - typedef double (*cast_func_t)(double sc); - static const cast_func_t cast_func[] = - { - castScalar, castScalar, castScalar, castScalar, castScalar, castScalar, castScalar - }; - - const int depth = src.depth(); - - CV_Assert( depth <= CV_64F ); - CV_Assert( src.channels() == 1 ); - - if (depth == CV_64F) - { - if (!deviceSupports(NATIVE_DOUBLE)) - CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); - } - - dst.create(src.size(), src.type()); - - funcs[depth](src, cast_func[depth](val), dst, StreamAccessor::getStream(stream)); +void cv::gpu::max(InputArray src1, InputArray src2, OutputArray dst, Stream& stream) +{ + arithm_op(src1, src2, dst, noArray(), 1.0, -1, stream, minMaxMat, minMaxScalar, MAX_OP); } //////////////////////////////////////////////////////////////////////// diff --git a/modules/gpuimgproc/src/hough.cpp b/modules/gpuimgproc/src/hough.cpp index bc0a8a400..15e529762 100644 --- a/modules/gpuimgproc/src/hough.cpp +++ b/modules/gpuimgproc/src/hough.cpp @@ -761,7 +761,7 @@ namespace { buildRTable_gpu(edgePointList.ptr(0), edgePointList.ptr(1), edgePointList.cols, r_table, r_sizes.ptr(), make_short2(templCenter.x, templCenter.y), levels); - min(r_sizes, maxSize, r_sizes); + gpu::min(r_sizes, maxSize, r_sizes); } } diff --git a/modules/nonfree/src/surf_gpu.cpp b/modules/nonfree/src/surf_gpu.cpp index ace9bb53a..82ade2927 100644 --- a/modules/nonfree/src/surf_gpu.cpp +++ b/modules/nonfree/src/surf_gpu.cpp @@ -147,7 +147,7 @@ namespace if (use_mask) { - min(mask, 1.0, surf_.mask1); + gpu::min(mask, 1.0, surf_.mask1); gpu::integralBuffered(surf_.mask1, surf_.maskSum, surf_.intBuffer); maskOffset = bindMaskSumTex(surf_.maskSum); } From 
44ec450b5370f55c2a19199dbdf2c86919a21127 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Thu, 25 Apr 2013 15:28:29 +0400 Subject: [PATCH 036/121] switched to Input/Output Array in gpu::addWeighted --- modules/gpuarithm/include/opencv2/gpuarithm.hpp | 4 ++-- modules/gpuarithm/src/element_operations.cpp | 10 +++++++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/modules/gpuarithm/include/opencv2/gpuarithm.hpp b/modules/gpuarithm/include/opencv2/gpuarithm.hpp index 42d69ef94..32afca6a2 100644 --- a/modules/gpuarithm/include/opencv2/gpuarithm.hpp +++ b/modules/gpuarithm/include/opencv2/gpuarithm.hpp @@ -122,11 +122,11 @@ CV_EXPORTS void min(InputArray src1, InputArray src2, OutputArray dst, Stream& s CV_EXPORTS void max(InputArray src1, InputArray src2, OutputArray dst, Stream& stream = Stream::Null()); //! computes the weighted sum of two arrays (dst = alpha*src1 + beta*src2 + gamma) -CV_EXPORTS void addWeighted(const GpuMat& src1, double alpha, const GpuMat& src2, double beta, double gamma, GpuMat& dst, +CV_EXPORTS void addWeighted(InputArray src1, double alpha, InputArray src2, double beta, double gamma, OutputArray dst, int dtype = -1, Stream& stream = Stream::Null()); //! 
adds scaled array to another one (dst = alpha*src1 + src2) -static inline void scaleAdd(const GpuMat& src1, double alpha, const GpuMat& src2, GpuMat& dst, Stream& stream = Stream::Null()) +static inline void scaleAdd(InputArray src1, double alpha, InputArray src2, OutputArray dst, Stream& stream = Stream::Null()) { addWeighted(src1, alpha, src2, 1.0, 0.0, dst, -1, stream); } diff --git a/modules/gpuarithm/src/element_operations.cpp b/modules/gpuarithm/src/element_operations.cpp index 425b699a0..a54bd9d71 100644 --- a/modules/gpuarithm/src/element_operations.cpp +++ b/modules/gpuarithm/src/element_operations.cpp @@ -73,7 +73,7 @@ void cv::gpu::lshift(InputArray, Scalar_, OutputArray, Stream&) { throw_no_ void cv::gpu::min(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); } void cv::gpu::max(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); } -void cv::gpu::addWeighted(const GpuMat&, double, const GpuMat&, double, double, GpuMat&, int, Stream&) { throw_no_cuda(); } +void cv::gpu::addWeighted(InputArray, double, InputArray, double, double, OutputArray, int, Stream&) { throw_no_cuda(); } double cv::gpu::threshold(const GpuMat&, GpuMat&, double, double, int, Stream&) {throw_no_cuda(); return 0.0;} @@ -2427,7 +2427,7 @@ namespace arithm void addWeighted(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); } -void cv::gpu::addWeighted(const GpuMat& src1, double alpha, const GpuMat& src2, double beta, double gamma, GpuMat& dst, int ddepth, Stream& stream) +void cv::gpu::addWeighted(InputArray _src1, double alpha, InputArray _src2, double beta, double gamma, OutputArray _dst, int ddepth, Stream& stream) { typedef void (*func_t)(PtrStepSzb src1, double alpha, PtrStepSzb src2, double beta, double gamma, PtrStepSzb dst, cudaStream_t stream); static const func_t funcs[7][7][7] = @@ -2889,6 +2889,9 @@ void cv::gpu::addWeighted(const GpuMat& src1, double alpha, const GpuMat& src2, } }; + 
GpuMat src1 = _src1.getGpuMat(); + GpuMat src2 = _src2.getGpuMat(); + int sdepth1 = src1.depth(); int sdepth2 = src2.depth(); ddepth = ddepth >= 0 ? CV_MAT_DEPTH(ddepth) : std::max(sdepth1, sdepth2); @@ -2903,7 +2906,8 @@ void cv::gpu::addWeighted(const GpuMat& src1, double alpha, const GpuMat& src2, CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); } - dst.create(src1.size(), CV_MAKE_TYPE(ddepth, cn)); + _dst.create(src1.size(), CV_MAKE_TYPE(ddepth, cn)); + GpuMat dst = _dst.getGpuMat(); PtrStepSzb src1_(src1.rows, src1.cols * cn, src1.data, src1.step); PtrStepSzb src2_(src1.rows, src1.cols * cn, src2.data, src2.step); From 0a83817acec8a31dab148a68f669c3fcc9477ac4 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Thu, 25 Apr 2013 15:33:19 +0400 Subject: [PATCH 037/121] switched to Input/Output Array in gpu::threshold --- .../gpuarithm/include/opencv2/gpuarithm.hpp | 6 +- modules/gpuarithm/src/element_operations.cpp | 11 ++- .../test/test_element_operations.cpp | 96 +++++++++---------- 3 files changed, 58 insertions(+), 55 deletions(-) diff --git a/modules/gpuarithm/include/opencv2/gpuarithm.hpp b/modules/gpuarithm/include/opencv2/gpuarithm.hpp index 32afca6a2..d78e2ceec 100644 --- a/modules/gpuarithm/include/opencv2/gpuarithm.hpp +++ b/modules/gpuarithm/include/opencv2/gpuarithm.hpp @@ -131,6 +131,9 @@ static inline void scaleAdd(InputArray src1, double alpha, InputArray src2, Outp addWeighted(src1, alpha, src2, 1.0, 0.0, dst, -1, stream); } +//! applies fixed threshold to the image +CV_EXPORTS double threshold(InputArray src, OutputArray dst, double thresh, double maxval, int type, Stream& stream = Stream::Null()); + //! 
implements generalized matrix product algorithm GEMM from BLAS CV_EXPORTS void gemm(const GpuMat& src1, const GpuMat& src2, double alpha, const GpuMat& src3, double beta, GpuMat& dst, int flags = 0, Stream& stream = Stream::Null()); @@ -256,9 +259,6 @@ CV_EXPORTS void rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, co CV_EXPORTS void copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, int borderType, const Scalar& value = Scalar(), Stream& stream = Stream::Null()); -//! applies fixed threshold to the image -CV_EXPORTS double threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxval, int type, Stream& stream = Stream::Null()); - //! computes the integral image //! sum will have CV_32S type, but will contain unsigned int values //! supports only CV_8UC1 source type diff --git a/modules/gpuarithm/src/element_operations.cpp b/modules/gpuarithm/src/element_operations.cpp index a54bd9d71..766ed9687 100644 --- a/modules/gpuarithm/src/element_operations.cpp +++ b/modules/gpuarithm/src/element_operations.cpp @@ -75,7 +75,7 @@ void cv::gpu::max(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda( void cv::gpu::addWeighted(InputArray, double, InputArray, double, double, OutputArray, int, Stream&) { throw_no_cuda(); } -double cv::gpu::threshold(const GpuMat&, GpuMat&, double, double, int, Stream&) {throw_no_cuda(); return 0.0;} +double cv::gpu::threshold(InputArray, OutputArray, double, double, int, Stream&) {throw_no_cuda(); return 0.0;} void cv::gpu::magnitude(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } void cv::gpu::magnitude(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } @@ -2938,8 +2938,10 @@ namespace arithm void threshold(PtrStepSzb src, PtrStepSzb dst, double thresh, double maxVal, int type, cudaStream_t stream); } -double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxVal, int type, Stream& s) +double cv::gpu::threshold(InputArray 
_src, OutputArray _dst, double thresh, double maxVal, int type, Stream& _stream) { + GpuMat src = _src.getGpuMat(); + const int depth = src.depth(); CV_Assert( src.channels() == 1 && depth <= CV_64F ); @@ -2951,9 +2953,10 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); } - dst.create(src.size(), src.type()); + _dst.create(src.size(), src.type()); + GpuMat dst = _dst.getGpuMat(); - cudaStream_t stream = StreamAccessor::getStream(s); + cudaStream_t stream = StreamAccessor::getStream(_stream); if (src.type() == CV_32FC1 && type == 2/*THRESH_TRUNC*/) { diff --git a/modules/gpuarithm/test/test_element_operations.cpp b/modules/gpuarithm/test/test_element_operations.cpp index d0098934c..61ea454ea 100644 --- a/modules/gpuarithm/test/test_element_operations.cpp +++ b/modules/gpuarithm/test/test_element_operations.cpp @@ -2526,6 +2526,54 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, AddWeighted, testing::Combine( ALL_DEPTH, WHOLE_SUBMAT)); +/////////////////////////////////////////////////////////////////////////////////////////////////////// +// Threshold + +CV_ENUM(ThreshOp, cv::THRESH_BINARY, cv::THRESH_BINARY_INV, cv::THRESH_TRUNC, cv::THRESH_TOZERO, cv::THRESH_TOZERO_INV) +#define ALL_THRESH_OPS testing::Values(ThreshOp(cv::THRESH_BINARY), ThreshOp(cv::THRESH_BINARY_INV), ThreshOp(cv::THRESH_TRUNC), ThreshOp(cv::THRESH_TOZERO), ThreshOp(cv::THRESH_TOZERO_INV)) + +PARAM_TEST_CASE(Threshold, cv::gpu::DeviceInfo, cv::Size, MatType, ThreshOp, UseRoi) +{ + cv::gpu::DeviceInfo devInfo; + cv::Size size; + int type; + int threshOp; + bool useRoi; + + virtual void SetUp() + { + devInfo = GET_PARAM(0); + size = GET_PARAM(1); + type = GET_PARAM(2); + threshOp = GET_PARAM(3); + useRoi = GET_PARAM(4); + + cv::gpu::setDevice(devInfo.deviceID()); + } +}; + +GPU_TEST_P(Threshold, Accuracy) +{ + cv::Mat src = randomMat(size, type); + double maxVal = randomDouble(20.0, 127.0); + 
double thresh = randomDouble(0.0, maxVal); + + cv::gpu::GpuMat dst = createMat(src.size(), src.type(), useRoi); + cv::gpu::threshold(loadMat(src, useRoi), dst, thresh, maxVal, threshOp); + + cv::Mat dst_gold; + cv::threshold(src, dst_gold, thresh, maxVal, threshOp); + + EXPECT_MAT_NEAR(dst_gold, dst, 0.0); +} + +INSTANTIATE_TEST_CASE_P(GPU_Arithm, Threshold, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatType(CV_8UC1), MatType(CV_16SC1), MatType(CV_32FC1)), + ALL_THRESH_OPS, + WHOLE_SUBMAT)); + //////////////////////////////////////////////////////////////////////////////// // Magnitude @@ -2744,52 +2792,4 @@ INSTANTIATE_TEST_CASE_P(GPU_Arithm, PolarToCart, testing::Combine( testing::Values(AngleInDegrees(false), AngleInDegrees(true)), WHOLE_SUBMAT)); -/////////////////////////////////////////////////////////////////////////////////////////////////////// -// Threshold - -CV_ENUM(ThreshOp, cv::THRESH_BINARY, cv::THRESH_BINARY_INV, cv::THRESH_TRUNC, cv::THRESH_TOZERO, cv::THRESH_TOZERO_INV) -#define ALL_THRESH_OPS testing::Values(ThreshOp(cv::THRESH_BINARY), ThreshOp(cv::THRESH_BINARY_INV), ThreshOp(cv::THRESH_TRUNC), ThreshOp(cv::THRESH_TOZERO), ThreshOp(cv::THRESH_TOZERO_INV)) - -PARAM_TEST_CASE(Threshold, cv::gpu::DeviceInfo, cv::Size, MatType, ThreshOp, UseRoi) -{ - cv::gpu::DeviceInfo devInfo; - cv::Size size; - int type; - int threshOp; - bool useRoi; - - virtual void SetUp() - { - devInfo = GET_PARAM(0); - size = GET_PARAM(1); - type = GET_PARAM(2); - threshOp = GET_PARAM(3); - useRoi = GET_PARAM(4); - - cv::gpu::setDevice(devInfo.deviceID()); - } -}; - -GPU_TEST_P(Threshold, Accuracy) -{ - cv::Mat src = randomMat(size, type); - double maxVal = randomDouble(20.0, 127.0); - double thresh = randomDouble(0.0, maxVal); - - cv::gpu::GpuMat dst = createMat(src.size(), src.type(), useRoi); - cv::gpu::threshold(loadMat(src, useRoi), dst, thresh, maxVal, threshOp); - - cv::Mat dst_gold; - cv::threshold(src, dst_gold, thresh, maxVal, threshOp); 
- - EXPECT_MAT_NEAR(dst_gold, dst, 0.0); -} - -INSTANTIATE_TEST_CASE_P(GPU_Arithm, Threshold, testing::Combine( - ALL_DEVICES, - DIFFERENT_SIZES, - testing::Values(MatType(CV_8UC1), MatType(CV_16SC1), MatType(CV_32FC1)), - ALL_THRESH_OPS, - WHOLE_SUBMAT)); - #endif // HAVE_CUDA From 58c4d0eaeb3c41f420bbb4f43258e99676f3947d Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Thu, 25 Apr 2013 15:46:45 +0400 Subject: [PATCH 038/121] switched to Input/Output Array in cart<->polar operations --- .../gpuarithm/include/opencv2/gpuarithm.hpp | 56 +++++------ modules/gpuarithm/src/element_operations.cpp | 98 ++++++++++++------- 2 files changed, 93 insertions(+), 61 deletions(-) diff --git a/modules/gpuarithm/include/opencv2/gpuarithm.hpp b/modules/gpuarithm/include/opencv2/gpuarithm.hpp index d78e2ceec..d429c34eb 100644 --- a/modules/gpuarithm/include/opencv2/gpuarithm.hpp +++ b/modules/gpuarithm/include/opencv2/gpuarithm.hpp @@ -134,6 +134,34 @@ static inline void scaleAdd(InputArray src1, double alpha, InputArray src2, Outp //! applies fixed threshold to the image CV_EXPORTS double threshold(InputArray src, OutputArray dst, double thresh, double maxval, int type, Stream& stream = Stream::Null()); +//! computes magnitude of complex (x(i).re, x(i).im) vector +//! supports only CV_32FC2 type +CV_EXPORTS void magnitude(InputArray xy, OutputArray magnitude, Stream& stream = Stream::Null()); + +//! computes squared magnitude of complex (x(i).re, x(i).im) vector +//! supports only CV_32FC2 type +CV_EXPORTS void magnitudeSqr(InputArray xy, OutputArray magnitude, Stream& stream = Stream::Null()); + +//! computes magnitude of each (x(i), y(i)) vector +//! supports only floating-point source +CV_EXPORTS void magnitude(InputArray x, InputArray y, OutputArray magnitude, Stream& stream = Stream::Null()); + +//! computes squared magnitude of each (x(i), y(i)) vector +//! 
supports only floating-point source +CV_EXPORTS void magnitudeSqr(InputArray x, InputArray y, OutputArray magnitude, Stream& stream = Stream::Null()); + +//! computes angle of each (x(i), y(i)) vector +//! supports only floating-point source +CV_EXPORTS void phase(InputArray x, InputArray y, OutputArray angle, bool angleInDegrees = false, Stream& stream = Stream::Null()); + +//! converts Cartesian coordinates to polar +//! supports only floating-point source +CV_EXPORTS void cartToPolar(InputArray x, InputArray y, OutputArray magnitude, OutputArray angle, bool angleInDegrees = false, Stream& stream = Stream::Null()); + +//! converts polar coordinates to Cartesian +//! supports only floating-point source +CV_EXPORTS void polarToCart(InputArray magnitude, InputArray angle, OutputArray x, OutputArray y, bool angleInDegrees = false, Stream& stream = Stream::Null()); + //! implements generalized matrix product algorithm GEMM from BLAS CV_EXPORTS void gemm(const GpuMat& src1, const GpuMat& src2, double alpha, const GpuMat& src3, double beta, GpuMat& dst, int flags = 0, Stream& stream = Stream::Null()); @@ -163,34 +191,6 @@ CV_EXPORTS void split(const GpuMat& src, GpuMat* dst, Stream& stream = Stream::N //! copies each plane of a multi-channel array to a dedicated array CV_EXPORTS void split(const GpuMat& src, std::vector& dst, Stream& stream = Stream::Null()); -//! computes magnitude of complex (x(i).re, x(i).im) vector -//! supports only CV_32FC2 type -CV_EXPORTS void magnitude(const GpuMat& xy, GpuMat& magnitude, Stream& stream = Stream::Null()); - -//! computes squared magnitude of complex (x(i).re, x(i).im) vector -//! supports only CV_32FC2 type -CV_EXPORTS void magnitudeSqr(const GpuMat& xy, GpuMat& magnitude, Stream& stream = Stream::Null()); - -//! computes magnitude of each (x(i), y(i)) vector -//! supports only floating-point source -CV_EXPORTS void magnitude(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, Stream& stream = Stream::Null()); - -//! 
computes squared magnitude of each (x(i), y(i)) vector -//! supports only floating-point source -CV_EXPORTS void magnitudeSqr(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, Stream& stream = Stream::Null()); - -//! computes angle (angle(i)) of each (x(i), y(i)) vector -//! supports only floating-point source -CV_EXPORTS void phase(const GpuMat& x, const GpuMat& y, GpuMat& angle, bool angleInDegrees = false, Stream& stream = Stream::Null()); - -//! converts Cartesian coordinates to polar -//! supports only floating-point source -CV_EXPORTS void cartToPolar(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, GpuMat& angle, bool angleInDegrees = false, Stream& stream = Stream::Null()); - -//! converts polar coordinates to Cartesian -//! supports only floating-point source -CV_EXPORTS void polarToCart(const GpuMat& magnitude, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees = false, Stream& stream = Stream::Null()); - //! scales and shifts array elements so that either the specified norm (alpha) or the minimum (alpha) and maximum (beta) array values get the specified values CV_EXPORTS void normalize(const GpuMat& src, GpuMat& dst, double alpha = 1, double beta = 0, int norm_type = NORM_L2, int dtype = -1, const GpuMat& mask = GpuMat()); diff --git a/modules/gpuarithm/src/element_operations.cpp b/modules/gpuarithm/src/element_operations.cpp index 766ed9687..3ec4f84f6 100644 --- a/modules/gpuarithm/src/element_operations.cpp +++ b/modules/gpuarithm/src/element_operations.cpp @@ -77,17 +77,13 @@ void cv::gpu::addWeighted(InputArray, double, InputArray, double, double, Output double cv::gpu::threshold(InputArray, OutputArray, double, double, int, Stream&) {throw_no_cuda(); return 0.0;} -void cv::gpu::magnitude(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } -void cv::gpu::magnitude(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } - -void cv::gpu::magnitudeSqr(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } -void 
cv::gpu::magnitudeSqr(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } - -void cv::gpu::phase(const GpuMat&, const GpuMat&, GpuMat&, bool, Stream&) { throw_no_cuda(); } - -void cv::gpu::cartToPolar(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool, Stream&) { throw_no_cuda(); } - -void cv::gpu::polarToCart(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool, Stream&) { throw_no_cuda(); } +void cv::gpu::magnitude(InputArray, OutputArray, Stream&) { throw_no_cuda(); } +void cv::gpu::magnitude(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); } +void cv::gpu::magnitudeSqr(InputArray, OutputArray, Stream&) { throw_no_cuda(); } +void cv::gpu::magnitudeSqr(InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); } +void cv::gpu::phase(InputArray, InputArray, OutputArray, bool, Stream&) { throw_no_cuda(); } +void cv::gpu::cartToPolar(InputArray, InputArray, OutputArray, OutputArray, bool, Stream&) { throw_no_cuda(); } +void cv::gpu::polarToCart(InputArray, InputArray, OutputArray, OutputArray, bool, Stream&) { throw_no_cuda(); } #else @@ -3005,12 +3001,10 @@ namespace { typedef NppStatus (*nppMagnitude_t)(const Npp32fc* pSrc, int nSrcStep, Npp32f* pDst, int nDstStep, NppiSize oSizeROI); - inline void npp_magnitude(const GpuMat& src, GpuMat& dst, nppMagnitude_t func, cudaStream_t stream) + void npp_magnitude(const GpuMat& src, GpuMat& dst, nppMagnitude_t func, cudaStream_t stream) { CV_Assert(src.type() == CV_32FC2); - dst.create(src.size(), CV_32FC1); - NppiSize sz; sz.width = src.cols; sz.height = src.rows; @@ -3024,13 +3018,23 @@ namespace } } -void cv::gpu::magnitude(const GpuMat& src, GpuMat& dst, Stream& stream) +void cv::gpu::magnitude(InputArray _src, OutputArray _dst, Stream& stream) { + GpuMat src = _src.getGpuMat(); + + _dst.create(src.size(), CV_32FC1); + GpuMat dst = _dst.getGpuMat(); + npp_magnitude(src, dst, nppiMagnitude_32fc32f_C1R, StreamAccessor::getStream(stream)); } -void cv::gpu::magnitudeSqr(const 
GpuMat& src, GpuMat& dst, Stream& stream) +void cv::gpu::magnitudeSqr(InputArray _src, OutputArray _dst, Stream& stream) { + GpuMat src = _src.getGpuMat(); + + _dst.create(src.size(), CV_32FC1); + GpuMat dst = _dst.getGpuMat(); + npp_magnitude(src, dst, nppiMagnitudeSqr_32fc32f_C1R, StreamAccessor::getStream(stream)); } @@ -3048,18 +3052,13 @@ namespace cv { namespace gpu { namespace cudev namespace { - inline void cartToPolar_caller(const GpuMat& x, const GpuMat& y, GpuMat* mag, bool magSqr, GpuMat* angle, bool angleInDegrees, cudaStream_t stream) + void cartToPolar_caller(const GpuMat& x, const GpuMat& y, GpuMat* mag, bool magSqr, GpuMat* angle, bool angleInDegrees, cudaStream_t stream) { using namespace ::cv::gpu::cudev::mathfunc; CV_Assert(x.size() == y.size() && x.type() == y.type()); CV_Assert(x.depth() == CV_32F); - if (mag) - mag->create(x.size(), x.type()); - if (angle) - angle->create(x.size(), x.type()); - GpuMat x1cn = x.reshape(1); GpuMat y1cn = y.reshape(1); GpuMat mag1cn = mag ? 
mag->reshape(1) : GpuMat(); @@ -3068,16 +3067,13 @@ namespace cartToPolar_gpu(x1cn, y1cn, mag1cn, magSqr, angle1cn, angleInDegrees, stream); } - inline void polarToCart_caller(const GpuMat& mag, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees, cudaStream_t stream) + void polarToCart_caller(const GpuMat& mag, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees, cudaStream_t stream) { using namespace ::cv::gpu::cudev::mathfunc; CV_Assert((mag.empty() || mag.size() == angle.size()) && mag.type() == angle.type()); CV_Assert(mag.depth() == CV_32F); - x.create(mag.size(), mag.type()); - y.create(mag.size(), mag.type()); - GpuMat mag1cn = mag.reshape(1); GpuMat angle1cn = angle.reshape(1); GpuMat x1cn = x.reshape(1); @@ -3087,29 +3083,65 @@ namespace } } -void cv::gpu::magnitude(const GpuMat& x, const GpuMat& y, GpuMat& dst, Stream& stream) +void cv::gpu::magnitude(InputArray _x, InputArray _y, OutputArray _dst, Stream& stream) { + GpuMat x = _x.getGpuMat(); + GpuMat y = _y.getGpuMat(); + + _dst.create(x.size(), CV_32FC1); + GpuMat dst = _dst.getGpuMat(); + cartToPolar_caller(x, y, &dst, false, 0, false, StreamAccessor::getStream(stream)); } -void cv::gpu::magnitudeSqr(const GpuMat& x, const GpuMat& y, GpuMat& dst, Stream& stream) +void cv::gpu::magnitudeSqr(InputArray _x, InputArray _y, OutputArray _dst, Stream& stream) { + GpuMat x = _x.getGpuMat(); + GpuMat y = _y.getGpuMat(); + + _dst.create(x.size(), CV_32FC1); + GpuMat dst = _dst.getGpuMat(); + cartToPolar_caller(x, y, &dst, true, 0, false, StreamAccessor::getStream(stream)); } -void cv::gpu::phase(const GpuMat& x, const GpuMat& y, GpuMat& angle, bool angleInDegrees, Stream& stream) +void cv::gpu::phase(InputArray _x, InputArray _y, OutputArray _dst, bool angleInDegrees, Stream& stream) { - cartToPolar_caller(x, y, 0, false, &angle, angleInDegrees, StreamAccessor::getStream(stream)); + GpuMat x = _x.getGpuMat(); + GpuMat y = _y.getGpuMat(); + + _dst.create(x.size(), CV_32FC1); + GpuMat dst 
= _dst.getGpuMat(); + + cartToPolar_caller(x, y, 0, false, &dst, angleInDegrees, StreamAccessor::getStream(stream)); } -void cv::gpu::cartToPolar(const GpuMat& x, const GpuMat& y, GpuMat& mag, GpuMat& angle, bool angleInDegrees, Stream& stream) +void cv::gpu::cartToPolar(InputArray _x, InputArray _y, OutputArray _mag, OutputArray _angle, bool angleInDegrees, Stream& stream) { + GpuMat x = _x.getGpuMat(); + GpuMat y = _y.getGpuMat(); + + _mag.create(x.size(), CV_32FC1); + GpuMat mag = _mag.getGpuMat(); + + _angle.create(x.size(), CV_32FC1); + GpuMat angle = _angle.getGpuMat(); + cartToPolar_caller(x, y, &mag, false, &angle, angleInDegrees, StreamAccessor::getStream(stream)); } -void cv::gpu::polarToCart(const GpuMat& magnitude, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees, Stream& stream) +void cv::gpu::polarToCart(InputArray _mag, InputArray _angle, OutputArray _x, OutputArray _y, bool angleInDegrees, Stream& stream) { - polarToCart_caller(magnitude, angle, x, y, angleInDegrees, StreamAccessor::getStream(stream)); + GpuMat mag = _mag.getGpuMat(); + GpuMat angle = _angle.getGpuMat(); + + _x.create(mag.size(), CV_32FC1); + GpuMat x = _x.getGpuMat(); + + _y.create(mag.size(), CV_32FC1); + GpuMat y = _y.getGpuMat(); + + polarToCart_caller(mag, angle, x, y, angleInDegrees, StreamAccessor::getStream(stream)); } #endif From 3d8ca010b78fa7dbcdc5647ddeeb2cbc27a60ce4 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Thu, 25 Apr 2013 16:02:41 +0400 Subject: [PATCH 039/121] switched to Input/Output Array in split/merge operations --- .../gpuarithm/include/opencv2/gpuarithm.hpp | 20 ++--- modules/gpuarithm/src/core.cpp | 84 +++++++++---------- modules/gpuarithm/src/cuda/split_merge.cu | 4 +- 3 files changed, 49 insertions(+), 59 deletions(-) diff --git a/modules/gpuarithm/include/opencv2/gpuarithm.hpp b/modules/gpuarithm/include/opencv2/gpuarithm.hpp index d429c34eb..8996372b5 100644 --- a/modules/gpuarithm/include/opencv2/gpuarithm.hpp +++ 
b/modules/gpuarithm/include/opencv2/gpuarithm.hpp @@ -162,6 +162,14 @@ CV_EXPORTS void cartToPolar(InputArray x, InputArray y, OutputArray magnitude, O //! supports only floating-point source CV_EXPORTS void polarToCart(InputArray magnitude, InputArray angle, OutputArray x, OutputArray y, bool angleInDegrees = false, Stream& stream = Stream::Null()); +//! makes multi-channel array out of several single-channel arrays +CV_EXPORTS void merge(const GpuMat* src, size_t n, OutputArray dst, Stream& stream = Stream::Null()); +CV_EXPORTS void merge(const std::vector& src, OutputArray dst, Stream& stream = Stream::Null()); + +//! copies each plane of a multi-channel array to a dedicated array +CV_EXPORTS void split(InputArray src, GpuMat* dst, Stream& stream = Stream::Null()); +CV_EXPORTS void split(InputArray src, std::vector& dst, Stream& stream = Stream::Null()); + //! implements generalized matrix product algorithm GEMM from BLAS CV_EXPORTS void gemm(const GpuMat& src1, const GpuMat& src2, double alpha, const GpuMat& src3, double beta, GpuMat& dst, int flags = 0, Stream& stream = Stream::Null()); @@ -179,18 +187,6 @@ CV_EXPORTS void flip(const GpuMat& a, GpuMat& b, int flipCode, Stream& stream = //! supports CV_8UC1, CV_8UC3 types CV_EXPORTS void LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& stream = Stream::Null()); -//! makes multi-channel array out of several single-channel arrays -CV_EXPORTS void merge(const GpuMat* src, size_t n, GpuMat& dst, Stream& stream = Stream::Null()); - -//! makes multi-channel array out of several single-channel arrays -CV_EXPORTS void merge(const std::vector& src, GpuMat& dst, Stream& stream = Stream::Null()); - -//! copies each plane of a multi-channel array to a dedicated array -CV_EXPORTS void split(const GpuMat& src, GpuMat* dst, Stream& stream = Stream::Null()); - -//! 
copies each plane of a multi-channel array to a dedicated array -CV_EXPORTS void split(const GpuMat& src, std::vector& dst, Stream& stream = Stream::Null()); - //! scales and shifts array elements so that either the specified norm (alpha) or the minimum (alpha) and maximum (beta) array values get the specified values CV_EXPORTS void normalize(const GpuMat& src, GpuMat& dst, double alpha = 1, double beta = 0, int norm_type = NORM_L2, int dtype = -1, const GpuMat& mask = GpuMat()); diff --git a/modules/gpuarithm/src/core.cpp b/modules/gpuarithm/src/core.cpp index bd0277cde..92f085ebc 100644 --- a/modules/gpuarithm/src/core.cpp +++ b/modules/gpuarithm/src/core.cpp @@ -47,11 +47,11 @@ using namespace cv::gpu; #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) -void cv::gpu::merge(const GpuMat* /*src*/, size_t /*count*/, GpuMat& /*dst*/, Stream& /*stream*/) { throw_no_cuda(); } -void cv::gpu::merge(const std::vector& /*src*/, GpuMat& /*dst*/, Stream& /*stream*/) { throw_no_cuda(); } +void cv::gpu::merge(const GpuMat*, size_t, OutputArray, Stream&) { throw_no_cuda(); } +void cv::gpu::merge(const std::vector&, OutputArray, Stream&) { throw_no_cuda(); } -void cv::gpu::split(const GpuMat& /*src*/, GpuMat* /*dst*/, Stream& /*stream*/) { throw_no_cuda(); } -void cv::gpu::split(const GpuMat& /*src*/, std::vector& /*dst*/, Stream& /*stream*/) { throw_no_cuda(); } +void cv::gpu::split(InputArray, GpuMat*, Stream&) { throw_no_cuda(); } +void cv::gpu::split(InputArray, std::vector&, Stream&) { throw_no_cuda(); } void cv::gpu::transpose(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } @@ -70,22 +70,27 @@ namespace cv { namespace gpu { namespace cudev { namespace split_merge { - void merge_caller(const PtrStepSzb* src, PtrStepSzb& dst, int total_channels, size_t elem_size, const cudaStream_t& stream); - void split_caller(const PtrStepSzb& src, PtrStepSzb* dst, int num_channels, size_t elem_size1, const cudaStream_t& stream); + void merge(const PtrStepSzb* src, PtrStepSzb& dst, 
int total_channels, size_t elem_size, const cudaStream_t& stream); + void split(const PtrStepSzb& src, PtrStepSzb* dst, int num_channels, size_t elem_size1, const cudaStream_t& stream); } }}} namespace { - void merge(const GpuMat* src, size_t n, GpuMat& dst, const cudaStream_t& stream) + void merge_caller(const GpuMat* src, size_t n, OutputArray _dst, Stream& stream) { - using namespace ::cv::gpu::cudev::split_merge; + CV_Assert( src != 0 ); + CV_Assert( n > 0 && n <= 4 ); - CV_Assert(src); - CV_Assert(n > 0); + const int depth = src[0].depth(); + const Size size = src[0].size(); - int depth = src[0].depth(); - Size size = src[0].size(); + for (size_t i = 0; i < n; ++i) + { + CV_Assert( src[i].size() == size ); + CV_Assert( src[i].depth() == depth ); + CV_Assert( src[i].channels() == 1 ); + } if (depth == CV_64F) { @@ -93,43 +98,32 @@ namespace CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double"); } - bool single_channel_only = true; - int total_channels = 0; - - for (size_t i = 0; i < n; ++i) + if (n == 1) { - CV_Assert(src[i].size() == size); - CV_Assert(src[i].depth() == depth); - single_channel_only = single_channel_only && src[i].channels() == 1; - total_channels += src[i].channels(); + src[0].copyTo(_dst, stream); } - - CV_Assert(single_channel_only); - CV_Assert(total_channels <= 4); - - if (total_channels == 1) - src[0].copyTo(dst); else { - dst.create(size, CV_MAKETYPE(depth, total_channels)); + _dst.create(size, CV_MAKE_TYPE(depth, (int)n)); + GpuMat dst = _dst.getGpuMat(); PtrStepSzb src_as_devmem[4]; for(size_t i = 0; i < n; ++i) src_as_devmem[i] = src[i]; PtrStepSzb dst_as_devmem(dst); - merge_caller(src_as_devmem, dst_as_devmem, total_channels, CV_ELEM_SIZE(depth), stream); + cv::gpu::cudev::split_merge::merge(src_as_devmem, dst_as_devmem, (int)n, CV_ELEM_SIZE(depth), StreamAccessor::getStream(stream)); } } - void split(const GpuMat& src, GpuMat* dst, const cudaStream_t& stream) + void split_caller(const GpuMat& src, GpuMat* 
dst, Stream& stream) { - using namespace ::cv::gpu::cudev::split_merge; + CV_Assert( dst != 0 ); - CV_Assert(dst); + const int depth = src.depth(); + const int num_channels = src.channels(); - int depth = src.depth(); - int num_channels = src.channels(); + CV_Assert( num_channels <= 4 ); if (depth == CV_64F) { @@ -139,45 +133,45 @@ namespace if (num_channels == 1) { - src.copyTo(dst[0]); + src.copyTo(dst[0], stream); return; } for (int i = 0; i < num_channels; ++i) dst[i].create(src.size(), depth); - CV_Assert(num_channels <= 4); - PtrStepSzb dst_as_devmem[4]; for (int i = 0; i < num_channels; ++i) dst_as_devmem[i] = dst[i]; PtrStepSzb src_as_devmem(src); - split_caller(src_as_devmem, dst_as_devmem, num_channels, src.elemSize1(), stream); + cv::gpu::cudev::split_merge::split(src_as_devmem, dst_as_devmem, num_channels, src.elemSize1(), StreamAccessor::getStream(stream)); } } -void cv::gpu::merge(const GpuMat* src, size_t n, GpuMat& dst, Stream& stream) +void cv::gpu::merge(const GpuMat* src, size_t n, OutputArray dst, Stream& stream) { - ::merge(src, n, dst, StreamAccessor::getStream(stream)); + merge_caller(src, n, dst, stream); } -void cv::gpu::merge(const std::vector& src, GpuMat& dst, Stream& stream) +void cv::gpu::merge(const std::vector& src, OutputArray dst, Stream& stream) { - ::merge(&src[0], src.size(), dst, StreamAccessor::getStream(stream)); + merge_caller(&src[0], src.size(), dst, stream); } -void cv::gpu::split(const GpuMat& src, GpuMat* dst, Stream& stream) +void cv::gpu::split(InputArray _src, GpuMat* dst, Stream& stream) { - ::split(src, dst, StreamAccessor::getStream(stream)); + GpuMat src = _src.getGpuMat(); + split_caller(src, dst, stream); } -void cv::gpu::split(const GpuMat& src, std::vector& dst, Stream& stream) +void cv::gpu::split(InputArray _src, std::vector& dst, Stream& stream) { + GpuMat src = _src.getGpuMat(); dst.resize(src.channels()); if(src.channels() > 0) - ::split(src, &dst[0], StreamAccessor::getStream(stream)); + 
split_caller(src, &dst[0], stream); } //////////////////////////////////////////////////////////////////////// diff --git a/modules/gpuarithm/src/cuda/split_merge.cu b/modules/gpuarithm/src/cuda/split_merge.cu index 93aea3791..388441c63 100644 --- a/modules/gpuarithm/src/cuda/split_merge.cu +++ b/modules/gpuarithm/src/cuda/split_merge.cu @@ -278,7 +278,7 @@ namespace cv { namespace gpu { namespace cudev } - void merge_caller(const PtrStepSzb* src, PtrStepSzb& dst, + void merge(const PtrStepSzb* src, PtrStepSzb& dst, int total_channels, size_t elem_size, const cudaStream_t& stream) { @@ -487,7 +487,7 @@ namespace cv { namespace gpu { namespace cudev } - void split_caller(const PtrStepSzb& src, PtrStepSzb* dst, int num_channels, size_t elem_size1, const cudaStream_t& stream) + void split(const PtrStepSzb& src, PtrStepSzb* dst, int num_channels, size_t elem_size1, const cudaStream_t& stream) { static SplitFunction split_func_tbl[] = { From 0c50d0821fb88c95ff035c1b757df1451bbd68ad Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Thu, 25 Apr 2013 16:09:51 +0400 Subject: [PATCH 040/121] switched to Input/Output Array in transpose/flip operations --- .../gpuarithm/include/opencv2/gpuarithm.hpp | 12 +++++------ modules/gpuarithm/src/core.cpp | 20 ++++++++++++------- samples/gpu/driver_api_multi.cpp | 4 ++-- samples/gpu/multi.cpp | 4 ++-- 4 files changed, 23 insertions(+), 17 deletions(-) diff --git a/modules/gpuarithm/include/opencv2/gpuarithm.hpp b/modules/gpuarithm/include/opencv2/gpuarithm.hpp index 8996372b5..2bf60eff3 100644 --- a/modules/gpuarithm/include/opencv2/gpuarithm.hpp +++ b/modules/gpuarithm/include/opencv2/gpuarithm.hpp @@ -170,17 +170,17 @@ CV_EXPORTS void merge(const std::vector& src, OutputArray dst, Stream& s CV_EXPORTS void split(InputArray src, GpuMat* dst, Stream& stream = Stream::Null()); CV_EXPORTS void split(InputArray src, std::vector& dst, Stream& stream = Stream::Null()); -//! 
implements generalized matrix product algorithm GEMM from BLAS -CV_EXPORTS void gemm(const GpuMat& src1, const GpuMat& src2, double alpha, - const GpuMat& src3, double beta, GpuMat& dst, int flags = 0, Stream& stream = Stream::Null()); - //! transposes the matrix //! supports matrix with element size = 1, 4 and 8 bytes (CV_8UC1, CV_8UC4, CV_16UC2, CV_32FC1, etc) -CV_EXPORTS void transpose(const GpuMat& src1, GpuMat& dst, Stream& stream = Stream::Null()); +CV_EXPORTS void transpose(InputArray src1, OutputArray dst, Stream& stream = Stream::Null()); //! reverses the order of the rows, columns or both in a matrix //! supports 1, 3 and 4 channels images with CV_8U, CV_16U, CV_32S or CV_32F depth -CV_EXPORTS void flip(const GpuMat& a, GpuMat& b, int flipCode, Stream& stream = Stream::Null()); +CV_EXPORTS void flip(InputArray src, OutputArray dst, int flipCode, Stream& stream = Stream::Null()); + +//! implements generalized matrix product algorithm GEMM from BLAS +CV_EXPORTS void gemm(const GpuMat& src1, const GpuMat& src2, double alpha, + const GpuMat& src3, double beta, GpuMat& dst, int flags = 0, Stream& stream = Stream::Null()); //! transforms 8-bit unsigned integers using lookup table: dst(i)=lut(src(i)) //! 
destination array will have the depth type as lut and the same channels number as source diff --git a/modules/gpuarithm/src/core.cpp b/modules/gpuarithm/src/core.cpp index 92f085ebc..c8ef966e5 100644 --- a/modules/gpuarithm/src/core.cpp +++ b/modules/gpuarithm/src/core.cpp @@ -53,9 +53,9 @@ void cv::gpu::merge(const std::vector&, OutputArray, Stream&) { throw_no void cv::gpu::split(InputArray, GpuMat*, Stream&) { throw_no_cuda(); } void cv::gpu::split(InputArray, std::vector&, Stream&) { throw_no_cuda(); } -void cv::gpu::transpose(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } +void cv::gpu::transpose(InputArray, OutputArray, Stream&) { throw_no_cuda(); } -void cv::gpu::flip(const GpuMat&, GpuMat&, int, Stream&) { throw_no_cuda(); } +void cv::gpu::flip(InputArray, OutputArray, int, Stream&) { throw_no_cuda(); } void cv::gpu::LUT(const GpuMat&, const Mat&, GpuMat&, Stream&) { throw_no_cuda(); } @@ -182,13 +182,16 @@ namespace arithm template void transpose(PtrStepSz src, PtrStepSz dst, cudaStream_t stream); } -void cv::gpu::transpose(const GpuMat& src, GpuMat& dst, Stream& s) +void cv::gpu::transpose(InputArray _src, OutputArray _dst, Stream& _stream) { + GpuMat src = _src.getGpuMat(); + CV_Assert( src.elemSize() == 1 || src.elemSize() == 4 || src.elemSize() == 8 ); - dst.create( src.cols, src.rows, src.type() ); + _dst.create( src.cols, src.rows, src.type() ); + GpuMat dst = _dst.getGpuMat(); - cudaStream_t stream = StreamAccessor::getStream(s); + cudaStream_t stream = StreamAccessor::getStream(_stream); if (src.elemSize() == 1) { @@ -260,7 +263,7 @@ namespace }; } -void cv::gpu::flip(const GpuMat& src, GpuMat& dst, int flipCode, Stream& stream) +void cv::gpu::flip(InputArray _src, OutputArray _dst, int flipCode, Stream& stream) { typedef void (*func_t)(const GpuMat& src, GpuMat& dst, int flipCode, cudaStream_t stream); static const func_t funcs[6][4] = @@ -273,10 +276,13 @@ void cv::gpu::flip(const GpuMat& src, GpuMat& dst, int flipCode, Stream& stream) 
{NppMirror::call, 0, NppMirror::call, NppMirror::call} }; + GpuMat src = _src.getGpuMat(); + CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32S || src.depth() == CV_32F); CV_Assert(src.channels() == 1 || src.channels() == 3 || src.channels() == 4); - dst.create(src.size(), src.type()); + _dst.create(src.size(), src.type()); + GpuMat dst = _dst.getGpuMat(); funcs[src.depth()][src.channels() - 1](src, dst, flipCode, StreamAccessor::getStream(stream)); } diff --git a/samples/gpu/driver_api_multi.cpp b/samples/gpu/driver_api_multi.cpp index 8b4623f41..a5343df4e 100644 --- a/samples/gpu/driver_api_multi.cpp +++ b/samples/gpu/driver_api_multi.cpp @@ -130,12 +130,12 @@ void Worker::operator()(int device_id) const rng.fill(src, RNG::UNIFORM, 0, 1); // CPU works - transpose(src, dst); + cv::transpose(src, dst); // GPU works GpuMat d_src(src); GpuMat d_dst; - transpose(d_src, d_dst); + gpu::transpose(d_src, d_dst); // Check results bool passed = norm(dst - Mat(d_dst), NORM_INF) < 1e-3; diff --git a/samples/gpu/multi.cpp b/samples/gpu/multi.cpp index 34b111829..0e9bef636 100644 --- a/samples/gpu/multi.cpp +++ b/samples/gpu/multi.cpp @@ -87,12 +87,12 @@ void Worker::operator()(int device_id) const rng.fill(src, RNG::UNIFORM, 0, 1); // CPU works - transpose(src, dst); + cv::transpose(src, dst); // GPU works GpuMat d_src(src); GpuMat d_dst; - transpose(d_src, d_dst); + gpu::transpose(d_src, d_dst); // Check results bool passed = norm(dst - Mat(d_dst), NORM_INF) < 1e-3; From 539f367d0b682d69967dec674d74dfbe362dcebf Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Fri, 26 Apr 2013 12:39:02 +0400 Subject: [PATCH 041/121] refactored gpu::LUT function: * converted it to Algorithm, because implementation uses inner buffers and requires preprocessing step * new implementation splits preprocessing and transform, what is more effecient * old API still can be used for source compatibility (marked as deprecated) --- 
.../gpuarithm/include/opencv2/gpuarithm.hpp | 35 ++- modules/gpuarithm/perf/perf_core.cpp | 8 +- modules/gpuarithm/src/core.cpp | 263 +++++++++++++----- modules/gpuarithm/test/test_core.cpp | 8 +- 4 files changed, 234 insertions(+), 80 deletions(-) diff --git a/modules/gpuarithm/include/opencv2/gpuarithm.hpp b/modules/gpuarithm/include/opencv2/gpuarithm.hpp index 2bf60eff3..4272e1546 100644 --- a/modules/gpuarithm/include/opencv2/gpuarithm.hpp +++ b/modules/gpuarithm/include/opencv2/gpuarithm.hpp @@ -49,6 +49,17 @@ #include "opencv2/core/gpu.hpp" +#if defined __GNUC__ + #define __OPENCV_GPUARITHM_DEPR_BEFORE__ + #define __OPENCV_GPUARITHM_DEPR_AFTER__ __attribute__ ((deprecated)) +#elif (defined WIN32 || defined _WIN32) + #define __OPENCV_GPUARITHM_DEPR_BEFORE__ __declspec(deprecated) + #define __OPENCV_GPUARITHM_DEPR_AFTER__ +#else + #define __OPENCV_GPUARITHM_DEPR_BEFORE__ + #define __OPENCV_GPUARITHM_DEPR_AFTER__ +#endif + namespace cv { namespace gpu { //! adds one matrix to another (dst = src1 + src2) @@ -178,14 +189,25 @@ CV_EXPORTS void transpose(InputArray src1, OutputArray dst, Stream& stream = Str //! supports 1, 3 and 4 channels images with CV_8U, CV_16U, CV_32S or CV_32F depth CV_EXPORTS void flip(InputArray src, OutputArray dst, int flipCode, Stream& stream = Stream::Null()); -//! implements generalized matrix product algorithm GEMM from BLAS -CV_EXPORTS void gemm(const GpuMat& src1, const GpuMat& src2, double alpha, - const GpuMat& src3, double beta, GpuMat& dst, int flags = 0, Stream& stream = Stream::Null()); - //! transforms 8-bit unsigned integers using lookup table: dst(i)=lut(src(i)) //! destination array will have the depth type as lut and the same channels number as source //! 
supports CV_8UC1, CV_8UC3 types -CV_EXPORTS void LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& stream = Stream::Null()); +class CV_EXPORTS LookUpTable : public Algorithm +{ +public: + virtual void transform(InputArray src, OutputArray dst, Stream& stream = Stream::Null()) = 0; +}; +CV_EXPORTS Ptr createLookUpTable(InputArray lut); + +__OPENCV_GPUARITHM_DEPR_BEFORE__ void LUT(InputArray src, InputArray lut, OutputArray dst, Stream& stream = Stream::Null()) __OPENCV_GPUARITHM_DEPR_AFTER__; +inline void LUT(InputArray src, InputArray lut, OutputArray dst, Stream& stream) +{ + createLookUpTable(lut)->transform(src, dst, stream); +} + +//! implements generalized matrix product algorithm GEMM from BLAS +CV_EXPORTS void gemm(const GpuMat& src1, const GpuMat& src2, double alpha, + const GpuMat& src3, double beta, GpuMat& dst, int flags = 0, Stream& stream = Stream::Null()); //! scales and shifts array elements so that either the specified norm (alpha) or the minimum (alpha) and maximum (beta) array values get the specified values CV_EXPORTS void normalize(const GpuMat& src, GpuMat& dst, double alpha = 1, double beta = 0, @@ -311,4 +333,7 @@ CV_EXPORTS void convolve(const GpuMat& image, const GpuMat& templ, GpuMat& resul }} // namespace cv { namespace gpu { +#undef __OPENCV_GPUARITHM_DEPR_BEFORE__ +#undef __OPENCV_GPUARITHM_DEPR_AFTER__ + #endif /* __OPENCV_GPUARITHM_HPP__ */ diff --git a/modules/gpuarithm/perf/perf_core.cpp b/modules/gpuarithm/perf/perf_core.cpp index eab6d8736..0add472ca 100644 --- a/modules/gpuarithm/perf/perf_core.cpp +++ b/modules/gpuarithm/perf/perf_core.cpp @@ -224,10 +224,12 @@ PERF_TEST_P(Sz_Type, LutOneChannel, if (PERF_RUN_GPU()) { + cv::Ptr lutAlg = cv::gpu::createLookUpTable(lut); + const cv::gpu::GpuMat d_src(src); cv::gpu::GpuMat dst; - TEST_CYCLE() cv::gpu::LUT(d_src, lut, dst); + TEST_CYCLE() lutAlg->transform(d_src, dst); GPU_SANITY_CHECK(dst); } @@ -259,10 +261,12 @@ PERF_TEST_P(Sz_Type, LutMultiChannel, if (PERF_RUN_GPU()) 
{ + cv::Ptr lutAlg = cv::gpu::createLookUpTable(lut); + const cv::gpu::GpuMat d_src(src); cv::gpu::GpuMat dst; - TEST_CYCLE() cv::gpu::LUT(d_src, lut, dst); + TEST_CYCLE() lutAlg->transform(d_src, dst); GPU_SANITY_CHECK(dst); } diff --git a/modules/gpuarithm/src/core.cpp b/modules/gpuarithm/src/core.cpp index c8ef966e5..5dc1d4a5e 100644 --- a/modules/gpuarithm/src/core.cpp +++ b/modules/gpuarithm/src/core.cpp @@ -57,7 +57,7 @@ void cv::gpu::transpose(InputArray, OutputArray, Stream&) { throw_no_cuda(); } void cv::gpu::flip(InputArray, OutputArray, int, Stream&) { throw_no_cuda(); } -void cv::gpu::LUT(const GpuMat&, const Mat&, GpuMat&, Stream&) { throw_no_cuda(); } +Ptr cv::gpu::createLookUpTable(InputArray) { throw_no_cuda(); return Ptr(); } void cv::gpu::copyMakeBorder(const GpuMat&, GpuMat&, int, int, int, int, int, const Scalar&, Stream&) { throw_no_cuda(); } @@ -290,93 +290,214 @@ void cv::gpu::flip(InputArray _src, OutputArray _dst, int flipCode, Stream& stre //////////////////////////////////////////////////////////////////////// // LUT -void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& s) +#if (CUDA_VERSION >= 5000) + +namespace { - const int cn = src.channels(); - - CV_Assert( src.type() == CV_8UC1 || src.type() == CV_8UC3 ); - CV_Assert( lut.depth() == CV_8U ); - CV_Assert( lut.channels() == 1 || lut.channels() == cn ); - CV_Assert( lut.rows * lut.cols == 256 && lut.isContinuous() ); - - dst.create(src.size(), CV_MAKE_TYPE(lut.depth(), cn)); - - NppiSize sz; - sz.height = src.rows; - sz.width = src.cols; - - Mat nppLut; - lut.convertTo(nppLut, CV_32S); - - int nValues3[] = {256, 256, 256}; - - Npp32s pLevels[256]; - for (int i = 0; i < 256; ++i) - pLevels[i] = i; - - const Npp32s* pLevels3[3]; - -#if (CUDA_VERSION <= 4020) - pLevels3[0] = pLevels3[1] = pLevels3[2] = pLevels; -#else - GpuMat d_pLevels; - d_pLevels.upload(Mat(1, 256, CV_32S, pLevels)); - pLevels3[0] = pLevels3[1] = pLevels3[2] = d_pLevels.ptr(); -#endif - - 
cudaStream_t stream = StreamAccessor::getStream(s); - NppStreamHandler h(stream); - - if (src.type() == CV_8UC1) - { -#if (CUDA_VERSION <= 4020) - nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr(), static_cast(src.step), - dst.ptr(), static_cast(dst.step), sz, nppLut.ptr(), pLevels, 256) ); -#else - GpuMat d_nppLut(Mat(1, 256, CV_32S, nppLut.data)); - nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr(), static_cast(src.step), - dst.ptr(), static_cast(dst.step), sz, d_nppLut.ptr(), d_pLevels.ptr(), 256) ); -#endif - } - else + class LookUpTableImpl : public LookUpTable { + public: + LookUpTableImpl(InputArray lut); + + void transform(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); + + private: + int lut_cn; + + int nValues3[3]; const Npp32s* pValues3[3]; + const Npp32s* pLevels3[3]; - Mat nppLut3[3]; - if (nppLut.channels() == 1) + GpuMat d_pLevels; + GpuMat d_nppLut; + GpuMat d_nppLut3[3]; + }; + + LookUpTableImpl::LookUpTableImpl(InputArray _lut) + { + nValues3[0] = nValues3[1] = nValues3[2] = 256; + + Npp32s pLevels[256]; + for (int i = 0; i < 256; ++i) + pLevels[i] = i; + + d_pLevels.upload(Mat(1, 256, CV_32S, pLevels)); + pLevels3[0] = pLevels3[1] = pLevels3[2] = d_pLevels.ptr(); + + GpuMat lut; + if (_lut.kind() == _InputArray::GPU_MAT) + { + lut = _lut.getGpuMat(); + } + else + { + Mat hLut = _lut.getMat(); + CV_Assert( hLut.total() == 256 && hLut.isContinuous() ); + lut.upload(Mat(1, 256, hLut.type(), hLut.data)); + } + + lut_cn = lut.channels(); + + CV_Assert( lut.depth() == CV_8U ); + CV_Assert( lut.rows == 1 && lut.cols == 256 ); + + lut.convertTo(d_nppLut, CV_32S); + + if (lut_cn == 1) { -#if (CUDA_VERSION <= 4020) - pValues3[0] = pValues3[1] = pValues3[2] = nppLut.ptr(); -#else - GpuMat d_nppLut(Mat(1, 256, CV_32S, nppLut.data)); pValues3[0] = pValues3[1] = pValues3[2] = d_nppLut.ptr(); -#endif + } + else + { + gpu::split(d_nppLut, d_nppLut3); + + pValues3[0] = d_nppLut3[0].ptr(); + pValues3[1] = d_nppLut3[1].ptr(); + pValues3[2] = 
d_nppLut3[2].ptr(); + } + } + + void LookUpTableImpl::transform(InputArray _src, OutputArray _dst, Stream& _stream) + { + GpuMat src = _src.getGpuMat(); + + const int cn = src.channels(); + + CV_Assert( src.type() == CV_8UC1 || src.type() == CV_8UC3 ); + CV_Assert( lut_cn == 1 || lut_cn == cn ); + + _dst.create(src.size(), src.type()); + GpuMat dst = _dst.getGpuMat(); + + cudaStream_t stream = StreamAccessor::getStream(_stream); + + NppStreamHandler h(stream); + + NppiSize sz; + sz.height = src.rows; + sz.width = src.cols; + + if (src.type() == CV_8UC1) + { + nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr(), static_cast(src.step), + dst.ptr(), static_cast(dst.step), sz, d_nppLut.ptr(), d_pLevels.ptr(), 256) ); + } + else + { + nppSafeCall( nppiLUT_Linear_8u_C3R(src.ptr(), static_cast(src.step), + dst.ptr(), static_cast(dst.step), sz, pValues3, pLevels3, nValues3) ); + } + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + } +} + +#else // (CUDA_VERSION >= 5000) + +namespace +{ + class LookUpTableImpl : public LookUpTable + { + public: + LookUpTableImpl(InputArray lut); + + void transform(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); + + private: + int lut_cn; + + Npp32s pLevels[256]; + int nValues3[3]; + const Npp32s* pValues3[3]; + const Npp32s* pLevels3[3]; + + Mat nppLut; + Mat nppLut3[3]; + }; + + LookUpTableImpl::LookUpTableImpl(InputArray _lut) + { + nValues3[0] = nValues3[1] = nValues3[2] = 256; + + for (int i = 0; i < 256; ++i) + pLevels[i] = i; + pLevels3[0] = pLevels3[1] = pLevels3[2] = pLevels; + + Mat lut; + if (_lut.kind() == _InputArray::GPU_MAT) + { + lut = Mat(_lut.getGpuMat()); + } + else + { + Mat hLut = _lut.getMat(); + CV_Assert( hLut.total() == 256 && hLut.isContinuous() ); + lut = hLut; + } + + lut_cn = lut.channels(); + + CV_Assert( lut.depth() == CV_8U ); + CV_Assert( lut.rows == 1 && lut.cols == 256 ); + + lut.convertTo(nppLut, CV_32S); + + if (lut_cn == 1) + { + pValues3[0] = pValues3[1] = pValues3[2] = 
nppLut.ptr(); } else { cv::split(nppLut, nppLut3); -#if (CUDA_VERSION <= 4020) pValues3[0] = nppLut3[0].ptr(); pValues3[1] = nppLut3[1].ptr(); pValues3[2] = nppLut3[2].ptr(); -#else - GpuMat d_nppLut0(Mat(1, 256, CV_32S, nppLut3[0].data)); - GpuMat d_nppLut1(Mat(1, 256, CV_32S, nppLut3[1].data)); - GpuMat d_nppLut2(Mat(1, 256, CV_32S, nppLut3[2].data)); - - pValues3[0] = d_nppLut0.ptr(); - pValues3[1] = d_nppLut1.ptr(); - pValues3[2] = d_nppLut2.ptr(); -#endif } - - nppSafeCall( nppiLUT_Linear_8u_C3R(src.ptr(), static_cast(src.step), - dst.ptr(), static_cast(dst.step), sz, pValues3, pLevels3, nValues3) ); } - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); + void LookUpTableImpl::transform(InputArray _src, OutputArray _dst, Stream& _stream) + { + GpuMat src = _src.getGpuMat(); + + const int cn = src.channels(); + + CV_Assert( src.type() == CV_8UC1 || src.type() == CV_8UC3 ); + CV_Assert( lut_cn == 1 || lut_cn == cn ); + + _dst.create(src.size(), src.type()); + GpuMat dst = _dst.getGpuMat(); + + cudaStream_t stream = StreamAccessor::getStream(_stream); + + NppStreamHandler h(stream); + + NppiSize sz; + sz.height = src.rows; + sz.width = src.cols; + + if (src.type() == CV_8UC1) + { + nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr(), static_cast(src.step), + dst.ptr(), static_cast(dst.step), sz, nppLut.ptr(), pLevels, 256) ); + } + else + { + nppSafeCall( nppiLUT_Linear_8u_C3R(src.ptr(), static_cast(src.step), + dst.ptr(), static_cast(dst.step), sz, pValues3, pLevels3, nValues3) ); + } + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + } +} + +#endif // (CUDA_VERSION >= 5000) + +Ptr cv::gpu::createLookUpTable(InputArray lut) +{ + return new LookUpTableImpl(lut); } //////////////////////////////////////////////////////////////////////// diff --git a/modules/gpuarithm/test/test_core.cpp b/modules/gpuarithm/test/test_core.cpp index 45f796dc5..d465aa463 100644 --- a/modules/gpuarithm/test/test_core.cpp +++ b/modules/gpuarithm/test/test_core.cpp @@ 
-323,8 +323,10 @@ GPU_TEST_P(LUT, OneChannel) cv::Mat src = randomMat(size, type); cv::Mat lut = randomMat(cv::Size(256, 1), CV_8UC1); + cv::Ptr lutAlg = cv::gpu::createLookUpTable(lut); + cv::gpu::GpuMat dst = createMat(size, CV_MAKE_TYPE(lut.depth(), src.channels())); - cv::gpu::LUT(loadMat(src, useRoi), lut, dst); + lutAlg->transform(loadMat(src, useRoi), dst); cv::Mat dst_gold; cv::LUT(src, lut, dst_gold); @@ -337,8 +339,10 @@ GPU_TEST_P(LUT, MultiChannel) cv::Mat src = randomMat(size, type); cv::Mat lut = randomMat(cv::Size(256, 1), CV_MAKE_TYPE(CV_8U, src.channels())); + cv::Ptr lutAlg = cv::gpu::createLookUpTable(lut); + cv::gpu::GpuMat dst = createMat(size, CV_MAKE_TYPE(lut.depth(), src.channels()), useRoi); - cv::gpu::LUT(loadMat(src, useRoi), lut, dst); + lutAlg->transform(loadMat(src, useRoi), dst); cv::Mat dst_gold; cv::LUT(src, lut, dst_gold); From c52d56964c0799f8da0ac04cf6a03bda0f194832 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Thu, 25 Apr 2013 16:56:19 +0400 Subject: [PATCH 042/121] switched to Input/Output Array in gpu::copyMakeBorder --- modules/gpuarithm/include/opencv2/gpuarithm.hpp | 8 ++++---- modules/gpuarithm/src/core.cpp | 15 +++++++++------ 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/modules/gpuarithm/include/opencv2/gpuarithm.hpp b/modules/gpuarithm/include/opencv2/gpuarithm.hpp index 4272e1546..5c5118671 100644 --- a/modules/gpuarithm/include/opencv2/gpuarithm.hpp +++ b/modules/gpuarithm/include/opencv2/gpuarithm.hpp @@ -205,6 +205,10 @@ inline void LUT(InputArray src, InputArray lut, OutputArray dst, Stream& stream) createLookUpTable(lut)->transform(src, dst, stream); } +//! copies 2D array to a larger destination array and pads borders with user-specifiable constant +CV_EXPORTS void copyMakeBorder(InputArray src, OutputArray dst, int top, int bottom, int left, int right, int borderType, + Scalar value = Scalar(), Stream& stream = Stream::Null()); + //! 
implements generalized matrix product algorithm GEMM from BLAS CV_EXPORTS void gemm(const GpuMat& src1, const GpuMat& src2, double alpha, const GpuMat& src3, double beta, GpuMat& dst, int flags = 0, Stream& stream = Stream::Null()); @@ -273,10 +277,6 @@ CV_EXPORTS void meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev, GpuM //! output will have CV_32FC1 type CV_EXPORTS void rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, const Rect& rect, Stream& stream = Stream::Null()); -//! copies 2D array to a larger destination array and pads borders with user-specifiable constant -CV_EXPORTS void copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, int borderType, - const Scalar& value = Scalar(), Stream& stream = Stream::Null()); - //! computes the integral image //! sum will have CV_32S type, but will contain unsigned int values //! supports only CV_8UC1 source type diff --git a/modules/gpuarithm/src/core.cpp b/modules/gpuarithm/src/core.cpp index 5dc1d4a5e..22887796a 100644 --- a/modules/gpuarithm/src/core.cpp +++ b/modules/gpuarithm/src/core.cpp @@ -59,7 +59,7 @@ void cv::gpu::flip(InputArray, OutputArray, int, Stream&) { throw_no_cuda(); } Ptr cv::gpu::createLookUpTable(InputArray) { throw_no_cuda(); return Ptr(); } -void cv::gpu::copyMakeBorder(const GpuMat&, GpuMat&, int, int, int, int, int, const Scalar&, Stream&) { throw_no_cuda(); } +void cv::gpu::copyMakeBorder(InputArray, OutputArray, int, int, int, int, int, Scalar, Stream&) { throw_no_cuda(); } #else /* !defined (HAVE_CUDA) */ @@ -529,14 +529,17 @@ typedef Npp32s __attribute__((__may_alias__)) Npp32s_a; typedef Npp32s Npp32s_a; #endif -void cv::gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, int borderType, const Scalar& value, Stream& s) +void cv::gpu::copyMakeBorder(InputArray _src, OutputArray _dst, int top, int bottom, int left, int right, int borderType, Scalar value, Stream& _stream) { - 
CV_Assert(src.depth() <= CV_32F && src.channels() <= 4); - CV_Assert(borderType == BORDER_REFLECT_101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || borderType == BORDER_REFLECT || borderType == BORDER_WRAP); + GpuMat src = _src.getGpuMat(); - dst.create(src.rows + top + bottom, src.cols + left + right, src.type()); + CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 ); + CV_Assert( borderType == BORDER_REFLECT_101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || borderType == BORDER_REFLECT || borderType == BORDER_WRAP ); - cudaStream_t stream = StreamAccessor::getStream(s); + _dst.create(src.rows + top + bottom, src.cols + left + right, src.type()); + GpuMat dst = _dst.getGpuMat(); + + cudaStream_t stream = StreamAccessor::getStream(_stream); if (borderType == BORDER_CONSTANT && (src.type() == CV_8UC1 || src.type() == CV_8UC4 || src.type() == CV_32SC1 || src.type() == CV_32FC1)) { From 8fcef225fb89ec8047b72f068fc51065d7eb308a Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Fri, 26 Apr 2013 14:40:44 +0400 Subject: [PATCH 043/121] switched to Input/Output Array in reductions operations --- modules/gpu/src/cascadeclassifier.cpp | 2 +- .../gpuarithm/include/opencv2/gpuarithm.hpp | 145 ++++++--- modules/gpuarithm/perf/perf_arithm.cpp | 6 +- modules/gpuarithm/perf/perf_reductions.cpp | 3 +- modules/gpuarithm/src/arithm.cpp | 117 ------- modules/gpuarithm/src/reductions.cpp | 302 +++++++++++------- modules/gpuimgproc/src/match_template.cpp | 2 +- modules/nonfree/src/surf_gpu.cpp | 4 +- samples/gpu/driver_api_multi.cpp | 2 +- samples/gpu/farneback_optical_flow.cpp | 4 +- samples/gpu/multi.cpp | 2 +- 11 files changed, 299 insertions(+), 290 deletions(-) diff --git a/modules/gpu/src/cascadeclassifier.cpp b/modules/gpu/src/cascadeclassifier.cpp index 0f1da83ce..74867b48d 100644 --- a/modules/gpu/src/cascadeclassifier.cpp +++ b/modules/gpu/src/cascadeclassifier.cpp @@ -458,7 +458,7 @@ public: // generate integral 
for scale gpu::resize(image, src, level.sFrame, 0, 0, cv::INTER_LINEAR); - gpu::integralBuffered(src, sint, buff); + gpu::integral(src, sint, buff); // calculate job int totalWidth = level.workArea.width / step; diff --git a/modules/gpuarithm/include/opencv2/gpuarithm.hpp b/modules/gpuarithm/include/opencv2/gpuarithm.hpp index 5c5118671..b131aba2e 100644 --- a/modules/gpuarithm/include/opencv2/gpuarithm.hpp +++ b/modules/gpuarithm/include/opencv2/gpuarithm.hpp @@ -209,85 +209,150 @@ inline void LUT(InputArray src, InputArray lut, OutputArray dst, Stream& stream) CV_EXPORTS void copyMakeBorder(InputArray src, OutputArray dst, int top, int bottom, int left, int right, int borderType, Scalar value = Scalar(), Stream& stream = Stream::Null()); -//! implements generalized matrix product algorithm GEMM from BLAS -CV_EXPORTS void gemm(const GpuMat& src1, const GpuMat& src2, double alpha, - const GpuMat& src3, double beta, GpuMat& dst, int flags = 0, Stream& stream = Stream::Null()); - -//! scales and shifts array elements so that either the specified norm (alpha) or the minimum (alpha) and maximum (beta) array values get the specified values -CV_EXPORTS void normalize(const GpuMat& src, GpuMat& dst, double alpha = 1, double beta = 0, - int norm_type = NORM_L2, int dtype = -1, const GpuMat& mask = GpuMat()); -CV_EXPORTS void normalize(const GpuMat& src, GpuMat& dst, double a, double b, - int norm_type, int dtype, const GpuMat& mask, GpuMat& norm_buf, GpuMat& cvt_buf); - //! computes norm of array //! supports NORM_INF, NORM_L1, NORM_L2 //! 
supports all matrices except 64F -CV_EXPORTS double norm(const GpuMat& src1, int normType=NORM_L2); -CV_EXPORTS double norm(const GpuMat& src1, int normType, GpuMat& buf); -CV_EXPORTS double norm(const GpuMat& src1, int normType, const GpuMat& mask, GpuMat& buf); +CV_EXPORTS double norm(InputArray src1, int normType, InputArray mask, GpuMat& buf); +static inline double norm(InputArray src, int normType) +{ + GpuMat buf; + return norm(src, normType, GpuMat(), buf); +} +static inline double norm(InputArray src, int normType, GpuMat& buf) +{ + return norm(src, normType, GpuMat(), buf); +} //! computes norm of the difference between two arrays //! supports NORM_INF, NORM_L1, NORM_L2 //! supports only CV_8UC1 type -CV_EXPORTS double norm(const GpuMat& src1, const GpuMat& src2, int normType=NORM_L2); +CV_EXPORTS double norm(InputArray src1, InputArray src2, GpuMat& buf, int normType=NORM_L2); +static inline double norm(InputArray src1, InputArray src2, int normType=NORM_L2) +{ + GpuMat buf; + return norm(src1, src2, buf, normType); +} //! computes sum of array elements //! supports only single channel images -CV_EXPORTS Scalar sum(const GpuMat& src); -CV_EXPORTS Scalar sum(const GpuMat& src, GpuMat& buf); -CV_EXPORTS Scalar sum(const GpuMat& src, const GpuMat& mask, GpuMat& buf); +CV_EXPORTS Scalar sum(InputArray src, InputArray mask, GpuMat& buf); +static inline Scalar sum(InputArray src) +{ + GpuMat buf; + return sum(src, GpuMat(), buf); +} +static inline Scalar sum(InputArray src, GpuMat& buf) +{ + return sum(src, GpuMat(), buf); +} //! computes sum of array elements absolute values //! 
supports only single channel images -CV_EXPORTS Scalar absSum(const GpuMat& src); -CV_EXPORTS Scalar absSum(const GpuMat& src, GpuMat& buf); -CV_EXPORTS Scalar absSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf); +CV_EXPORTS Scalar absSum(InputArray src, InputArray mask, GpuMat& buf); +static inline Scalar absSum(InputArray src) +{ + GpuMat buf; + return absSum(src, GpuMat(), buf); +} +static inline Scalar absSum(InputArray src, GpuMat& buf) +{ + return absSum(src, GpuMat(), buf); +} //! computes squared sum of array elements //! supports only single channel images -CV_EXPORTS Scalar sqrSum(const GpuMat& src); -CV_EXPORTS Scalar sqrSum(const GpuMat& src, GpuMat& buf); -CV_EXPORTS Scalar sqrSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf); +CV_EXPORTS Scalar sqrSum(InputArray src, InputArray mask, GpuMat& buf); +static inline Scalar sqrSum(InputArray src) +{ + GpuMat buf; + return sqrSum(src, GpuMat(), buf); +} +static inline Scalar sqrSum(InputArray src, GpuMat& buf) +{ + return sqrSum(src, GpuMat(), buf); +} //! finds global minimum and maximum array elements and returns their values -CV_EXPORTS void minMax(const GpuMat& src, double* minVal, double* maxVal=0, const GpuMat& mask=GpuMat()); -CV_EXPORTS void minMax(const GpuMat& src, double* minVal, double* maxVal, const GpuMat& mask, GpuMat& buf); +CV_EXPORTS void minMax(InputArray src, double* minVal, double* maxVal, InputArray mask, GpuMat& buf); +static inline void minMax(InputArray src, double* minVal, double* maxVal=0, InputArray mask=noArray()) +{ + GpuMat buf; + minMax(src, minVal, maxVal, mask, buf); +} //! 
finds global minimum and maximum array elements and returns their values with locations -CV_EXPORTS void minMaxLoc(const GpuMat& src, double* minVal, double* maxVal=0, Point* minLoc=0, Point* maxLoc=0, - const GpuMat& mask=GpuMat()); -CV_EXPORTS void minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc, - const GpuMat& mask, GpuMat& valbuf, GpuMat& locbuf); +CV_EXPORTS void minMaxLoc(InputArray src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc, + InputArray mask, GpuMat& valbuf, GpuMat& locbuf); +static inline void minMaxLoc(InputArray src, double* minVal, double* maxVal=0, Point* minLoc=0, Point* maxLoc=0, + InputArray mask=noArray()) +{ + GpuMat valBuf, locBuf; + minMaxLoc(src, minVal, maxVal, minLoc, maxLoc, mask, valBuf, locBuf); +} //! counts non-zero array elements -CV_EXPORTS int countNonZero(const GpuMat& src); -CV_EXPORTS int countNonZero(const GpuMat& src, GpuMat& buf); +CV_EXPORTS int countNonZero(InputArray src, GpuMat& buf); +static inline int countNonZero(const GpuMat& src) +{ + GpuMat buf; + return countNonZero(src, buf); +} //! reduces a matrix to a vector -CV_EXPORTS void reduce(const GpuMat& mtx, GpuMat& vec, int dim, int reduceOp, int dtype = -1, Stream& stream = Stream::Null()); +CV_EXPORTS void reduce(InputArray mtx, OutputArray vec, int dim, int reduceOp, int dtype = -1, Stream& stream = Stream::Null()); //! computes mean value and standard deviation of all or selected array elements //! supports only CV_8UC1 type -CV_EXPORTS void meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev); -//! buffered version -CV_EXPORTS void meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev, GpuMat& buf); +CV_EXPORTS void meanStdDev(InputArray mtx, Scalar& mean, Scalar& stddev, GpuMat& buf); +static inline void meanStdDev(InputArray src, Scalar& mean, Scalar& stddev) +{ + GpuMat buf; + meanStdDev(src, mean, stddev, buf); +} //! computes the standard deviation of integral images //! 
supports only CV_32SC1 source type and CV_32FC1 sqr type //! output will have CV_32FC1 type -CV_EXPORTS void rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, const Rect& rect, Stream& stream = Stream::Null()); +CV_EXPORTS void rectStdDev(InputArray src, InputArray sqr, OutputArray dst, Rect rect, Stream& stream = Stream::Null()); + +//! scales and shifts array elements so that either the specified norm (alpha) or the minimum (alpha) and maximum (beta) array values get the specified values +CV_EXPORTS void normalize(InputArray src, OutputArray dst, double alpha, double beta, + int norm_type, int dtype, InputArray mask, GpuMat& norm_buf, GpuMat& cvt_buf); +static inline void normalize(InputArray src, OutputArray dst, double alpha = 1, double beta = 0, + int norm_type = NORM_L2, int dtype = -1, InputArray mask = noArray()) +{ + GpuMat norm_buf; + GpuMat cvt_buf; + normalize(src, dst, alpha, beta, norm_type, dtype, mask, norm_buf, cvt_buf); +} //! computes the integral image //! sum will have CV_32S type, but will contain unsigned int values //! supports only CV_8UC1 source type -CV_EXPORTS void integral(const GpuMat& src, GpuMat& sum, Stream& stream = Stream::Null()); -//! buffered version -CV_EXPORTS void integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, Stream& stream = Stream::Null()); +CV_EXPORTS void integral(InputArray src, OutputArray sum, GpuMat& buffer, Stream& stream = Stream::Null()); +static inline void integralBuffered(InputArray src, OutputArray sum, GpuMat& buffer, Stream& stream = Stream::Null()) +{ + integral(src, sum, buffer, stream); +} +static inline void integral(InputArray src, OutputArray sum, Stream& stream = Stream::Null()) +{ + GpuMat buffer; + integral(src, sum, buffer, stream); +} //! computes squared integral image //! result matrix will have 64F type, but will contain 64U values //! 
supports source images of 8UC1 type only -CV_EXPORTS void sqrIntegral(const GpuMat& src, GpuMat& sqsum, Stream& stream = Stream::Null()); +CV_EXPORTS void sqrIntegral(InputArray src, OutputArray sqsum, GpuMat& buf, Stream& stream = Stream::Null()); +static inline void sqrIntegral(InputArray src, OutputArray sqsum, Stream& stream = Stream::Null()) +{ + GpuMat buffer; + sqrIntegral(src, sqsum, buffer, stream); +} + +//! implements generalized matrix product algorithm GEMM from BLAS +CV_EXPORTS void gemm(const GpuMat& src1, const GpuMat& src2, double alpha, + const GpuMat& src3, double beta, GpuMat& dst, int flags = 0, Stream& stream = Stream::Null()); //! performs per-element multiplication of two full (not packed) Fourier spectrums //! supports 32FC2 matrixes only (interleaved format) diff --git a/modules/gpuarithm/perf/perf_arithm.cpp b/modules/gpuarithm/perf/perf_arithm.cpp index b553fc212..5f15fb47d 100644 --- a/modules/gpuarithm/perf/perf_arithm.cpp +++ b/modules/gpuarithm/perf/perf_arithm.cpp @@ -265,7 +265,7 @@ PERF_TEST_P(Sz, Integral, cv::gpu::GpuMat dst; cv::gpu::GpuMat d_buf; - TEST_CYCLE() cv::gpu::integralBuffered(d_src, dst, d_buf); + TEST_CYCLE() cv::gpu::integral(d_src, dst, d_buf); GPU_SANITY_CHECK(dst); } @@ -293,9 +293,9 @@ PERF_TEST_P(Sz, IntegralSqr, if (PERF_RUN_GPU()) { const cv::gpu::GpuMat d_src(src); - cv::gpu::GpuMat dst; + cv::gpu::GpuMat dst, buf; - TEST_CYCLE() cv::gpu::sqrIntegral(d_src, dst); + TEST_CYCLE() cv::gpu::sqrIntegral(d_src, dst, buf); GPU_SANITY_CHECK(dst); } diff --git a/modules/gpuarithm/perf/perf_reductions.cpp b/modules/gpuarithm/perf/perf_reductions.cpp index 8d73180dc..c541ce0e2 100644 --- a/modules/gpuarithm/perf/perf_reductions.cpp +++ b/modules/gpuarithm/perf/perf_reductions.cpp @@ -108,9 +108,10 @@ PERF_TEST_P(Sz_Norm, NormDiff, { const cv::gpu::GpuMat d_src1(src1); const cv::gpu::GpuMat d_src2(src2); + cv::gpu::GpuMat d_buf; double gpu_dst; - TEST_CYCLE() gpu_dst = cv::gpu::norm(d_src1, d_src2, normType); + 
TEST_CYCLE() gpu_dst = cv::gpu::norm(d_src1, d_src2, d_buf, normType); SANITY_CHECK(gpu_dst); diff --git a/modules/gpuarithm/src/arithm.cpp b/modules/gpuarithm/src/arithm.cpp index a6cd1cb62..210097fb8 100644 --- a/modules/gpuarithm/src/arithm.cpp +++ b/modules/gpuarithm/src/arithm.cpp @@ -49,11 +49,6 @@ using namespace cv::gpu; void cv::gpu::gemm(const GpuMat&, const GpuMat&, double, const GpuMat&, double, GpuMat&, int, Stream&) { throw_no_cuda(); } -void cv::gpu::integral(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } -void cv::gpu::integralBuffered(const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } - -void cv::gpu::sqrIntegral(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } - void cv::gpu::mulSpectrums(const GpuMat&, const GpuMat&, GpuMat&, int, bool, Stream&) { throw_no_cuda(); } void cv::gpu::mulAndScaleSpectrums(const GpuMat&, const GpuMat&, GpuMat&, int, float, bool, Stream&) { throw_no_cuda(); } @@ -294,116 +289,6 @@ void cv::gpu::gemm(const GpuMat& src1, const GpuMat& src2, double alpha, const G #endif } -//////////////////////////////////////////////////////////////////////// -// integral - -void cv::gpu::integral(const GpuMat& src, GpuMat& sum, Stream& s) -{ - GpuMat buffer; - gpu::integralBuffered(src, sum, buffer, s); -} - -namespace cv { namespace gpu { namespace cudev -{ - namespace imgproc - { - void shfl_integral_gpu(const PtrStepSzb& img, PtrStepSz integral, cudaStream_t stream); - } -}}} - -void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, Stream& s) -{ - CV_Assert(src.type() == CV_8UC1); - - cudaStream_t stream = StreamAccessor::getStream(s); - - cv::Size whole; - cv::Point offset; - - src.locateROI(whole, offset); - - if (deviceSupports(WARP_SHUFFLE_FUNCTIONS) && src.cols <= 2048 - && offset.x % 16 == 0 && ((src.cols + 63) / 64) * 64 <= (static_cast(src.step) - offset.x)) - { - ensureSizeIsEnough(((src.rows + 7) / 8) * 8, ((src.cols + 63) / 64) * 64, CV_32SC1, buffer); - - 
cv::gpu::cudev::imgproc::shfl_integral_gpu(src, buffer, stream); - - sum.create(src.rows + 1, src.cols + 1, CV_32SC1); - - sum.setTo(Scalar::all(0), s); - - GpuMat inner = sum(Rect(1, 1, src.cols, src.rows)); - GpuMat res = buffer(Rect(0, 0, src.cols, src.rows)); - - res.copyTo(inner, s); - } - else - { -#ifndef HAVE_OPENCV_GPULEGACY - throw_no_cuda(); -#else - sum.create(src.rows + 1, src.cols + 1, CV_32SC1); - - NcvSize32u roiSize; - roiSize.width = src.cols; - roiSize.height = src.rows; - - cudaDeviceProp prop; - cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) ); - - Ncv32u bufSize; - ncvSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) ); - ensureSizeIsEnough(1, bufSize, CV_8UC1, buffer); - - NppStStreamHandler h(stream); - - ncvSafeCall( nppiStIntegral_8u32u_C1R(const_cast(src.ptr()), static_cast(src.step), - sum.ptr(), static_cast(sum.step), roiSize, buffer.ptr(), bufSize, prop) ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); -#endif - } -} - -////////////////////////////////////////////////////////////////////////////// -// sqrIntegral - -void cv::gpu::sqrIntegral(const GpuMat& src, GpuMat& sqsum, Stream& s) -{ -#ifndef HAVE_OPENCV_GPULEGACY - (void) src; - (void) sqsum; - (void) s; - throw_no_cuda(); -#else - CV_Assert(src.type() == CV_8U); - - NcvSize32u roiSize; - roiSize.width = src.cols; - roiSize.height = src.rows; - - cudaDeviceProp prop; - cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) ); - - Ncv32u bufSize; - ncvSafeCall(nppiStSqrIntegralGetSize_8u64u(roiSize, &bufSize, prop)); - GpuMat buf(1, bufSize, CV_8U); - - cudaStream_t stream = StreamAccessor::getStream(s); - - NppStStreamHandler h(stream); - - sqsum.create(src.rows + 1, src.cols + 1, CV_64F); - ncvSafeCall(nppiStSqrIntegral_8u64u_C1R(const_cast(src.ptr(0)), static_cast(src.step), - sqsum.ptr(0), static_cast(sqsum.step), roiSize, buf.ptr(0), bufSize, prop)); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); 
-#endif -} - ////////////////////////////////////////////////////////////////////////////// // mulSpectrums @@ -650,8 +535,6 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, (void) stream; throw_no_cuda(); #else - using namespace cv::gpu::cudev::imgproc; - CV_Assert(image.type() == CV_32F); CV_Assert(templ.type() == CV_32F); diff --git a/modules/gpuarithm/src/reductions.cpp b/modules/gpuarithm/src/reductions.cpp index b8b24188d..248fa9a4e 100644 --- a/modules/gpuarithm/src/reductions.cpp +++ b/modules/gpuarithm/src/reductions.cpp @@ -47,41 +47,28 @@ using namespace cv::gpu; #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) -double cv::gpu::norm(const GpuMat&, int) { throw_no_cuda(); return 0.0; } -double cv::gpu::norm(const GpuMat&, int, GpuMat&) { throw_no_cuda(); return 0.0; } -double cv::gpu::norm(const GpuMat&, int, const GpuMat&, GpuMat&) { throw_no_cuda(); return 0.0; } -double cv::gpu::norm(const GpuMat&, const GpuMat&, int) { throw_no_cuda(); return 0.0; } +double cv::gpu::norm(InputArray, int, InputArray, GpuMat&) { throw_no_cuda(); return 0.0; } +double cv::gpu::norm(InputArray, InputArray, GpuMat&, int) { throw_no_cuda(); return 0.0; } -Scalar cv::gpu::sum(const GpuMat&) { throw_no_cuda(); return Scalar(); } -Scalar cv::gpu::sum(const GpuMat&, GpuMat&) { throw_no_cuda(); return Scalar(); } -Scalar cv::gpu::sum(const GpuMat&, const GpuMat&, GpuMat&) { throw_no_cuda(); return Scalar(); } +Scalar cv::gpu::sum(InputArray, InputArray, GpuMat&) { throw_no_cuda(); return Scalar(); } +Scalar cv::gpu::absSum(InputArray, InputArray, GpuMat&) { throw_no_cuda(); return Scalar(); } +Scalar cv::gpu::sqrSum(InputArray, InputArray, GpuMat&) { throw_no_cuda(); return Scalar(); } -Scalar cv::gpu::absSum(const GpuMat&) { throw_no_cuda(); return Scalar(); } -Scalar cv::gpu::absSum(const GpuMat&, GpuMat&) { throw_no_cuda(); return Scalar(); } -Scalar cv::gpu::absSum(const GpuMat&, const GpuMat&, GpuMat&) { throw_no_cuda(); return 
Scalar(); } +void cv::gpu::minMax(InputArray, double*, double*, InputArray, GpuMat&) { throw_no_cuda(); } +void cv::gpu::minMaxLoc(InputArray, double*, double*, Point*, Point*, InputArray, GpuMat&, GpuMat&) { throw_no_cuda(); } -Scalar cv::gpu::sqrSum(const GpuMat&) { throw_no_cuda(); return Scalar(); } -Scalar cv::gpu::sqrSum(const GpuMat&, GpuMat&) { throw_no_cuda(); return Scalar(); } -Scalar cv::gpu::sqrSum(const GpuMat&, const GpuMat&, GpuMat&) { throw_no_cuda(); return Scalar(); } +int cv::gpu::countNonZero(InputArray, GpuMat&) { throw_no_cuda(); return 0; } -void cv::gpu::minMax(const GpuMat&, double*, double*, const GpuMat&) { throw_no_cuda(); } -void cv::gpu::minMax(const GpuMat&, double*, double*, const GpuMat&, GpuMat&) { throw_no_cuda(); } +void cv::gpu::reduce(InputArray, OutputArray, int, int, int, Stream&) { throw_no_cuda(); } -void cv::gpu::minMaxLoc(const GpuMat&, double*, double*, Point*, Point*, const GpuMat&) { throw_no_cuda(); } -void cv::gpu::minMaxLoc(const GpuMat&, double*, double*, Point*, Point*, const GpuMat&, GpuMat&, GpuMat&) { throw_no_cuda(); } +void cv::gpu::meanStdDev(InputArray, Scalar&, Scalar&, GpuMat&) { throw_no_cuda(); } -int cv::gpu::countNonZero(const GpuMat&) { throw_no_cuda(); return 0; } -int cv::gpu::countNonZero(const GpuMat&, GpuMat&) { throw_no_cuda(); return 0; } +void cv::gpu::rectStdDev(InputArray, InputArray, OutputArray, Rect, Stream&) { throw_no_cuda(); } -void cv::gpu::reduce(const GpuMat&, GpuMat&, int, int, int, Stream&) { throw_no_cuda(); } +void cv::gpu::normalize(InputArray, OutputArray, double, double, int, int, InputArray, GpuMat&, GpuMat&) { throw_no_cuda(); } -void cv::gpu::meanStdDev(const GpuMat&, Scalar&, Scalar&) { throw_no_cuda(); } -void cv::gpu::meanStdDev(const GpuMat&, Scalar&, Scalar&, GpuMat&) { throw_no_cuda(); } - -void cv::gpu::rectStdDev(const GpuMat&, const GpuMat&, GpuMat&, const Rect&, Stream&) { throw_no_cuda(); } - -void cv::gpu::normalize(const GpuMat&, GpuMat&, double, double, 
int, int, const GpuMat&) { throw_no_cuda(); } -void cv::gpu::normalize(const GpuMat&, GpuMat&, double, double, int, int, const GpuMat&, GpuMat&, GpuMat&) { throw_no_cuda(); } +void cv::gpu::integral(InputArray, OutputArray, GpuMat&, Stream&) { throw_no_cuda(); } +void cv::gpu::sqrIntegral(InputArray, OutputArray, GpuMat&, Stream&) { throw_no_cuda(); } #else @@ -124,21 +111,13 @@ namespace //////////////////////////////////////////////////////////////////////// // norm -double cv::gpu::norm(const GpuMat& src, int normType) +double cv::gpu::norm(InputArray _src, int normType, InputArray _mask, GpuMat& buf) { - GpuMat buf; - return gpu::norm(src, normType, GpuMat(), buf); -} + GpuMat src = _src.getGpuMat(); + GpuMat mask = _mask.getGpuMat(); -double cv::gpu::norm(const GpuMat& src, int normType, GpuMat& buf) -{ - return gpu::norm(src, normType, GpuMat(), buf); -} - -double cv::gpu::norm(const GpuMat& src, int normType, const GpuMat& mask, GpuMat& buf) -{ - CV_Assert(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2); - CV_Assert(mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size() && src.channels() == 1)); + CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 ); + CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size() && src.channels() == 1) ); GpuMat src_single_channel = src.reshape(1); @@ -154,13 +133,11 @@ double cv::gpu::norm(const GpuMat& src, int normType, const GpuMat& mask, GpuMat return std::max(std::abs(min_val), std::abs(max_val)); } -double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType) +double cv::gpu::norm(InputArray _src1, InputArray _src2, GpuMat& buf, int normType) { - CV_Assert(src1.type() == CV_8UC1); - CV_Assert(src1.size() == src2.size() && src1.type() == src2.type()); - CV_Assert(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2); - #if CUDA_VERSION < 5050 + (void) buf; + typedef NppStatus (*func_t)(const Npp8u* pSrc1, int 
nSrcStep1, const Npp8u* pSrc2, int nSrcStep2, NppiSize oSizeROI, Npp64f* pRetVal); static const func_t funcs[] = {nppiNormDiff_Inf_8u_C1R, nppiNormDiff_L1_8u_C1R, nppiNormDiff_L2_8u_C1R}; @@ -175,13 +152,18 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType) static const buf_size_func_t buf_size_funcs[] = {nppiNormDiffInfGetBufferHostSize_8u_C1R, nppiNormDiffL1GetBufferHostSize_8u_C1R, nppiNormDiffL2GetBufferHostSize_8u_C1R}; #endif + GpuMat src1 = _src1.getGpuMat(); + GpuMat src2 = _src2.getGpuMat(); + + CV_Assert( src1.type() == CV_8UC1 ); + CV_Assert( src1.size() == src2.size() && src1.type() == src2.type() ); + CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 ); + NppiSize sz; sz.width = src1.cols; sz.height = src1.rows; - int funcIdx = normType >> 1; - - double retVal; + const int funcIdx = normType >> 1; DeviceBuffer dbuf; @@ -191,13 +173,14 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType) int bufSize; buf_size_funcs[funcIdx](sz, &bufSize); - GpuMat buf(1, bufSize, CV_8UC1); + ensureSizeIsEnough(1, bufSize, CV_8UC1, buf); nppSafeCall( funcs[funcIdx](src1.ptr(), static_cast(src1.step), src2.ptr(), static_cast(src2.step), sz, dbuf, buf.data) ); #endif cudaSafeCall( cudaDeviceSynchronize() ); + double retVal; dbuf.download(&retVal); return retVal; @@ -220,19 +203,11 @@ namespace sum void runSqr(PtrStepSzb src, void* buf, double* sum, PtrStepSzb mask); } -Scalar cv::gpu::sum(const GpuMat& src) +Scalar cv::gpu::sum(InputArray _src, InputArray _mask, GpuMat& buf) { - GpuMat buf; - return gpu::sum(src, GpuMat(), buf); -} + GpuMat src = _src.getGpuMat(); + GpuMat mask = _mask.getGpuMat(); -Scalar cv::gpu::sum(const GpuMat& src, GpuMat& buf) -{ - return gpu::sum(src, GpuMat(), buf); -} - -Scalar cv::gpu::sum(const GpuMat& src, const GpuMat& mask, GpuMat& buf) -{ typedef void (*func_t)(PtrStepSzb src, void* buf, double* sum, PtrStepSzb mask); static const func_t funcs[7][5] = { @@ 
-266,19 +241,11 @@ Scalar cv::gpu::sum(const GpuMat& src, const GpuMat& mask, GpuMat& buf) return Scalar(result[0], result[1], result[2], result[3]); } -Scalar cv::gpu::absSum(const GpuMat& src) +Scalar cv::gpu::absSum(InputArray _src, InputArray _mask, GpuMat& buf) { - GpuMat buf; - return gpu::absSum(src, GpuMat(), buf); -} + GpuMat src = _src.getGpuMat(); + GpuMat mask = _mask.getGpuMat(); -Scalar cv::gpu::absSum(const GpuMat& src, GpuMat& buf) -{ - return gpu::absSum(src, GpuMat(), buf); -} - -Scalar cv::gpu::absSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf) -{ typedef void (*func_t)(PtrStepSzb src, void* buf, double* sum, PtrStepSzb mask); static const func_t funcs[7][5] = { @@ -312,19 +279,11 @@ Scalar cv::gpu::absSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf) return Scalar(result[0], result[1], result[2], result[3]); } -Scalar cv::gpu::sqrSum(const GpuMat& src) +Scalar cv::gpu::sqrSum(InputArray _src, InputArray _mask, GpuMat& buf) { - GpuMat buf; - return gpu::sqrSum(src, GpuMat(), buf); -} + GpuMat src = _src.getGpuMat(); + GpuMat mask = _mask.getGpuMat(); -Scalar cv::gpu::sqrSum(const GpuMat& src, GpuMat& buf) -{ - return gpu::sqrSum(src, GpuMat(), buf); -} - -Scalar cv::gpu::sqrSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf) -{ typedef void (*func_t)(PtrStepSzb src, void* buf, double* sum, PtrStepSzb mask); static const func_t funcs[7][5] = { @@ -369,14 +328,11 @@ namespace minMax void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf); } -void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const GpuMat& mask) +void cv::gpu::minMax(InputArray _src, double* minVal, double* maxVal, InputArray _mask, GpuMat& buf) { - GpuMat buf; - gpu::minMax(src, minVal, maxVal, mask, buf); -} + GpuMat src = _src.getGpuMat(); + GpuMat mask = _mask.getGpuMat(); -void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const GpuMat& mask, GpuMat& buf) -{ typedef void 
(*func_t)(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf); static const func_t funcs[] = { @@ -419,15 +375,12 @@ namespace minMaxLoc void run(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep locbuf); } -void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc, const GpuMat& mask) +void cv::gpu::minMaxLoc(InputArray _src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc, + InputArray _mask, GpuMat& valBuf, GpuMat& locBuf) { - GpuMat valBuf, locBuf; - gpu::minMaxLoc(src, minVal, maxVal, minLoc, maxLoc, mask, valBuf, locBuf); -} + GpuMat src = _src.getGpuMat(); + GpuMat mask = _mask.getGpuMat(); -void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc, - const GpuMat& mask, GpuMat& valBuf, GpuMat& locBuf) -{ typedef void (*func_t)(const PtrStepSzb src, const PtrStepb mask, double* minval, double* maxval, int* minloc, int* maxloc, PtrStepb valbuf, PtrStep locbuf); static const func_t funcs[] = { @@ -472,14 +425,10 @@ namespace countNonZero int run(const PtrStepSzb src, PtrStep buf); } -int cv::gpu::countNonZero(const GpuMat& src) +int cv::gpu::countNonZero(InputArray _src, GpuMat& buf) { - GpuMat buf; - return countNonZero(src, buf); -} + GpuMat src = _src.getGpuMat(); -int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf) -{ typedef int (*func_t)(const PtrStepSzb src, PtrStep buf); static const func_t funcs[] = { @@ -521,8 +470,10 @@ namespace reduce void cols(PtrStepSzb src, void* dst, int cn, int op, cudaStream_t stream); } -void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int dtype, Stream& stream) +void cv::gpu::reduce(InputArray _src, OutputArray _dst, int dim, int reduceOp, int dtype, Stream& stream) { + GpuMat src = _src.getGpuMat(); + CV_Assert( src.channels() <= 4 ); CV_Assert( dim == 0 || dim == 1 ); 
CV_Assert( reduceOp == REDUCE_SUM || reduceOp == REDUCE_AVG || reduceOp == REDUCE_MAX || reduceOp == REDUCE_MIN ); @@ -530,7 +481,8 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int if (dtype < 0) dtype = src.depth(); - dst.create(1, dim == 0 ? src.cols : src.rows, CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels())); + _dst.create(1, dim == 0 ? src.cols : src.rows, CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels())); + GpuMat dst = _dst.getGpuMat(); if (dim == 0) { @@ -691,15 +643,11 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int //////////////////////////////////////////////////////////////////////// // meanStdDev -void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev) +void cv::gpu::meanStdDev(InputArray _src, Scalar& mean, Scalar& stddev, GpuMat& buf) { - GpuMat buf; - meanStdDev(src, mean, stddev, buf); -} + GpuMat src = _src.getGpuMat(); -void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev, GpuMat& buf) -{ - CV_Assert(src.type() == CV_8UC1); + CV_Assert( src.type() == CV_8UC1 ); if (!deviceSupports(FEATURE_SET_COMPUTE_13)) CV_Error(cv::Error::StsNotImplemented, "Not sufficient compute capebility"); @@ -730,11 +678,15 @@ void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev, GpuMat ////////////////////////////////////////////////////////////////////////////// // rectStdDev -void cv::gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, const Rect& rect, Stream& s) +void cv::gpu::rectStdDev(InputArray _src, InputArray _sqr, OutputArray _dst, Rect rect, Stream& _stream) { - CV_Assert(src.type() == CV_32SC1 && sqr.type() == CV_64FC1); + GpuMat src = _src.getGpuMat(); + GpuMat sqr = _sqr.getGpuMat(); - dst.create(src.size(), CV_32FC1); + CV_Assert( src.type() == CV_32SC1 && sqr.type() == CV_64FC1 ); + + _dst.create(src.size(), CV_32FC1); + GpuMat dst = _dst.getGpuMat(); NppiSize sz; sz.width = src.cols; @@ -746,7 +698,7 @@ void 
cv::gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, cons nppRect.x = rect.x; nppRect.y = rect.y; - cudaStream_t stream = StreamAccessor::getStream(s); + cudaStream_t stream = StreamAccessor::getStream(_stream); NppStreamHandler h(stream); @@ -760,16 +712,12 @@ void cv::gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, cons //////////////////////////////////////////////////////////////////////// // normalize -void cv::gpu::normalize(const GpuMat& src, GpuMat& dst, double a, double b, int norm_type, int dtype, const GpuMat& mask) +void cv::gpu::normalize(InputArray _src, OutputArray dst, double a, double b, int norm_type, int dtype, InputArray mask, GpuMat& norm_buf, GpuMat& cvt_buf) { - GpuMat norm_buf; - GpuMat cvt_buf; - normalize(src, dst, a, b, norm_type, dtype, mask, norm_buf, cvt_buf); -} + GpuMat src = _src.getGpuMat(); -void cv::gpu::normalize(const GpuMat& src, GpuMat& dst, double a, double b, int norm_type, int dtype, const GpuMat& mask, GpuMat& norm_buf, GpuMat& cvt_buf) -{ double scale = 1, shift = 0; + if (norm_type == NORM_MINMAX) { double smin = 0, smax = 0; @@ -800,4 +748,116 @@ void cv::gpu::normalize(const GpuMat& src, GpuMat& dst, double a, double b, int } } +//////////////////////////////////////////////////////////////////////// +// integral + +namespace cv { namespace gpu { namespace cudev +{ + namespace imgproc + { + void shfl_integral_gpu(const PtrStepSzb& img, PtrStepSz integral, cudaStream_t stream); + } +}}} + +void cv::gpu::integral(InputArray _src, OutputArray _dst, GpuMat& buffer, Stream& _stream) +{ + GpuMat src = _src.getGpuMat(); + + CV_Assert( src.type() == CV_8UC1 ); + + cudaStream_t stream = StreamAccessor::getStream(_stream); + + cv::Size whole; + cv::Point offset; + src.locateROI(whole, offset); + + if (deviceSupports(WARP_SHUFFLE_FUNCTIONS) && src.cols <= 2048 + && offset.x % 16 == 0 && ((src.cols + 63) / 64) * 64 <= (static_cast(src.step) - offset.x)) + { + ensureSizeIsEnough(((src.rows + 7) 
/ 8) * 8, ((src.cols + 63) / 64) * 64, CV_32SC1, buffer); + + cv::gpu::cudev::imgproc::shfl_integral_gpu(src, buffer, stream); + + _dst.create(src.rows + 1, src.cols + 1, CV_32SC1); + GpuMat dst = _dst.getGpuMat(); + + dst.setTo(Scalar::all(0), _stream); + + GpuMat inner = dst(Rect(1, 1, src.cols, src.rows)); + GpuMat res = buffer(Rect(0, 0, src.cols, src.rows)); + + res.copyTo(inner, _stream); + } + else + { + #ifndef HAVE_OPENCV_GPULEGACY + throw_no_cuda(); + #else + _dst.create(src.rows + 1, src.cols + 1, CV_32SC1); + GpuMat dst = _dst.getGpuMat(); + + NcvSize32u roiSize; + roiSize.width = src.cols; + roiSize.height = src.rows; + + cudaDeviceProp prop; + cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) ); + + Ncv32u bufSize; + ncvSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) ); + ensureSizeIsEnough(1, bufSize, CV_8UC1, buffer); + + NppStStreamHandler h(stream); + + ncvSafeCall( nppiStIntegral_8u32u_C1R(const_cast(src.ptr()), static_cast(src.step), + dst.ptr(), static_cast(dst.step), roiSize, buffer.ptr(), bufSize, prop) ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + #endif + } +} + +////////////////////////////////////////////////////////////////////////////// +// sqrIntegral + +void cv::gpu::sqrIntegral(InputArray _src, OutputArray _dst, GpuMat& buf, Stream& _stream) +{ +#ifndef HAVE_OPENCV_GPULEGACY + (void) _src; + (void) _dst; + (void) _stream; + throw_no_cuda(); +#else + GpuMat src = _src.getGpuMat(); + + CV_Assert( src.type() == CV_8U ); + + NcvSize32u roiSize; + roiSize.width = src.cols; + roiSize.height = src.rows; + + cudaDeviceProp prop; + cudaSafeCall( cudaGetDeviceProperties(&prop, cv::gpu::getDevice()) ); + + Ncv32u bufSize; + ncvSafeCall(nppiStSqrIntegralGetSize_8u64u(roiSize, &bufSize, prop)); + + ensureSizeIsEnough(1, bufSize, CV_8U, buf); + + cudaStream_t stream = StreamAccessor::getStream(_stream); + + NppStStreamHandler h(stream); + + _dst.create(src.rows + 1, src.cols + 1, CV_64F); + 
GpuMat dst = _dst.getGpuMat(); + + ncvSafeCall(nppiStSqrIntegral_8u64u_C1R(const_cast(src.ptr(0)), static_cast(src.step), + dst.ptr(0), static_cast(dst.step), roiSize, buf.ptr(0), bufSize, prop)); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); +#endif +} + #endif diff --git a/modules/gpuimgproc/src/match_template.cpp b/modules/gpuimgproc/src/match_template.cpp index 008d3da1c..c5375c288 100644 --- a/modules/gpuimgproc/src/match_template.cpp +++ b/modules/gpuimgproc/src/match_template.cpp @@ -268,7 +268,7 @@ namespace buf.image_sums.resize(1); gpu::integral(image, buf.image_sums[0], stream); - unsigned int templ_sum = (unsigned int)sum(templ)[0]; + unsigned int templ_sum = (unsigned int)gpu::sum(templ)[0]; matchTemplatePrepared_CCOFF_8U(templ.cols, templ.rows, buf.image_sums[0], templ_sum, result, StreamAccessor::getStream(stream)); } else diff --git a/modules/nonfree/src/surf_gpu.cpp b/modules/nonfree/src/surf_gpu.cpp index 82ade2927..35805470b 100644 --- a/modules/nonfree/src/surf_gpu.cpp +++ b/modules/nonfree/src/surf_gpu.cpp @@ -142,13 +142,13 @@ namespace bindImgTex(img); - gpu::integralBuffered(img, surf_.sum, surf_.intBuffer); + gpu::integral(img, surf_.sum, surf_.intBuffer); sumOffset = bindSumTex(surf_.sum); if (use_mask) { gpu::min(mask, 1.0, surf_.mask1); - gpu::integralBuffered(surf_.mask1, surf_.maskSum, surf_.intBuffer); + gpu::integral(surf_.mask1, surf_.maskSum, surf_.intBuffer); maskOffset = bindMaskSumTex(surf_.maskSum); } } diff --git a/samples/gpu/driver_api_multi.cpp b/samples/gpu/driver_api_multi.cpp index a5343df4e..1dfe2123c 100644 --- a/samples/gpu/driver_api_multi.cpp +++ b/samples/gpu/driver_api_multi.cpp @@ -138,7 +138,7 @@ void Worker::operator()(int device_id) const gpu::transpose(d_src, d_dst); // Check results - bool passed = norm(dst - Mat(d_dst), NORM_INF) < 1e-3; + bool passed = cv::norm(dst - Mat(d_dst), NORM_INF) < 1e-3; std::cout << "GPU #" << device_id << " (" << DeviceInfo().name() << "): " << (passed ? 
"passed" : "FAILED") << endl; diff --git a/samples/gpu/farneback_optical_flow.cpp b/samples/gpu/farneback_optical_flow.cpp index c93ceb055..c2a5d411e 100644 --- a/samples/gpu/farneback_optical_flow.cpp +++ b/samples/gpu/farneback_optical_flow.cpp @@ -22,9 +22,9 @@ inline T mapVal(T x, T a, T b, T c, T d) static void colorizeFlow(const Mat &u, const Mat &v, Mat &dst) { double uMin, uMax; - minMaxLoc(u, &uMin, &uMax, 0, 0); + cv::minMaxLoc(u, &uMin, &uMax, 0, 0); double vMin, vMax; - minMaxLoc(v, &vMin, &vMax, 0, 0); + cv::minMaxLoc(v, &vMin, &vMax, 0, 0); uMin = ::abs(uMin); uMax = ::abs(uMax); vMin = ::abs(vMin); vMax = ::abs(vMax); float dMax = static_cast(::max(::max(uMin, uMax), ::max(vMin, vMax))); diff --git a/samples/gpu/multi.cpp b/samples/gpu/multi.cpp index 0e9bef636..c6e6aa398 100644 --- a/samples/gpu/multi.cpp +++ b/samples/gpu/multi.cpp @@ -95,7 +95,7 @@ void Worker::operator()(int device_id) const gpu::transpose(d_src, d_dst); // Check results - bool passed = norm(dst - Mat(d_dst), NORM_INF) < 1e-3; + bool passed = cv::norm(dst - Mat(d_dst), NORM_INF) < 1e-3; std::cout << "GPU #" << device_id << " (" << DeviceInfo().name() << "): " << (passed ? "passed" : "FAILED") << endl; From 948661d7222a692f9ff488018a254cfdd030ba2b Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Fri, 26 Apr 2013 12:22:56 +0400 Subject: [PATCH 044/121] switched to Input/Output Array in gpu::gemm --- .../gpuarithm/include/opencv2/gpuarithm.hpp | 5 +-- modules/gpuarithm/src/arithm.cpp | 37 +++++++++++-------- 2 files changed, 23 insertions(+), 19 deletions(-) diff --git a/modules/gpuarithm/include/opencv2/gpuarithm.hpp b/modules/gpuarithm/include/opencv2/gpuarithm.hpp index b131aba2e..79fd37f08 100644 --- a/modules/gpuarithm/include/opencv2/gpuarithm.hpp +++ b/modules/gpuarithm/include/opencv2/gpuarithm.hpp @@ -350,9 +350,8 @@ static inline void sqrIntegral(InputArray src, OutputArray sqsum, Stream& stream sqrIntegral(src, sqsum, buffer, stream); } -//! 
implements generalized matrix product algorithm GEMM from BLAS -CV_EXPORTS void gemm(const GpuMat& src1, const GpuMat& src2, double alpha, - const GpuMat& src3, double beta, GpuMat& dst, int flags = 0, Stream& stream = Stream::Null()); +CV_EXPORTS void gemm(InputArray src1, InputArray src2, double alpha, + InputArray src3, double beta, OutputArray dst, int flags = 0, Stream& stream = Stream::Null()); //! performs per-element multiplication of two full (not packed) Fourier spectrums //! supports 32FC2 matrixes only (interleaved format) diff --git a/modules/gpuarithm/src/arithm.cpp b/modules/gpuarithm/src/arithm.cpp index 210097fb8..e632bfe5b 100644 --- a/modules/gpuarithm/src/arithm.cpp +++ b/modules/gpuarithm/src/arithm.cpp @@ -47,7 +47,7 @@ using namespace cv::gpu; #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) -void cv::gpu::gemm(const GpuMat&, const GpuMat&, double, const GpuMat&, double, GpuMat&, int, Stream&) { throw_no_cuda(); } +void cv::gpu::gemm(InputArray, InputArray, double, InputArray, double, OutputArray, int, Stream&) { throw_no_cuda(); } void cv::gpu::mulSpectrums(const GpuMat&, const GpuMat&, GpuMat&, int, bool, Stream&) { throw_no_cuda(); } void cv::gpu::mulAndScaleSpectrums(const GpuMat&, const GpuMat&, GpuMat&, int, float, bool, Stream&) { throw_no_cuda(); } @@ -164,23 +164,27 @@ namespace //////////////////////////////////////////////////////////////////////// // gemm -void cv::gpu::gemm(const GpuMat& src1, const GpuMat& src2, double alpha, const GpuMat& src3, double beta, GpuMat& dst, int flags, Stream& stream) +void cv::gpu::gemm(InputArray _src1, InputArray _src2, double alpha, InputArray _src3, double beta, OutputArray _dst, int flags, Stream& stream) { #ifndef HAVE_CUBLAS - (void)src1; - (void)src2; - (void)alpha; - (void)src3; - (void)beta; - (void)dst; - (void)flags; - (void)stream; - CV_Error(cv::Error::StsNotImplemented, "The library was build without CUBLAS"); + (void) _src1; + (void) _src2; + (void) alpha; + (void) _src3; + 
(void) beta; + (void) _dst; + (void) flags; + (void) stream; + CV_Error(cv::Error::StsNotImplemented, "The library was build without CUBLAS"); #else // CUBLAS works with column-major matrices - CV_Assert(src1.type() == CV_32FC1 || src1.type() == CV_32FC2 || src1.type() == CV_64FC1 || src1.type() == CV_64FC2); - CV_Assert(src2.type() == src1.type() && (src3.empty() || src3.type() == src1.type())); + GpuMat src1 = _src1.getGpuMat(); + GpuMat src2 = _src2.getGpuMat(); + GpuMat src3 = _src3.getGpuMat(); + + CV_Assert( src1.type() == CV_32FC1 || src1.type() == CV_32FC2 || src1.type() == CV_64FC1 || src1.type() == CV_64FC2 ); + CV_Assert( src2.type() == src1.type() && (src3.empty() || src3.type() == src1.type()) ); if (src1.depth() == CV_64F) { @@ -203,10 +207,11 @@ void cv::gpu::gemm(const GpuMat& src1, const GpuMat& src2, double alpha, const G Size src3Size = tr3 ? Size(src3.rows, src3.cols) : src3.size(); Size dstSize(src2Size.width, src1Size.height); - CV_Assert(src1Size.width == src2Size.height); - CV_Assert(src3.empty() || src3Size == dstSize); + CV_Assert( src1Size.width == src2Size.height ); + CV_Assert( src3.empty() || src3Size == dstSize ); - dst.create(dstSize, src1.type()); + _dst.create(dstSize, src1.type()); + GpuMat dst = _dst.getGpuMat(); if (beta != 0) { From 26a4be89b12c47f4c1845ad6e9a414f41c516a02 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Fri, 26 Apr 2013 11:33:46 +0400 Subject: [PATCH 045/121] switched to Input/Output Array in Fourier operations --- .../gpuarithm/include/opencv2/gpuarithm.hpp | 6 +- modules/gpuarithm/src/arithm.cpp | 113 ++++++++++-------- 2 files changed, 66 insertions(+), 53 deletions(-) diff --git a/modules/gpuarithm/include/opencv2/gpuarithm.hpp b/modules/gpuarithm/include/opencv2/gpuarithm.hpp index 79fd37f08..555fa7b86 100644 --- a/modules/gpuarithm/include/opencv2/gpuarithm.hpp +++ b/modules/gpuarithm/include/opencv2/gpuarithm.hpp @@ -355,11 +355,11 @@ CV_EXPORTS void gemm(InputArray src1, InputArray src2, double
alpha, //! performs per-element multiplication of two full (not packed) Fourier spectrums //! supports 32FC2 matrixes only (interleaved format) -CV_EXPORTS void mulSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, bool conjB=false, Stream& stream = Stream::Null()); +CV_EXPORTS void mulSpectrums(InputArray src1, InputArray src2, OutputArray dst, int flags, bool conjB=false, Stream& stream = Stream::Null()); //! performs per-element multiplication of two full (not packed) Fourier spectrums //! supports 32FC2 matrixes only (interleaved format) -CV_EXPORTS void mulAndScaleSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, float scale, bool conjB=false, Stream& stream = Stream::Null()); +CV_EXPORTS void mulAndScaleSpectrums(InputArray src1, InputArray src2, OutputArray dst, int flags, float scale, bool conjB=false, Stream& stream = Stream::Null()); //! Performs a forward or inverse discrete Fourier transform (1D or 2D) of floating point matrix. //! Param dft_size is the size of DFT transform. @@ -372,7 +372,7 @@ CV_EXPORTS void mulAndScaleSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c //! in CUFFT's format. Result as full complex matrix for such kind of transform cannot be retrieved. //! //! For complex-to-real transform it is assumed that the source matrix is packed in CUFFT's format. 
-CV_EXPORTS void dft(const GpuMat& src, GpuMat& dst, Size dft_size, int flags=0, Stream& stream = Stream::Null()); +CV_EXPORTS void dft(InputArray src, OutputArray dst, Size dft_size, int flags=0, Stream& stream = Stream::Null()); struct CV_EXPORTS ConvolveBuf { diff --git a/modules/gpuarithm/src/arithm.cpp b/modules/gpuarithm/src/arithm.cpp index e632bfe5b..88af76a17 100644 --- a/modules/gpuarithm/src/arithm.cpp +++ b/modules/gpuarithm/src/arithm.cpp @@ -49,10 +49,10 @@ using namespace cv::gpu; void cv::gpu::gemm(InputArray, InputArray, double, InputArray, double, OutputArray, int, Stream&) { throw_no_cuda(); } -void cv::gpu::mulSpectrums(const GpuMat&, const GpuMat&, GpuMat&, int, bool, Stream&) { throw_no_cuda(); } -void cv::gpu::mulAndScaleSpectrums(const GpuMat&, const GpuMat&, GpuMat&, int, float, bool, Stream&) { throw_no_cuda(); } +void cv::gpu::mulSpectrums(InputArray, InputArray, OutputArray, int, bool, Stream&) { throw_no_cuda(); } +void cv::gpu::mulAndScaleSpectrums(InputArray, InputArray, OutputArray, int, float, bool, Stream&) { throw_no_cuda(); } -void cv::gpu::dft(const GpuMat&, GpuMat&, Size, int, Stream&) { throw_no_cuda(); } +void cv::gpu::dft(InputArray, OutputArray, Size, int, Stream&) { throw_no_cuda(); } void cv::gpu::ConvolveBuf::create(Size, Size) { throw_no_cuda(); } void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool) { throw_no_cuda(); } @@ -308,12 +308,12 @@ namespace cv { namespace gpu { namespace cudev #endif -void cv::gpu::mulSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, bool conjB, Stream& stream) +void cv::gpu::mulSpectrums(InputArray _src1, InputArray _src2, OutputArray _dst, int flags, bool conjB, Stream& stream) { #ifndef HAVE_CUFFT - (void) a; - (void) b; - (void) c; + (void) _src1; + (void) _src2; + (void) _dst; (void) flags; (void) conjB; (void) stream; @@ -322,16 +322,19 @@ void cv::gpu::mulSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flag (void) flags; typedef void 
(*Caller)(const PtrStep, const PtrStep, PtrStepSz, cudaStream_t stream); - static Caller callers[] = { cudev::mulSpectrums, cudev::mulSpectrums_CONJ }; - CV_Assert(a.type() == b.type() && a.type() == CV_32FC2); - CV_Assert(a.size() == b.size()); + GpuMat src1 = _src1.getGpuMat(); + GpuMat src2 = _src2.getGpuMat(); - c.create(a.size(), CV_32FC2); + CV_Assert( src1.type() == src2.type() && src1.type() == CV_32FC2 ); + CV_Assert( src1.size() == src2.size() ); + + _dst.create(src1.size(), CV_32FC2); + GpuMat dst = _dst.getGpuMat(); Caller caller = callers[(int)conjB]; - caller(a, b, c, StreamAccessor::getStream(stream)); + caller(src1, src2, dst, StreamAccessor::getStream(stream)); #endif } @@ -349,12 +352,12 @@ namespace cv { namespace gpu { namespace cudev #endif -void cv::gpu::mulAndScaleSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, float scale, bool conjB, Stream& stream) +void cv::gpu::mulAndScaleSpectrums(InputArray _src1, InputArray _src2, OutputArray _dst, int flags, float scale, bool conjB, Stream& stream) { #ifndef HAVE_CUFFT - (void) a; - (void) b; - (void) c; + (void) _src1; + (void) _src2; + (void) _dst; (void) flags; (void) scale; (void) conjB; @@ -366,53 +369,57 @@ void cv::gpu::mulAndScaleSpectrums(const GpuMat& a, const GpuMat& b, GpuMat& c, typedef void (*Caller)(const PtrStep, const PtrStep, float scale, PtrStepSz, cudaStream_t stream); static Caller callers[] = { cudev::mulAndScaleSpectrums, cudev::mulAndScaleSpectrums_CONJ }; - CV_Assert(a.type() == b.type() && a.type() == CV_32FC2); - CV_Assert(a.size() == b.size()); + GpuMat src1 = _src1.getGpuMat(); + GpuMat src2 = _src2.getGpuMat(); - c.create(a.size(), CV_32FC2); + CV_Assert( src1.type() == src2.type() && src1.type() == CV_32FC2); + CV_Assert( src1.size() == src2.size() ); + + _dst.create(src1.size(), CV_32FC2); + GpuMat dst = _dst.getGpuMat(); Caller caller = callers[(int)conjB]; - caller(a, b, scale, c, StreamAccessor::getStream(stream)); + caller(src1, src2, scale, dst, 
StreamAccessor::getStream(stream)); #endif } ////////////////////////////////////////////////////////////////////////////// // dft -void cv::gpu::dft(const GpuMat& src, GpuMat& dst, Size dft_size, int flags, Stream& stream) +void cv::gpu::dft(InputArray _src, OutputArray _dst, Size dft_size, int flags, Stream& stream) { #ifndef HAVE_CUFFT - (void) src; - (void) dst; + (void) _src; + (void) _dst; (void) dft_size; (void) flags; (void) stream; throw_no_cuda(); #else + GpuMat src = _src.getGpuMat(); - CV_Assert(src.type() == CV_32F || src.type() == CV_32FC2); + CV_Assert( src.type() == CV_32FC1 || src.type() == CV_32FC2 ); // We don't support unpacked output (in the case of real input) - CV_Assert(!(flags & DFT_COMPLEX_OUTPUT)); + CV_Assert( !(flags & DFT_COMPLEX_OUTPUT) ); - bool is_1d_input = (dft_size.height == 1) || (dft_size.width == 1); - int is_row_dft = flags & DFT_ROWS; - int is_scaled_dft = flags & DFT_SCALE; - int is_inverse = flags & DFT_INVERSE; - bool is_complex_input = src.channels() == 2; - bool is_complex_output = !(flags & DFT_REAL_OUTPUT); + const bool is_1d_input = (dft_size.height == 1) || (dft_size.width == 1); + const bool is_row_dft = (flags & DFT_ROWS) != 0; + const bool is_scaled_dft = (flags & DFT_SCALE) != 0; + const bool is_inverse = (flags & DFT_INVERSE) != 0; + const bool is_complex_input = src.channels() == 2; + const bool is_complex_output = !(flags & DFT_REAL_OUTPUT); // We don't support real-to-real transform - CV_Assert(is_complex_input || is_complex_output); + CV_Assert( is_complex_input || is_complex_output ); - GpuMat src_data; + GpuMat src_cont = src; // Make sure here we work with the continuous input, // as CUFFT can't handle gaps - src_data = src; - createContinuous(src.rows, src.cols, src.type(), src_data); - if (src_data.data != src.data) - src.copyTo(src_data); + createContinuous(src.rows, src.cols, src.type(), src_cont); + if (src_cont.data != src.data) + src.copyTo(src_cont, stream); Size dft_size_opt = dft_size; if 
(is_1d_input && !is_row_dft) @@ -422,17 +429,17 @@ void cv::gpu::dft(const GpuMat& src, GpuMat& dst, Size dft_size, int flags, Stre dft_size_opt.height = std::min(dft_size.width, dft_size.height); } + CV_Assert( dft_size_opt.width > 1 ); + cufftType dft_type = CUFFT_R2C; if (is_complex_input) dft_type = is_complex_output ? CUFFT_C2C : CUFFT_C2R; - CV_Assert(dft_size_opt.width > 1); - cufftHandle plan; if (is_1d_input || is_row_dft) - cufftPlan1d(&plan, dft_size_opt.width, dft_type, dft_size_opt.height); + cufftSafeCall( cufftPlan1d(&plan, dft_size_opt.width, dft_type, dft_size_opt.height) ); else - cufftPlan2d(&plan, dft_size_opt.height, dft_size_opt.width, dft_type); + cufftSafeCall( cufftPlan2d(&plan, dft_size_opt.height, dft_size_opt.width, dft_type) ); cufftSafeCall( cufftSetStream(plan, StreamAccessor::getStream(stream)) ); @@ -440,34 +447,40 @@ void cv::gpu::dft(const GpuMat& src, GpuMat& dst, Size dft_size, int flags, Stre { if (is_complex_output) { - createContinuous(dft_size, CV_32FC2, dst); + createContinuous(dft_size, CV_32FC2, _dst); + GpuMat dst = _dst.getGpuMat(); + cufftSafeCall(cufftExecC2C( - plan, src_data.ptr(), dst.ptr(), + plan, src_cont.ptr(), dst.ptr(), is_inverse ? CUFFT_INVERSE : CUFFT_FORWARD)); } else { - createContinuous(dft_size, CV_32F, dst); + createContinuous(dft_size, CV_32F, _dst); + GpuMat dst = _dst.getGpuMat(); + cufftSafeCall(cufftExecC2R( - plan, src_data.ptr(), dst.ptr())); + plan, src_cont.ptr(), dst.ptr())); } } else { // We could swap dft_size for efficiency. 
Here we must reflect it if (dft_size == dft_size_opt) - createContinuous(Size(dft_size.width / 2 + 1, dft_size.height), CV_32FC2, dst); + createContinuous(Size(dft_size.width / 2 + 1, dft_size.height), CV_32FC2, _dst); else - createContinuous(Size(dft_size.width, dft_size.height / 2 + 1), CV_32FC2, dst); + createContinuous(Size(dft_size.width, dft_size.height / 2 + 1), CV_32FC2, _dst); + + GpuMat dst = _dst.getGpuMat(); cufftSafeCall(cufftExecR2C( - plan, src_data.ptr(), dst.ptr())); + plan, src_cont.ptr(), dst.ptr())); } - cufftSafeCall(cufftDestroy(plan)); + cufftSafeCall( cufftDestroy(plan) ); if (is_scaled_dft) - multiply(dst, Scalar::all(1. / dft_size.area()), dst, 1, -1, stream); + gpu::multiply(_dst, Scalar::all(1. / dft_size.area()), _dst, 1, -1, stream); #endif } From 8461cb3f4bbe4ce82d072bf1bcfef0feb4bb86dd Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Fri, 26 Apr 2013 12:40:03 +0400 Subject: [PATCH 046/121] refactored gpu::convolve function: * converted it to Algorithm * old API still can be used for source compatibility (marked as deprecated) --- .../gpuarithm/include/opencv2/gpuarithm.hpp | 32 ++- modules/gpuarithm/perf/perf_arithm.cpp | 7 +- modules/gpuarithm/src/arithm.cpp | 264 +++++++++--------- modules/gpuarithm/test/test_arithm.cpp | 4 +- modules/gpuimgproc/src/match_template.cpp | 9 +- 5 files changed, 175 insertions(+), 141 deletions(-) diff --git a/modules/gpuarithm/include/opencv2/gpuarithm.hpp b/modules/gpuarithm/include/opencv2/gpuarithm.hpp index 555fa7b86..8fbe296d8 100644 --- a/modules/gpuarithm/include/opencv2/gpuarithm.hpp +++ b/modules/gpuarithm/include/opencv2/gpuarithm.hpp @@ -374,7 +374,23 @@ CV_EXPORTS void mulAndScaleSpectrums(InputArray src1, InputArray src2, OutputArr //! For complex-to-real transform it is assumed that the source matrix is packed in CUFFT's format. CV_EXPORTS void dft(InputArray src, OutputArray dst, Size dft_size, int flags=0, Stream& stream = Stream::Null()); -struct CV_EXPORTS ConvolveBuf +//! 
computes convolution (or cross-correlation) of two images using discrete Fourier transform +//! supports source images of 32FC1 type only +//! result matrix will have 32FC1 type +class CV_EXPORTS Convolution : public Algorithm +{ +public: + virtual void convolve(InputArray image, InputArray templ, OutputArray result, bool ccorr = false, Stream& stream = Stream::Null()) = 0; +}; +CV_EXPORTS Ptr createConvolution(Size user_block_size = Size()); + +__OPENCV_GPUARITHM_DEPR_BEFORE__ void convolve(InputArray image, InputArray templ, OutputArray result, bool ccorr = false, Stream& stream = Stream::Null()) __OPENCV_GPUARITHM_DEPR_AFTER__; +inline void convolve(InputArray image, InputArray templ, OutputArray result, bool ccorr , Stream& stream) +{ + createConvolution()->convolve(image, templ, result, ccorr, stream); +} + +struct ConvolveBuf { Size result_size; Size block_size; @@ -385,15 +401,15 @@ struct CV_EXPORTS ConvolveBuf GpuMat image_spect, templ_spect, result_spect; GpuMat image_block, templ_block, result_data; - void create(Size image_size, Size templ_size); - static Size estimateBlockSize(Size result_size, Size templ_size); + void create(Size, Size){} + static Size estimateBlockSize(Size, Size){ return Size(); } }; -//! computes convolution (or cross-correlation) of two images using discrete Fourier transform -//! supports source images of 32FC1 type only -//! 
result matrix will have 32FC1 type -CV_EXPORTS void convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr = false); -CV_EXPORTS void convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr, ConvolveBuf& buf, Stream& stream = Stream::Null()); +__OPENCV_GPUARITHM_DEPR_BEFORE__ void convolve(InputArray image, InputArray templ, OutputArray result, bool ccorr, ConvolveBuf& buf, Stream& stream = Stream::Null()) __OPENCV_GPUARITHM_DEPR_AFTER__; +inline void convolve(InputArray image, InputArray templ, OutputArray result, bool ccorr, ConvolveBuf& buf, Stream& stream) +{ + createConvolution(buf.user_block_size)->convolve(image, templ, result, ccorr, stream); +} }} // namespace cv { namespace gpu { diff --git a/modules/gpuarithm/perf/perf_arithm.cpp b/modules/gpuarithm/perf/perf_arithm.cpp index 5f15fb47d..dfeafa0fa 100644 --- a/modules/gpuarithm/perf/perf_arithm.cpp +++ b/modules/gpuarithm/perf/perf_arithm.cpp @@ -228,10 +228,11 @@ PERF_TEST_P(Sz_KernelSz_Ccorr, Convolve, cv::gpu::GpuMat d_templ = cv::gpu::createContinuous(templ_size, templ_size, CV_32FC1); d_templ.upload(templ); - cv::gpu::GpuMat dst; - cv::gpu::ConvolveBuf d_buf; + cv::Ptr convolution = cv::gpu::createConvolution(); - TEST_CYCLE() cv::gpu::convolve(d_image, d_templ, dst, ccorr, d_buf); + cv::gpu::GpuMat dst; + + TEST_CYCLE() convolution->convolve(d_image, d_templ, dst, ccorr); GPU_SANITY_CHECK(dst); } diff --git a/modules/gpuarithm/src/arithm.cpp b/modules/gpuarithm/src/arithm.cpp index 88af76a17..6045cf5ba 100644 --- a/modules/gpuarithm/src/arithm.cpp +++ b/modules/gpuarithm/src/arithm.cpp @@ -54,9 +54,7 @@ void cv::gpu::mulAndScaleSpectrums(InputArray, InputArray, OutputArray, int, flo void cv::gpu::dft(InputArray, OutputArray, Size, int, Stream&) { throw_no_cuda(); } -void cv::gpu::ConvolveBuf::create(Size, Size) { throw_no_cuda(); } -void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool) { throw_no_cuda(); } -void cv::gpu::convolve(const 
GpuMat&, const GpuMat&, GpuMat&, bool, ConvolveBuf&, Stream&) { throw_no_cuda(); } +Ptr cv::gpu::createConvolution(Size) { throw_no_cuda(); return Ptr(); } #else /* !defined (HAVE_CUDA) */ @@ -486,136 +484,152 @@ void cv::gpu::dft(InputArray _src, OutputArray _dst, Size dft_size, int flags, S } ////////////////////////////////////////////////////////////////////////////// -// convolve +// Convolution -void cv::gpu::ConvolveBuf::create(Size image_size, Size templ_size) +#ifdef HAVE_CUFFT + +namespace { - result_size = Size(image_size.width - templ_size.width + 1, - image_size.height - templ_size.height + 1); - - block_size = user_block_size; - if (user_block_size.width == 0 || user_block_size.height == 0) - block_size = estimateBlockSize(result_size, templ_size); - - dft_size.width = 1 << int(ceil(std::log(block_size.width + templ_size.width - 1.) / std::log(2.))); - dft_size.height = 1 << int(ceil(std::log(block_size.height + templ_size.height - 1.) / std::log(2.))); - - // CUFFT has hard-coded kernels for power-of-2 sizes (up to 8192), - // see CUDA Toolkit 4.1 CUFFT Library Programming Guide - if (dft_size.width > 8192) - dft_size.width = getOptimalDFTSize(block_size.width + templ_size.width - 1); - if (dft_size.height > 8192) - dft_size.height = getOptimalDFTSize(block_size.height + templ_size.height - 1); - - // To avoid wasting time doing small DFTs - dft_size.width = std::max(dft_size.width, 512); - dft_size.height = std::max(dft_size.height, 512); - - createContinuous(dft_size, CV_32F, image_block); - createContinuous(dft_size, CV_32F, templ_block); - createContinuous(dft_size, CV_32F, result_data); - - spect_len = dft_size.height * (dft_size.width / 2 + 1); - createContinuous(1, spect_len, CV_32FC2, image_spect); - createContinuous(1, spect_len, CV_32FC2, templ_spect); - createContinuous(1, spect_len, CV_32FC2, result_spect); - - // Use maximum result matrix block size for the estimated DFT block size - block_size.width = std::min(dft_size.width - 
templ_size.width + 1, result_size.width); - block_size.height = std::min(dft_size.height - templ_size.height + 1, result_size.height); -} - - -Size cv::gpu::ConvolveBuf::estimateBlockSize(Size result_size, Size /*templ_size*/) -{ - int width = (result_size.width + 2) / 3; - int height = (result_size.height + 2) / 3; - width = std::min(width, result_size.width); - height = std::min(height, result_size.height); - return Size(width, height); -} - - -void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr) -{ - ConvolveBuf buf; - gpu::convolve(image, templ, result, ccorr, buf); -} - -void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr, ConvolveBuf& buf, Stream& stream) -{ -#ifndef HAVE_CUFFT - (void) image; - (void) templ; - (void) result; - (void) ccorr; - (void) buf; - (void) stream; - throw_no_cuda(); -#else - CV_Assert(image.type() == CV_32F); - CV_Assert(templ.type() == CV_32F); - - buf.create(image.size(), templ.size()); - result.create(buf.result_size, CV_32F); - - Size& block_size = buf.block_size; - Size& dft_size = buf.dft_size; - - GpuMat& image_block = buf.image_block; - GpuMat& templ_block = buf.templ_block; - GpuMat& result_data = buf.result_data; - - GpuMat& image_spect = buf.image_spect; - GpuMat& templ_spect = buf.templ_spect; - GpuMat& result_spect = buf.result_spect; - - cufftHandle planR2C, planC2R; - cufftSafeCall(cufftPlan2d(&planC2R, dft_size.height, dft_size.width, CUFFT_C2R)); - cufftSafeCall(cufftPlan2d(&planR2C, dft_size.height, dft_size.width, CUFFT_R2C)); - - cufftSafeCall( cufftSetStream(planR2C, StreamAccessor::getStream(stream)) ); - cufftSafeCall( cufftSetStream(planC2R, StreamAccessor::getStream(stream)) ); - - GpuMat templ_roi(templ.size(), CV_32F, templ.data, templ.step); - gpu::copyMakeBorder(templ_roi, templ_block, 0, templ_block.rows - templ_roi.rows, 0, - templ_block.cols - templ_roi.cols, 0, Scalar(), stream); - - cufftSafeCall(cufftExecR2C(planR2C, 
templ_block.ptr(), - templ_spect.ptr())); - - // Process all blocks of the result matrix - for (int y = 0; y < result.rows; y += block_size.height) + class ConvolutionImpl : public Convolution { - for (int x = 0; x < result.cols; x += block_size.width) - { - Size image_roi_size(std::min(x + dft_size.width, image.cols) - x, - std::min(y + dft_size.height, image.rows) - y); - GpuMat image_roi(image_roi_size, CV_32F, (void*)(image.ptr(y) + x), - image.step); - gpu::copyMakeBorder(image_roi, image_block, 0, image_block.rows - image_roi.rows, - 0, image_block.cols - image_roi.cols, 0, Scalar(), stream); + public: + explicit ConvolutionImpl(Size user_block_size_) : user_block_size(user_block_size_) {} - cufftSafeCall(cufftExecR2C(planR2C, image_block.ptr(), - image_spect.ptr())); - gpu::mulAndScaleSpectrums(image_spect, templ_spect, result_spect, 0, - 1.f / dft_size.area(), ccorr, stream); - cufftSafeCall(cufftExecC2R(planC2R, result_spect.ptr(), - result_data.ptr())); + void convolve(InputArray image, InputArray templ, OutputArray result, bool ccorr = false, Stream& stream = Stream::Null()); - Size result_roi_size(std::min(x + block_size.width, result.cols) - x, - std::min(y + block_size.height, result.rows) - y); - GpuMat result_roi(result_roi_size, result.type(), - (void*)(result.ptr(y) + x), result.step); - GpuMat result_block(result_roi_size, result_data.type(), - result_data.ptr(), result_data.step); + private: + void create(Size image_size, Size templ_size); + static Size estimateBlockSize(Size result_size); - result_block.copyTo(result_roi, stream); - } + Size result_size; + Size block_size; + Size user_block_size; + Size dft_size; + int spect_len; + + GpuMat image_spect, templ_spect, result_spect; + GpuMat image_block, templ_block, result_data; + }; + + void ConvolutionImpl::create(Size image_size, Size templ_size) + { + result_size = Size(image_size.width - templ_size.width + 1, + image_size.height - templ_size.height + 1); + + block_size = user_block_size; + 
if (user_block_size.width == 0 || user_block_size.height == 0) + block_size = estimateBlockSize(result_size); + + dft_size.width = 1 << int(ceil(std::log(block_size.width + templ_size.width - 1.) / std::log(2.))); + dft_size.height = 1 << int(ceil(std::log(block_size.height + templ_size.height - 1.) / std::log(2.))); + + // CUFFT has hard-coded kernels for power-of-2 sizes (up to 8192), + // see CUDA Toolkit 4.1 CUFFT Library Programming Guide + if (dft_size.width > 8192) + dft_size.width = getOptimalDFTSize(block_size.width + templ_size.width - 1); + if (dft_size.height > 8192) + dft_size.height = getOptimalDFTSize(block_size.height + templ_size.height - 1); + + // To avoid wasting time doing small DFTs + dft_size.width = std::max(dft_size.width, 512); + dft_size.height = std::max(dft_size.height, 512); + + createContinuous(dft_size, CV_32F, image_block); + createContinuous(dft_size, CV_32F, templ_block); + createContinuous(dft_size, CV_32F, result_data); + + spect_len = dft_size.height * (dft_size.width / 2 + 1); + createContinuous(1, spect_len, CV_32FC2, image_spect); + createContinuous(1, spect_len, CV_32FC2, templ_spect); + createContinuous(1, spect_len, CV_32FC2, result_spect); + + // Use maximum result matrix block size for the estimated DFT block size + block_size.width = std::min(dft_size.width - templ_size.width + 1, result_size.width); + block_size.height = std::min(dft_size.height - templ_size.height + 1, result_size.height); } - cufftSafeCall(cufftDestroy(planR2C)); - cufftSafeCall(cufftDestroy(planC2R)); + Size ConvolutionImpl::estimateBlockSize(Size result_size) + { + int width = (result_size.width + 2) / 3; + int height = (result_size.height + 2) / 3; + width = std::min(width, result_size.width); + height = std::min(height, result_size.height); + return Size(width, height); + } + + void ConvolutionImpl::convolve(InputArray _image, InputArray _templ, OutputArray _result, bool ccorr, Stream& _stream) + { + GpuMat image = _image.getGpuMat(); + GpuMat 
templ = _templ.getGpuMat(); + + CV_Assert( image.type() == CV_32FC1 ); + CV_Assert( templ.type() == CV_32FC1 ); + + create(image.size(), templ.size()); + + _result.create(result_size, CV_32FC1); + GpuMat result = _result.getGpuMat(); + + cudaStream_t stream = StreamAccessor::getStream(_stream); + + cufftHandle planR2C, planC2R; + cufftSafeCall( cufftPlan2d(&planC2R, dft_size.height, dft_size.width, CUFFT_C2R) ); + cufftSafeCall( cufftPlan2d(&planR2C, dft_size.height, dft_size.width, CUFFT_R2C) ); + + cufftSafeCall( cufftSetStream(planR2C, stream) ); + cufftSafeCall( cufftSetStream(planC2R, stream) ); + + GpuMat templ_roi(templ.size(), CV_32FC1, templ.data, templ.step); + gpu::copyMakeBorder(templ_roi, templ_block, 0, templ_block.rows - templ_roi.rows, 0, + templ_block.cols - templ_roi.cols, 0, Scalar(), _stream); + + cufftSafeCall( cufftExecR2C(planR2C, templ_block.ptr(), templ_spect.ptr()) ); + + // Process all blocks of the result matrix + for (int y = 0; y < result.rows; y += block_size.height) + { + for (int x = 0; x < result.cols; x += block_size.width) + { + Size image_roi_size(std::min(x + dft_size.width, image.cols) - x, + std::min(y + dft_size.height, image.rows) - y); + GpuMat image_roi(image_roi_size, CV_32F, (void*)(image.ptr(y) + x), + image.step); + gpu::copyMakeBorder(image_roi, image_block, 0, image_block.rows - image_roi.rows, + 0, image_block.cols - image_roi.cols, 0, Scalar(), _stream); + + cufftSafeCall(cufftExecR2C(planR2C, image_block.ptr(), + image_spect.ptr())); + gpu::mulAndScaleSpectrums(image_spect, templ_spect, result_spect, 0, + 1.f / dft_size.area(), ccorr, _stream); + cufftSafeCall(cufftExecC2R(planC2R, result_spect.ptr(), + result_data.ptr())); + + Size result_roi_size(std::min(x + block_size.width, result.cols) - x, + std::min(y + block_size.height, result.rows) - y); + GpuMat result_roi(result_roi_size, result.type(), + (void*)(result.ptr(y) + x), result.step); + GpuMat result_block(result_roi_size, result_data.type(), + 
result_data.ptr(), result_data.step); + + result_block.copyTo(result_roi, _stream); + } + } + + cufftSafeCall( cufftDestroy(planR2C) ); + cufftSafeCall( cufftDestroy(planC2R) ); + } +} + +#endif + +Ptr cv::gpu::createConvolution(Size user_block_size) +{ +#ifndef HAVE_CUFFT + (void) user_block_size; + CV_Error(cv::Error::StsNotImplemented, "The library was build without CUFFT"); + return Ptr(); +#else + return new ConvolutionImpl(user_block_size); #endif } diff --git a/modules/gpuarithm/test/test_arithm.cpp b/modules/gpuarithm/test/test_arithm.cpp index 93fb0ae84..0534e219d 100644 --- a/modules/gpuarithm/test/test_arithm.cpp +++ b/modules/gpuarithm/test/test_arithm.cpp @@ -419,8 +419,10 @@ GPU_TEST_P(Convolve, Accuracy) cv::Mat src = randomMat(size, CV_32FC1, 0.0, 100.0); cv::Mat kernel = randomMat(cv::Size(ksize, ksize), CV_32FC1, 0.0, 1.0); + cv::Ptr conv = cv::gpu::createConvolution(); + cv::gpu::GpuMat dst; - cv::gpu::convolve(loadMat(src), loadMat(kernel), dst, ccorr); + conv->convolve(loadMat(src), loadMat(kernel), dst, ccorr); cv::Mat dst_gold; convolveDFT(src, kernel, dst_gold, ccorr); diff --git a/modules/gpuimgproc/src/match_template.cpp b/modules/gpuimgproc/src/match_template.cpp index c5375c288..059d41ca9 100644 --- a/modules/gpuimgproc/src/match_template.cpp +++ b/modules/gpuimgproc/src/match_template.cpp @@ -172,15 +172,16 @@ namespace return; } - gpu::ConvolveBuf convolve_buf; - convolve_buf.user_block_size = buf.user_block_size; + Ptr conv = gpu::createConvolution(buf.user_block_size); if (image.channels() == 1) - gpu::convolve(image.reshape(1), templ.reshape(1), result, true, convolve_buf, stream); + { + conv->convolve(image.reshape(1), templ.reshape(1), result, true, stream); + } else { GpuMat result_; - gpu::convolve(image.reshape(1), templ.reshape(1), result_, true, convolve_buf, stream); + conv->convolve(image.reshape(1), templ.reshape(1), result_, true, stream); extractFirstChannel_32F(result_, result, image.channels(),
StreamAccessor::getStream(stream)); } } From a3341006ee096df6d59b2734c3b8a1ca8df9cff8 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Tue, 11 Jun 2013 15:45:04 +0400 Subject: [PATCH 047/121] updated documentation --- modules/gpuarithm/doc/arithm.rst | 105 +-- modules/gpuarithm/doc/core.rst | 156 +++-- modules/gpuarithm/doc/element_operations.rst | 671 +++++++++---------- modules/gpuarithm/doc/reductions.rst | 164 +++-- modules/gpufilters/doc/filtering.rst | 4 +- 5 files changed, 545 insertions(+), 555 deletions(-) diff --git a/modules/gpuarithm/doc/arithm.rst b/modules/gpuarithm/doc/arithm.rst index 8a051bc49..2f1d74df5 100644 --- a/modules/gpuarithm/doc/arithm.rst +++ b/modules/gpuarithm/doc/arithm.rst @@ -6,10 +6,10 @@ Arithm Operations on Matrices gpu::gemm ------------------- +--------- Performs generalized matrix multiplication. -.. ocv:function:: void gpu::gemm(const GpuMat& src1, const GpuMat& src2, double alpha, const GpuMat& src3, double beta, GpuMat& dst, int flags = 0, Stream& stream = Stream::Null()) +.. ocv:function:: void gpu::gemm(InputArray src1, InputArray src2, double alpha, InputArray src3, double beta, OutputArray dst, int flags = 0, Stream& stream = Stream::Null()) :param src1: First multiplied input matrix that should have ``CV_32FC1`` , ``CV_64FC1`` , ``CV_32FC2`` , or ``CV_64FC2`` type. @@ -44,38 +44,40 @@ The function performs generalized matrix multiplication similar to the ``gemm`` gpu::mulSpectrums ---------------------- +----------------- Performs a per-element multiplication of two Fourier spectrums. -.. ocv:function:: void gpu::mulSpectrums( const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, bool conjB=false, Stream& stream=Stream::Null() ) +.. ocv:function:: void gpu::mulSpectrums(InputArray src1, InputArray src2, OutputArray dst, int flags, bool conjB=false, Stream& stream = Stream::Null()) - :param a: First spectrum. + :param src1: First spectrum. - :param b: Second spectrum with the same size and type as ``a`` . 
+ :param src2: Second spectrum with the same size and type as ``src1`` . - :param c: Destination spectrum. + :param dst: Destination spectrum. :param flags: Mock parameter used for CPU/GPU interfaces similarity. :param conjB: Optional flag to specify if the second spectrum needs to be conjugated before the multiplication. - Only full (not packed) ``CV_32FC2`` complex spectrums in the interleaved format are supported for now. + :param stream: Stream for the asynchronous version. + +Only full (not packed) ``CV_32FC2`` complex spectrums in the interleaved format are supported for now. .. seealso:: :ocv:func:`mulSpectrums` gpu::mulAndScaleSpectrums ------------------------------ +------------------------- Performs a per-element multiplication of two Fourier spectrums and scales the result. -.. ocv:function:: void gpu::mulAndScaleSpectrums( const GpuMat& a, const GpuMat& b, GpuMat& c, int flags, float scale, bool conjB=false, Stream& stream=Stream::Null() ) +.. ocv:function:: void gpu::mulAndScaleSpectrums(InputArray src1, InputArray src2, OutputArray dst, int flags, float scale, bool conjB=false, Stream& stream = Stream::Null()) - :param a: First spectrum. + :param src1: First spectrum. - :param b: Second spectrum with the same size and type as ``a`` . + :param src2: Second spectrum with the same size and type as ``src1`` . - :param c: Destination spectrum. + :param dst: Destination spectrum. :param flags: Mock parameter used for CPU/GPU interfaces similarity. @@ -83,17 +85,17 @@ Performs a per-element multiplication of two Fourier spectrums and scales the re :param conjB: Optional flag to specify if the second spectrum needs to be conjugated before the multiplication. - Only full (not packed) ``CV_32FC2`` complex spectrums in the interleaved format are supported for now. +Only full (not packed) ``CV_32FC2`` complex spectrums in the interleaved format are supported for now. ..
seealso:: :ocv:func:`mulSpectrums` gpu::dft ------------- +-------- Performs a forward or inverse discrete Fourier transform (1D or 2D) of the floating point matrix. -.. ocv:function:: void gpu::dft( const GpuMat& src, GpuMat& dst, Size dft_size, int flags=0, Stream& stream=Stream::Null() ) +.. ocv:function:: void gpu::dft(InputArray src, OutputArray dst, Size dft_size, int flags=0, Stream& stream = Stream::Null()) :param src: Source matrix (real or complex). @@ -125,46 +127,25 @@ The source matrix should be continuous, otherwise reallocation and data copying -gpu::ConvolveBuf +gpu::Convolution ---------------- -.. ocv:struct:: gpu::ConvolveBuf +.. ocv:class:: gpu::Convolution : public Algorithm -Class providing a memory buffer for :ocv:func:`gpu::convolve` function, plus it allows to adjust some specific parameters. :: +Base class for convolution (or cross-correlation) operator. :: - struct CV_EXPORTS ConvolveBuf + class CV_EXPORTS Convolution : public Algorithm { - Size result_size; - Size block_size; - Size user_block_size; - Size dft_size; - int spect_len; - - GpuMat image_spect, templ_spect, result_spect; - GpuMat image_block, templ_block, result_data; - - void create(Size image_size, Size templ_size); - static Size estimateBlockSize(Size result_size, Size templ_size); + public: + virtual void convolve(InputArray image, InputArray templ, OutputArray result, bool ccorr = false, Stream& stream = Stream::Null()) = 0; }; -You can use field `user_block_size` to set specific block size for :ocv:func:`gpu::convolve` function. If you leave its default value `Size(0,0)` then automatic estimation of block size will be used (which is optimized for speed). By varying `user_block_size` you can reduce memory requirements at the cost of speed. - -gpu::ConvolveBuf::create ------------------------- -.. ocv:function:: gpu::ConvolveBuf::create(Size image_size, Size templ_size) - -Constructs a buffer for :ocv:func:`gpu::convolve` function with respective arguments. 
- - - -gpu::convolve ------------------ +gpu::Convolution::convolve +--------------------------- Computes a convolution (or cross-correlation) of two images. -.. ocv:function:: void gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr=false) - -.. ocv:function:: void gpu::convolve( const GpuMat& image, const GpuMat& templ, GpuMat& result, bool ccorr, ConvolveBuf& buf, Stream& stream=Stream::Null() ) +.. ocv:function:: void gpu::Convolution::convolve(InputArray image, InputArray templ, OutputArray result, bool ccorr = false, Stream& stream = Stream::Null()) :param image: Source image. Only ``CV_32FC1`` images are supported for now. @@ -174,38 +155,16 @@ Computes a convolution (or cross-correlation) of two images. :param ccorr: Flags to evaluate cross-correlation instead of convolution. - :param buf: Optional buffer to avoid extra memory allocations and to adjust some specific parameters. See :ocv:struct:`gpu::ConvolveBuf`. - :param stream: Stream for the asynchronous version. .. seealso:: :ocv:func:`gpu::filter2D` -gpu::integral ------------------ -Computes an integral image. +gpu::createConvolution +---------------------- +Creates implementation for :ocv:class:`gpu::Convolution` . -.. ocv:function:: void gpu::integral(const GpuMat& src, GpuMat& sum, Stream& stream = Stream::Null()) +.. ocv:function:: Ptr<Convolution> createConvolution(Size user_block_size = Size()) - :param src: Source image. Only ``CV_8UC1`` images are supported for now. - - :param sum: Integral image containing 32-bit unsigned integer values packed into ``CV_32SC1`` . - - :param stream: Stream for the asynchronous version. - -.. seealso:: :ocv:func:`integral` - - - -gpu::sqrIntegral --------------------- -Computes a squared integral image. - -.. ocv:function:: void gpu::sqrIntegral(const GpuMat& src, GpuMat& sqsum, Stream& stream = Stream::Null()) - - :param src: Source image. Only ``CV_8UC1`` images are supported for now.
- - :param sqsum: Squared integral image containing 64-bit unsigned integer values packed into ``CV_64FC1`` . - - :param stream: Stream for the asynchronous version. + :param user_block_size: Block size. If you leave the default value `Size(0,0)`, automatic estimation of the block size is used (which is optimized for speed). By varying `user_block_size` you can reduce memory requirements at the cost of speed. diff --git a/modules/gpuarithm/doc/core.rst b/modules/gpuarithm/doc/core.rst index 50599bcf2..624ea3e7b 100644 --- a/modules/gpuarithm/doc/core.rst +++ b/modules/gpuarithm/doc/core.rst @@ -6,12 +6,12 @@ Core Operations on Matrices gpu::merge --------------- +---------- Makes a multi-channel matrix out of several single-channel matrices. -.. ocv:function:: void gpu::merge(const GpuMat* src, size_t n, GpuMat& dst, Stream& stream = Stream::Null()) +.. ocv:function:: void gpu::merge(const GpuMat* src, size_t n, OutputArray dst, Stream& stream = Stream::Null()) -.. ocv:function:: void gpu::merge(const vector& src, GpuMat& dst, Stream& stream = Stream::Null()) +.. ocv:function:: void gpu::merge(const std::vector<GpuMat>& src, OutputArray dst, Stream& stream = Stream::Null()) :param src: Array/vector of source matrices. @@ -26,12 +26,12 @@ Makes a multi-channel matrix out of several single-channel matrices. gpu::split --------------- +---------- Copies each plane of a multi-channel matrix into an array. -.. ocv:function:: void gpu::split(const GpuMat& src, GpuMat* dst, Stream& stream = Stream::Null()) +.. ocv:function:: void gpu::split(InputArray src, GpuMat* dst, Stream& stream = Stream::Null()) -.. ocv:function:: void gpu::split(const GpuMat& src, vector& dst, Stream& stream = Stream::Null()) +.. ocv:function:: void gpu::split(InputArray src, vector<GpuMat>& dst, Stream& stream = Stream::Null()) :param src: Source matrix. @@ -43,15 +43,95 @@ Copies each plane of a multi-channel matrix into an array. +gpu::transpose +-------------- +Transposes a matrix. + +..
ocv:function:: void gpu::transpose(InputArray src1, OutputArray dst, Stream& stream = Stream::Null()) + + :param src1: Source matrix. 1-, 4-, 8-byte element sizes are supported for now. + + :param dst: Destination matrix. + + :param stream: Stream for the asynchronous version. + +.. seealso:: :ocv:func:`transpose` + + + +gpu::flip +--------- +Flips a 2D matrix around vertical, horizontal, or both axes. + +.. ocv:function:: void gpu::flip(InputArray src, OutputArray dst, int flipCode, Stream& stream = Stream::Null()) + + :param src: Source matrix. Supports 1-, 3- and 4-channel images with ``CV_8U``, ``CV_16U``, ``CV_32S`` or ``CV_32F`` depth. + + :param dst: Destination matrix. + + :param flipCode: Flip mode for the source: + + * ``0`` Flips around x-axis. + + * ``> 0`` Flips around y-axis. + + * ``< 0`` Flips around both axes. + + :param stream: Stream for the asynchronous version. + +.. seealso:: :ocv:func:`flip` + + + +gpu::LookUpTable +---------------- +.. ocv:class:: gpu::LookUpTable : public Algorithm + +Base class for a transform using a look-up table. :: + + class CV_EXPORTS LookUpTable : public Algorithm + { + public: + virtual void transform(InputArray src, OutputArray dst, Stream& stream = Stream::Null()) = 0; + }; + +.. seealso:: :ocv:func:`LUT` + + + +gpu::LookUpTable::transform +--------------------------- +Transforms the source matrix into the destination matrix using the given look-up table: ``dst(I) = lut(src(I))`` . + +.. ocv:function:: void gpu::LookUpTable::transform(InputArray src, OutputArray dst, Stream& stream = Stream::Null()) + + :param src: Source matrix. ``CV_8UC1`` and ``CV_8UC3`` matrices are supported for now. + + :param dst: Destination matrix. + + :param stream: Stream for the asynchronous version. + + + +gpu::createLookUpTable +---------------------- +Creates implementation for :ocv:class:`gpu::LookUpTable` . + +.. ocv:function:: Ptr<LookUpTable> createLookUpTable(InputArray lut) + + :param lut: Look-up table of 256 elements.
It is a continuous ``CV_8U`` matrix. + + + gpu::copyMakeBorder ----------------------- Forms a border around an image. -.. ocv:function:: void gpu::copyMakeBorder(const GpuMat& src, GpuMat& dst, int top, int bottom, int left, int right, int borderType, const Scalar& value = Scalar(), Stream& stream = Stream::Null()) +.. ocv:function:: void gpu::copyMakeBorder(InputArray src, OutputArray dst, int top, int bottom, int left, int right, int borderType, Scalar value = Scalar(), Stream& stream = Stream::Null()) - :param src: Source image. ``CV_8UC1`` , ``CV_8UC4`` , ``CV_32SC1`` , and ``CV_32FC1`` types are supported. + :param src: Source image. ``CV_8UC1`` , ``CV_8UC4`` , ``CV_32SC1`` , and ``CV_32FC1`` types are supported. - :param dst: Destination image with the same type as ``src``. The size is ``Size(src.cols+left+right, src.rows+top+bottom)`` . + :param dst: Destination image with the same type as ``src``. The size is ``Size(src.cols+left+right, src.rows+top+bottom)`` . :param top: @@ -68,61 +148,3 @@ Forms a border around an image. :param stream: Stream for the asynchronous version. .. seealso:: :ocv:func:`copyMakeBorder` - - - -gpu::transpose ------------------- -Transposes a matrix. - -.. ocv:function:: void gpu::transpose( const GpuMat& src1, GpuMat& dst, Stream& stream=Stream::Null() ) - - :param src1: Source matrix. 1-, 4-, 8-byte element sizes are supported for now (CV_8UC1, CV_8UC4, CV_16UC2, CV_32FC1, etc). - - :param dst: Destination matrix. - - :param stream: Stream for the asynchronous version. - -.. seealso:: :ocv:func:`transpose` - - - -gpu::flip -------------- -Flips a 2D matrix around vertical, horizontal, or both axes. - -.. ocv:function:: void gpu::flip( const GpuMat& a, GpuMat& b, int flipCode, Stream& stream=Stream::Null() ) - - :param a: Source matrix. Supports 1, 3 and 4 channels images with ``CV_8U``, ``CV_16U``, ``CV_32S`` or ``CV_32F`` depth. - - :param b: Destination matrix. 
- - :param flipCode: Flip mode for the source: - - * ``0`` Flips around x-axis. - - * ``>0`` Flips around y-axis. - - * ``<0`` Flips around both axes. - - :param stream: Stream for the asynchronous version. - -.. seealso:: :ocv:func:`flip` - - - -gpu::LUT ------------- -Transforms the source matrix into the destination matrix using the given look-up table: ``dst(I) = lut(src(I))`` - -.. ocv:function:: void gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& stream = Stream::Null()) - - :param src: Source matrix. ``CV_8UC1`` and ``CV_8UC3`` matrices are supported for now. - - :param lut: Look-up table of 256 elements. It is a continuous ``CV_8U`` matrix. - - :param dst: Destination matrix with the same depth as ``lut`` and the same number of channels as ``src`` . - - :param stream: Stream for the asynchronous version. - -.. seealso:: :ocv:func:`LUT` diff --git a/modules/gpuarithm/doc/element_operations.rst b/modules/gpuarithm/doc/element_operations.rst index eae2ad7a2..eb616c1c3 100644 --- a/modules/gpuarithm/doc/element_operations.rst +++ b/modules/gpuarithm/doc/element_operations.rst @@ -6,20 +6,16 @@ Per-element Operations gpu::add ------------- +-------- Computes a matrix-matrix or matrix-scalar sum. -.. ocv:function:: void gpu::add( const GpuMat& a, const GpuMat& b, GpuMat& c, const GpuMat& mask=GpuMat(), int dtype=-1, Stream& stream=Stream::Null() ) +.. ocv:function:: void gpu::add(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), int dtype = -1, Stream& stream = Stream::Null()) -.. ocv:function:: void gpu::add( const GpuMat& a, const Scalar& sc, GpuMat& c, const GpuMat& mask=GpuMat(), int dtype=-1, Stream& stream=Stream::Null() ) + :param src1: First source matrix or scalar. - :param a: First source matrix. + :param src2: Second source matrix or scalar. Matrix should have the same size and type as ``src1`` . - :param b: Second source matrix to be added to ``a`` . Matrix should have the same size and type as ``a`` . 
- - :param sc: A scalar to be added to ``a`` . - - :param c: Destination matrix that has the same size and number of channels as the input array(s). The depth is defined by ``dtype`` or ``a`` depth. + :param dst: Destination matrix that has the same size and number of channels as the input array(s). The depth is defined by ``dtype`` or ``src1`` depth. :param mask: Optional operation mask, 8-bit single channel array, that specifies elements of the destination array to be changed. @@ -32,20 +28,16 @@ Computes a matrix-matrix or matrix-scalar sum. gpu::subtract ------------------ +------------- Computes a matrix-matrix or matrix-scalar difference. -.. ocv:function:: void gpu::subtract( const GpuMat& a, const GpuMat& b, GpuMat& c, const GpuMat& mask=GpuMat(), int dtype=-1, Stream& stream=Stream::Null() ) +.. ocv:function:: void gpu::subtract(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), int dtype = -1, Stream& stream = Stream::Null()) -.. ocv:function:: void gpu::subtract( const GpuMat& a, const Scalar& sc, GpuMat& c, const GpuMat& mask=GpuMat(), int dtype=-1, Stream& stream=Stream::Null() ) + :param src1: First source matrix or scalar. - :param a: First source matrix. + :param src2: Second source matrix or scalar. Matrix should have the same size and type as ``src1`` . - :param b: Second source matrix to be added to ``a`` . Matrix should have the same size and type as ``a`` . - - :param sc: A scalar to be added to ``a`` . - - :param c: Destination matrix that has the same size and number of channels as the input array(s). The depth is defined by ``dtype`` or ``a`` depth. + :param dst: Destination matrix that has the same size and number of channels as the input array(s). The depth is defined by ``dtype`` or ``src1`` depth. :param mask: Optional operation mask, 8-bit single channel array, that specifies elements of the destination array to be changed. @@ -58,20 +50,16 @@ Computes a matrix-matrix or matrix-scalar difference. 
gpu::multiply ------------------ +------------- Computes a matrix-matrix or matrix-scalar per-element product. -.. ocv:function:: void gpu::multiply( const GpuMat& a, const GpuMat& b, GpuMat& c, double scale=1, int dtype=-1, Stream& stream=Stream::Null() ) +.. ocv:function:: void gpu::multiply(InputArray src1, InputArray src2, OutputArray dst, double scale = 1, int dtype = -1, Stream& stream = Stream::Null()) -.. ocv:function:: void gpu::multiply( const GpuMat& a, const Scalar& sc, GpuMat& c, double scale=1, int dtype=-1, Stream& stream=Stream::Null() ) + :param src1: First source matrix or scalar. - :param a: First source matrix. + :param src2: Second source matrix or scalar. - :param b: Second source matrix to be multiplied by ``a`` elements. - - :param sc: A scalar to be multiplied by ``a`` elements. - - :param c: Destination matrix that has the same size and number of channels as the input array(s). The depth is defined by ``dtype`` or ``a`` depth. + :param dst: Destination matrix that has the same size and number of channels as the input array(s). The depth is defined by ``dtype`` or ``src1`` depth. :param scale: Optional scale factor. @@ -87,19 +75,15 @@ gpu::divide ----------- Computes a matrix-matrix or matrix-scalar division. -.. ocv:function:: void gpu::divide( const GpuMat& a, const GpuMat& b, GpuMat& c, double scale=1, int dtype=-1, Stream& stream=Stream::Null() ) +.. ocv:function:: void gpu::divide(InputArray src1, InputArray src2, OutputArray dst, double scale = 1, int dtype = -1, Stream& stream = Stream::Null()) -.. ocv:function:: void gpu::divide(const GpuMat& a, const Scalar& sc, GpuMat& c, double scale = 1, int dtype = -1, Stream& stream = Stream::Null()) +.. ocv:function:: void gpu::divide(double src1, InputArray src2, OutputArray dst, int dtype = -1, Stream& stream = Stream::Null()) -.. 
ocv:function:: void gpu::divide( double scale, const GpuMat& b, GpuMat& c, int dtype=-1, Stream& stream=Stream::Null() ) + :param src1: First source matrix or a scalar. - :param a: First source matrix or a scalar. + :param src2: Second source matrix or scalar. - :param b: Second source matrix. The ``a`` elements are divided by it. - - :param sc: A scalar to be divided by the elements of ``a`` matrix. - - :param c: Destination matrix that has the same size and number of channels as the input array(s). The depth is defined by ``dtype`` or ``a`` depth. + :param dst: Destination matrix that has the same size and number of channels as the input array(s). The depth is defined by ``dtype`` or ``src1`` depth. :param scale: Optional scale factor. @@ -113,11 +97,296 @@ This function, in contrast to :ocv:func:`divide`, uses a round-down rounding mod +gpu::absdiff +------------ +Computes per-element absolute difference of two matrices (or of a matrix and scalar). + +.. ocv:function:: void gpu::absdiff(InputArray src1, InputArray src2, OutputArray dst, Stream& stream = Stream::Null()) + + :param src1: First source matrix or scalar. + + :param src2: Second source matrix or scalar. + + :param dst: Destination matrix that has the same size and type as the input array(s). + + :param stream: Stream for the asynchronous version. + +.. seealso:: :ocv:func:`absdiff` + + + +gpu::abs +-------- +Computes an absolute value of each matrix element. + +.. ocv:function:: void gpu::abs(InputArray src, OutputArray dst, Stream& stream = Stream::Null()) + + :param src: Source matrix. + + :param dst: Destination matrix with the same size and type as ``src`` . + + :param stream: Stream for the asynchronous version. + +.. seealso:: :ocv:func:`abs` + + + +gpu::sqr +-------- +Computes a square value of each matrix element. + +.. ocv:function:: void gpu::sqr(InputArray src, OutputArray dst, Stream& stream = Stream::Null()) + + :param src: Source matrix. 
+ + :param dst: Destination matrix with the same size and type as ``src`` . + + :param stream: Stream for the asynchronous version. + + + +gpu::sqrt +--------- +Computes a square root of each matrix element. + +.. ocv:function:: void gpu::sqrt(InputArray src, OutputArray dst, Stream& stream = Stream::Null()) + + :param src: Source matrix. + + :param dst: Destination matrix with the same size and type as ``src`` . + + :param stream: Stream for the asynchronous version. + +.. seealso:: :ocv:func:`sqrt` + + + +gpu::exp +-------- +Computes the exponential of each matrix element. + +.. ocv:function:: void gpu::exp(InputArray src, OutputArray dst, Stream& stream = Stream::Null()) + + :param src: Source matrix. + + :param dst: Destination matrix with the same size and type as ``src`` . + + :param stream: Stream for the asynchronous version. + +.. seealso:: :ocv:func:`exp` + + + +gpu::log +-------- +Computes the natural logarithm of the absolute value of each matrix element. + +.. ocv:function:: void gpu::log(InputArray src, OutputArray dst, Stream& stream = Stream::Null()) + + :param src: Source matrix. + + :param dst: Destination matrix with the same size and type as ``src`` . + + :param stream: Stream for the asynchronous version. + +.. seealso:: :ocv:func:`log` + + + +gpu::pow +-------- +Raises every matrix element to a power. + +.. ocv:function:: void gpu::pow(InputArray src, double power, OutputArray dst, Stream& stream = Stream::Null()) + + :param src: Source matrix. + + :param power: Exponent of power. + + :param dst: Destination matrix with the same size and type as ``src`` . + + :param stream: Stream for the asynchronous version. + +The function ``pow`` raises every element of the input matrix to ``power`` : + +.. math:: + + \texttt{dst} (I) = \fork{\texttt{src}(I)^{\texttt{power}}}{if \texttt{power} is integer}{|\texttt{src}(I)|^{\texttt{power}}}{otherwise} + +.. seealso:: :ocv:func:`pow` + + + +gpu::compare +------------ +Compares elements of two matrices (or of a matrix and scalar). + +..
ocv:function:: void gpu::compare(InputArray src1, InputArray src2, OutputArray dst, int cmpop, Stream& stream = Stream::Null()) + + :param src1: First source matrix or scalar. + + :param src2: Second source matrix or scalar. + + :param dst: Destination matrix that has the same size and type as the input array(s). + + :param cmpop: Flag specifying the relation between the elements to be checked: + + * **CMP_EQ:** ``src1(.) == src2(.)`` + * **CMP_GT:** ``src1(.) > src2(.)`` + * **CMP_GE:** ``src1(.) >= src2(.)`` + * **CMP_LT:** ``src1(.) < src2(.)`` + * **CMP_LE:** ``src1(.) <= src2(.)`` + * **CMP_NE:** ``src1(.) != src2(.)`` + + :param stream: Stream for the asynchronous version. + +.. seealso:: :ocv:func:`compare` + + + +gpu::bitwise_not +---------------- +Performs a per-element bitwise inversion. + +.. ocv:function:: void gpu::bitwise_not(InputArray src, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null()) + + :param src: Source matrix. + + :param dst: Destination matrix with the same size and type as ``src`` . + + :param mask: Optional operation mask. 8-bit single channel image. + + :param stream: Stream for the asynchronous version. + + + +gpu::bitwise_or +--------------- +Performs a per-element bitwise disjunction of two matrices (or of matrix and scalar). + +.. ocv:function:: void gpu::bitwise_or(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null()) + + :param src1: First source matrix or scalar. + + :param src2: Second source matrix or scalar. + + :param dst: Destination matrix that has the same size and type as the input array(s). + + :param mask: Optional operation mask. 8-bit single channel image. + + :param stream: Stream for the asynchronous version. + + + +gpu::bitwise_and +---------------- +Performs a per-element bitwise conjunction of two matrices (or of matrix and scalar). + +..
ocv:function:: void gpu::bitwise_and(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null()) + + :param src1: First source matrix or scalar. + + :param src2: Second source matrix or scalar. + + :param dst: Destination matrix that has the same size and type as the input array(s). + + :param mask: Optional operation mask. 8-bit single channel image. + + :param stream: Stream for the asynchronous version. + + + +gpu::bitwise_xor +---------------- +Performs a per-element bitwise ``exclusive or`` operation of two matrices (or of matrix and scalar). + +.. ocv:function:: void gpu::bitwise_xor(InputArray src1, InputArray src2, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null()) + + :param src1: First source matrix or scalar. + + :param src2: Second source matrix or scalar. + + :param dst: Destination matrix that has the same size and type as the input array(s). + + :param mask: Optional operation mask. 8-bit single channel image. + + :param stream: Stream for the asynchronous version. + + + +gpu::rshift +----------- +Performs pixel by pixel right shift of an image by a constant value. + +.. ocv:function:: void gpu::rshift(InputArray src, Scalar_<int> val, OutputArray dst, Stream& stream = Stream::Null()) + + :param src: Source matrix. Supports 1-, 3- and 4-channel images with integer elements. + + :param val: Constant values, one per channel. + + :param dst: Destination matrix with the same size and type as ``src`` . + + :param stream: Stream for the asynchronous version. + + + +gpu::lshift +----------- +Performs pixel by pixel left shift of an image by a constant value. + +.. ocv:function:: void gpu::lshift(InputArray src, Scalar_<int> val, OutputArray dst, Stream& stream = Stream::Null()) + + :param src: Source matrix. Supports 1-, 3- and 4-channel images with ``CV_8U`` , ``CV_16U`` or ``CV_32S`` depth. + + :param val: Constant values, one per channel.
+ + :param dst: Destination matrix with the same size and type as ``src`` . + + :param stream: Stream for the asynchronous version. + + + +gpu::min +-------- +Computes the per-element minimum of two matrices (or a matrix and a scalar). + +.. ocv:function:: void gpu::min(InputArray src1, InputArray src2, OutputArray dst, Stream& stream = Stream::Null()) + + :param src1: First source matrix or scalar. + + :param src2: Second source matrix or scalar. + + :param dst: Destination matrix that has the same size and type as the input array(s). + + :param stream: Stream for the asynchronous version. + +.. seealso:: :ocv:func:`min` + + + +gpu::max +-------- +Computes the per-element maximum of two matrices (or a matrix and a scalar). + +.. ocv:function:: void gpu::max(InputArray src1, InputArray src2, OutputArray dst, Stream& stream = Stream::Null()) + + :param src1: First source matrix or scalar. + + :param src2: Second source matrix or scalar. + + :param dst: Destination matrix that has the same size and type as the input array(s). + + :param stream: Stream for the asynchronous version. + +.. seealso:: :ocv:func:`max` + + + gpu::addWeighted ---------------- Computes the weighted sum of two arrays. -.. ocv:function:: void gpu::addWeighted(const GpuMat& src1, double alpha, const GpuMat& src2, double beta, double gamma, GpuMat& dst, int dtype = -1, Stream& stream = Stream::Null()) +.. ocv:function:: void gpu::addWeighted(InputArray src1, double alpha, InputArray src2, double beta, double gamma, OutputArray dst, int dtype = -1, Stream& stream = Stream::Null()) :param src1: First source array. @@ -147,311 +416,11 @@ where ``I`` is a multi-dimensional index of array elements. In case of multi-cha -gpu::abs ------------- -Computes an absolute value of each matrix element. - -.. ocv:function:: void gpu::abs(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()) - - :param src: Source matrix. Supports ``CV_16S`` and ``CV_32F`` depth. 
- - :param dst: Destination matrix with the same size and type as ``src`` . - - :param stream: Stream for the asynchronous version. - -.. seealso:: :ocv:func:`abs` - - - -gpu::sqr ------------- -Computes a square value of each matrix element. - -.. ocv:function:: void gpu::sqr(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()) - - :param src: Source matrix. Supports ``CV_8U`` , ``CV_16U`` , ``CV_16S`` and ``CV_32F`` depth. - - :param dst: Destination matrix with the same size and type as ``src`` . - - :param stream: Stream for the asynchronous version. - - - -gpu::sqrt ------------- -Computes a square root of each matrix element. - -.. ocv:function:: void gpu::sqrt(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()) - - :param src: Source matrix. Supports ``CV_8U`` , ``CV_16U`` , ``CV_16S`` and ``CV_32F`` depth. - - :param dst: Destination matrix with the same size and type as ``src`` . - - :param stream: Stream for the asynchronous version. - -.. seealso:: :ocv:func:`sqrt` - - - -gpu::exp ------------- -Computes an exponent of each matrix element. - -.. ocv:function:: void gpu::exp( const GpuMat& a, GpuMat& b, Stream& stream=Stream::Null() ) - - :param a: Source matrix. Supports ``CV_8U`` , ``CV_16U`` , ``CV_16S`` and ``CV_32F`` depth. - - :param b: Destination matrix with the same size and type as ``a`` . - - :param stream: Stream for the asynchronous version. - -.. seealso:: :ocv:func:`exp` - - - -gpu::log ------------- -Computes a natural logarithm of absolute value of each matrix element. - -.. ocv:function:: void gpu::log( const GpuMat& a, GpuMat& b, Stream& stream=Stream::Null() ) - - :param a: Source matrix. Supports ``CV_8U`` , ``CV_16U`` , ``CV_16S`` and ``CV_32F`` depth. - - :param b: Destination matrix with the same size and type as ``a`` . - - :param stream: Stream for the asynchronous version. - -.. seealso:: :ocv:func:`log` - - - -gpu::pow ------------- -Raises every matrix element to a power. - -.. 
ocv:function:: void gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream = Stream::Null()) - - :param src: Source matrix. Supports all type, except ``CV_64F`` depth. - - :param power: Exponent of power. - - :param dst: Destination matrix with the same size and type as ``src`` . - - :param stream: Stream for the asynchronous version. - -The function ``pow`` raises every element of the input matrix to ``p`` : - -.. math:: - - \texttt{dst} (I) = \fork{\texttt{src}(I)^p}{if \texttt{p} is integer}{|\texttt{src}(I)|^p}{otherwise} - -.. seealso:: :ocv:func:`pow` - - - -gpu::absdiff ----------------- -Computes per-element absolute difference of two matrices (or of a matrix and scalar). - -.. ocv:function:: void gpu::absdiff( const GpuMat& a, const GpuMat& b, GpuMat& c, Stream& stream=Stream::Null() ) - -.. ocv:function:: void gpu::absdiff( const GpuMat& a, const Scalar& s, GpuMat& c, Stream& stream=Stream::Null() ) - - :param a: First source matrix. - - :param b: Second source matrix to be added to ``a`` . - - :param s: A scalar to be added to ``a`` . - - :param c: Destination matrix with the same size and type as ``a`` . - - :param stream: Stream for the asynchronous version. - -.. seealso:: :ocv:func:`absdiff` - - - -gpu::compare ----------------- -Compares elements of two matrices. - -.. ocv:function:: void gpu::compare( const GpuMat& a, const GpuMat& b, GpuMat& c, int cmpop, Stream& stream=Stream::Null() ) - -.. ocv:function:: void gpu::compare(const GpuMat& a, Scalar sc, GpuMat& c, int cmpop, Stream& stream = Stream::Null()) - - :param a: First source matrix. - - :param b: Second source matrix with the same size and type as ``a`` . - - :param sc: A scalar to be compared with ``a`` . - - :param c: Destination matrix with the same size as ``a`` and the ``CV_8UC1`` type. - - :param cmpop: Flag specifying the relation between the elements to be checked: - - * **CMP_EQ:** ``a(.) == b(.)`` - * **CMP_GT:** ``a(.) < b(.)`` - * **CMP_GE:** ``a(.) 
<= b(.)`` - * **CMP_LT:** ``a(.) < b(.)`` - * **CMP_LE:** ``a(.) <= b(.)`` - * **CMP_NE:** ``a(.) != b(.)`` - - :param stream: Stream for the asynchronous version. - -.. seealso:: :ocv:func:`compare` - - - -gpu::bitwise_not --------------------- -Performs a per-element bitwise inversion. - -.. ocv:function:: void gpu::bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null()) - - :param src: Source matrix. - - :param dst: Destination matrix with the same size and type as ``src`` . - - :param mask: Optional operation mask. 8-bit single channel image. - - :param stream: Stream for the asynchronous version. - - - -gpu::bitwise_or -------------------- -Performs a per-element bitwise disjunction of two matrices or of matrix and scalar. - -.. ocv:function:: void gpu::bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null()) -.. ocv:function:: void gpu::bitwise_or(const GpuMat& src1, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null()) - - :param src1: First source matrix. - - :param src2: Second source matrix with the same size and type as ``src1`` . - - :param dst: Destination matrix with the same size and type as ``src1`` . - - :param mask: Optional operation mask. 8-bit single channel image. - - :param stream: Stream for the asynchronous version. - - - -gpu::bitwise_and --------------------- -Performs a per-element bitwise conjunction of two matrices or of matrix and scalar. - -.. ocv:function:: void gpu::bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null()) -.. ocv:function:: void gpu::bitwise_and(const GpuMat& src1, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null()) - - :param src1: First source matrix. - - :param src2: Second source matrix with the same size and type as ``src1`` . - - :param dst: Destination matrix with the same size and type as ``src1`` . 
- - :param mask: Optional operation mask. 8-bit single channel image. - - :param stream: Stream for the asynchronous version. - - - -gpu::bitwise_xor --------------------- -Performs a per-element bitwise ``exclusive or`` operation of two matrices of matrix and scalar. - -.. ocv:function:: void gpu::bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask=GpuMat(), Stream& stream = Stream::Null()) -.. ocv:function:: void gpu::bitwise_xor(const GpuMat& src1, const Scalar& sc, GpuMat& dst, Stream& stream = Stream::Null()) - - :param src1: First source matrix. - - :param src2: Second source matrix with the same size and type as ``src1`` . - - :param dst: Destination matrix with the same size and type as ``src1`` . - - :param mask: Optional operation mask. 8-bit single channel image. - - :param stream: Stream for the asynchronous version. - - - -gpu::rshift --------------------- -Performs pixel by pixel right shift of an image by a constant value. - -.. ocv:function:: void gpu::rshift( const GpuMat& src, Scalar_ sc, GpuMat& dst, Stream& stream=Stream::Null() ) - - :param src: Source matrix. Supports 1, 3 and 4 channels images with integers elements. - - :param sc: Constant values, one per channel. - - :param dst: Destination matrix with the same size and type as ``src`` . - - :param stream: Stream for the asynchronous version. - - - -gpu::lshift --------------------- -Performs pixel by pixel right left of an image by a constant value. - -.. ocv:function:: void gpu::lshift( const GpuMat& src, Scalar_ sc, GpuMat& dst, Stream& stream=Stream::Null() ) - - :param src: Source matrix. Supports 1, 3 and 4 channels images with ``CV_8U`` , ``CV_16U`` or ``CV_32S`` depth. - - :param sc: Constant values, one per channel. - - :param dst: Destination matrix with the same size and type as ``src`` . - - :param stream: Stream for the asynchronous version. 
- - - -gpu::min ------------- -Computes the per-element minimum of two matrices (or a matrix and a scalar). - -.. ocv:function:: void gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream = Stream::Null()) - -.. ocv:function:: void gpu::min(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream = Stream::Null()) - - :param src1: First source matrix. - - :param src2: Second source matrix or a scalar to compare ``src1`` elements with. - - :param dst: Destination matrix with the same size and type as ``src1`` . - - :param stream: Stream for the asynchronous version. - -.. seealso:: :ocv:func:`min` - - - -gpu::max ------------- -Computes the per-element maximum of two matrices (or a matrix and a scalar). - -.. ocv:function:: void gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream = Stream::Null()) - -.. ocv:function:: void gpu::max(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream = Stream::Null()) - - :param src1: First source matrix. - - :param src2: Second source matrix or a scalar to compare ``src1`` elements with. - - :param dst: Destination matrix with the same size and type as ``src1`` . - - :param stream: Stream for the asynchronous version. - -.. seealso:: :ocv:func:`max` - - - gpu::threshold ------------------- +-------------- Applies a fixed-level threshold to each array element. -.. ocv:function:: double gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxval, int type, Stream& stream = Stream::Null()) +.. ocv:function:: double gpu::threshold(InputArray src, OutputArray dst, double thresh, double maxval, int type, Stream& stream = Stream::Null()) :param src: Source array (single-channel). @@ -470,12 +439,12 @@ Applies a fixed-level threshold to each array element. gpu::magnitude ------------------- +-------------- Computes magnitudes of complex matrix elements. -.. ocv:function:: void gpu::magnitude( const GpuMat& xy, GpuMat& magnitude, Stream& stream=Stream::Null() ) +.. 
ocv:function:: void gpu::magnitude(InputArray xy, OutputArray magnitude, Stream& stream = Stream::Null()) -.. ocv:function:: void gpu::magnitude(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, Stream& stream = Stream::Null()) +.. ocv:function:: void gpu::magnitude(InputArray x, InputArray y, OutputArray magnitude, Stream& stream = Stream::Null()) :param xy: Source complex matrix in the interleaved format ( ``CV_32FC2`` ). @@ -492,12 +461,12 @@ Computes magnitudes of complex matrix elements. gpu::magnitudeSqr ---------------------- +----------------- Computes squared magnitudes of complex matrix elements. -.. ocv:function:: void gpu::magnitudeSqr( const GpuMat& xy, GpuMat& magnitude, Stream& stream=Stream::Null() ) +.. ocv:function:: void gpu::magnitudeSqr(InputArray xy, OutputArray magnitude, Stream& stream=Stream::Null() ) -.. ocv:function:: void gpu::magnitudeSqr(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, Stream& stream = Stream::Null()) +.. ocv:function:: void gpu::magnitudeSqr(InputArray x, InputArray y, OutputArray magnitude, Stream& stream = Stream::Null()) :param xy: Source complex matrix in the interleaved format ( ``CV_32FC2`` ). @@ -512,10 +481,10 @@ Computes squared magnitudes of complex matrix elements. gpu::phase --------------- +---------- Computes polar angles of complex matrix elements. -.. ocv:function:: void gpu::phase(const GpuMat& x, const GpuMat& y, GpuMat& angle, bool angleInDegrees=false, Stream& stream = Stream::Null()) +.. ocv:function:: void gpu::phase(InputArray x, InputArray y, OutputArray angle, bool angleInDegrees = false, Stream& stream = Stream::Null()) :param x: Source matrix containing real components ( ``CV_32FC1`` ). @@ -532,10 +501,10 @@ Computes polar angles of complex matrix elements. gpu::cartToPolar --------------------- +---------------- Converts Cartesian coordinates into polar. -.. 
ocv:function:: void gpu::cartToPolar(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, GpuMat& angle, bool angleInDegrees=false, Stream& stream = Stream::Null()) +.. ocv:function:: void gpu::cartToPolar(InputArray x, InputArray y, OutputArray magnitude, OutputArray angle, bool angleInDegrees = false, Stream& stream = Stream::Null()) :param x: Source matrix containing real components ( ``CV_32FC1`` ). @@ -554,10 +523,10 @@ Converts Cartesian coordinates into polar. gpu::polarToCart --------------------- +---------------- Converts polar coordinates into Cartesian. -.. ocv:function:: void gpu::polarToCart(const GpuMat& magnitude, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees=false, Stream& stream = Stream::Null()) +.. ocv:function:: void gpu::polarToCart(InputArray magnitude, InputArray angle, OutputArray x, OutputArray y, bool angleInDegrees = false, Stream& stream = Stream::Null()) :param magnitude: Source matrix containing magnitudes ( ``CV_32FC1`` ). diff --git a/modules/gpuarithm/doc/reductions.rst b/modules/gpuarithm/doc/reductions.rst index 938efc35b..b34c2d860 100644 --- a/modules/gpuarithm/doc/reductions.rst +++ b/modules/gpuarithm/doc/reductions.rst @@ -6,16 +6,16 @@ Matrix Reductions gpu::norm -------------- +--------- Returns the norm of a matrix (or difference of two matrices). -.. ocv:function:: double gpu::norm(const GpuMat& src1, int normType=NORM_L2) +.. ocv:function:: double gpu::norm(InputArray src1, int normType) -.. ocv:function:: double gpu::norm(const GpuMat& src1, int normType, GpuMat& buf) +.. ocv:function:: double gpu::norm(InputArray src1, int normType, GpuMat& buf) -.. ocv:function:: double gpu::norm(const GpuMat& src1, int normType, const GpuMat& mask, GpuMat& buf) +.. ocv:function:: double gpu::norm(InputArray src1, int normType, InputArray mask, GpuMat& buf) -.. ocv:function:: double gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType=NORM_L2) +.. 
ocv:function:: double gpu::norm(InputArray src1, InputArray src2, int normType=NORM_L2) :param src1: Source matrix. Any matrices except 64F are supported. @@ -32,14 +32,14 @@ Returns the norm of a matrix (or difference of two matrices). gpu::sum ------------- +-------- Returns the sum of matrix elements. -.. ocv:function:: Scalar gpu::sum(const GpuMat& src) +.. ocv:function:: Scalar gpu::sum(InputArray src) -.. ocv:function:: Scalar gpu::sum(const GpuMat& src, GpuMat& buf) +.. ocv:function:: Scalar gpu::sum(InputArray src, GpuMat& buf) -.. ocv:function:: Scalar gpu::sum(const GpuMat& src, const GpuMat& mask, GpuMat& buf) +.. ocv:function:: Scalar gpu::sum(InputArray src, InputArray mask, GpuMat& buf) :param src: Source image of any depth except for ``CV_64F`` . @@ -52,14 +52,14 @@ Returns the sum of matrix elements. gpu::absSum ---------------- +----------- Returns the sum of absolute values for matrix elements. -.. ocv:function:: Scalar gpu::absSum(const GpuMat& src) +.. ocv:function:: Scalar gpu::absSum(InputArray src) -.. ocv:function:: Scalar gpu::absSum(const GpuMat& src, GpuMat& buf) +.. ocv:function:: Scalar gpu::absSum(InputArray src, GpuMat& buf) -.. ocv:function:: Scalar gpu::absSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf) +.. ocv:function:: Scalar gpu::absSum(InputArray src, InputArray mask, GpuMat& buf) :param src: Source image of any depth except for ``CV_64F`` . @@ -70,14 +70,14 @@ Returns the sum of absolute values for matrix elements. gpu::sqrSum ---------------- +----------- Returns the squared sum of matrix elements. -.. ocv:function:: Scalar gpu::sqrSum(const GpuMat& src) +.. ocv:function:: Scalar gpu::sqrSum(InputArray src) -.. ocv:function:: Scalar gpu::sqrSum(const GpuMat& src, GpuMat& buf) +.. ocv:function:: Scalar gpu::sqrSum(InputArray src, GpuMat& buf) -.. ocv:function:: Scalar gpu::sqrSum(const GpuMat& src, const GpuMat& mask, GpuMat& buf) +.. 
ocv:function:: Scalar gpu::sqrSum(InputArray src, InputArray mask, GpuMat& buf) :param src: Source image of any depth except for ``CV_64F`` . @@ -88,12 +88,12 @@ Returns the squared sum of matrix elements. gpu::minMax ---------------- +----------- Finds global minimum and maximum matrix elements and returns their values. -.. ocv:function:: void gpu::minMax(const GpuMat& src, double* minVal, double* maxVal=0, const GpuMat& mask=GpuMat()) +.. ocv:function:: void gpu::minMax(InputArray src, double* minVal, double* maxVal=0, InputArray mask=noArray()) -.. ocv:function:: void gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const GpuMat& mask, GpuMat& buf) +.. ocv:function:: void gpu::minMax(InputArray src, double* minVal, double* maxVal, InputArray mask, GpuMat& buf) :param src: Single-channel source image. @@ -112,12 +112,12 @@ The function does not work with ``CV_64F`` images on GPUs with the compute capab gpu::minMaxLoc ------------------- +-------------- Finds global minimum and maximum matrix elements and returns their values with locations. -.. ocv:function:: void gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal=0, Point* minLoc=0, Point* maxLoc=0, const GpuMat& mask=GpuMat()) +.. ocv:function:: void gpu::minMaxLoc(InputArray src, double* minVal, double* maxVal=0, Point* minLoc=0, Point* maxLoc=0, InputArray mask=noArray()) -.. ocv:function:: void gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc, const GpuMat& mask, GpuMat& valbuf, GpuMat& locbuf) +.. ocv:function:: void gpu::minMaxLoc(InputArray src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc, InputArray mask, GpuMat& valbuf, GpuMat& locbuf) :param src: Single-channel source image. @@ -142,12 +142,12 @@ Finds global minimum and maximum matrix elements and returns their values with l gpu::countNonZero ---------------------- +----------------- Counts non-zero matrix elements. -.. 
ocv:function:: int gpu::countNonZero(const GpuMat& src) +.. ocv:function:: int gpu::countNonZero(InputArray src) -.. ocv:function:: int gpu::countNonZero(const GpuMat& src, GpuMat& buf) +.. ocv:function:: int gpu::countNonZero(InputArray src, GpuMat& buf) :param src: Single-channel source image. @@ -163,7 +163,7 @@ gpu::reduce ----------- Reduces a matrix to a vector. -.. ocv:function:: void gpu::reduce(const GpuMat& mtx, GpuMat& vec, int dim, int reduceOp, int dtype = -1, Stream& stream = Stream::Null()) +.. ocv:function:: void gpu::reduce(InputArray mtx, OutputArray vec, int dim, int reduceOp, int dtype = -1, Stream& stream = Stream::Null()) :param mtx: Source 2D matrix. @@ -183,48 +183,20 @@ Reduces a matrix to a vector. :param dtype: When it is negative, the destination vector will have the same type as the source matrix. Otherwise, its type will be ``CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), mtx.channels())`` . + :param stream: Stream for the asynchronous version. + The function ``reduce`` reduces the matrix to a vector by treating the matrix rows/columns as a set of 1D vectors and performing the specified operation on the vectors until a single row/column is obtained. For example, the function can be used to compute horizontal and vertical projections of a raster image. In case of ``CV_REDUCE_SUM`` and ``CV_REDUCE_AVG`` , the output may have a larger element bit-depth to preserve accuracy. And multi-channel arrays are also supported in these two reduction modes. .. seealso:: :ocv:func:`reduce` -gpu::normalize --------------- -Normalizes the norm or value range of an array. - -.. ocv:function:: void gpu::normalize(const GpuMat& src, GpuMat& dst, double alpha = 1, double beta = 0, int norm_type = NORM_L2, int dtype = -1, const GpuMat& mask = GpuMat()) - -.. ocv:function:: void gpu::normalize(const GpuMat& src, GpuMat& dst, double a, double b, int norm_type, int dtype, const GpuMat& mask, GpuMat& norm_buf, GpuMat& cvt_buf) - - :param src: input array. 
- - :param dst: output array of the same size as ``src`` . - - :param alpha: norm value to normalize to or the lower range boundary in case of the range normalization. - - :param beta: upper range boundary in case of the range normalization; it is not used for the norm normalization. - - :param normType: normalization type (see the details below). - - :param dtype: when negative, the output array has the same type as ``src``; otherwise, it has the same number of channels as ``src`` and the depth ``=CV_MAT_DEPTH(dtype)``. - - :param mask: optional operation mask. - - :param norm_buf: Optional buffer to avoid extra memory allocations. It is resized automatically. - - :param cvt_buf: Optional buffer to avoid extra memory allocations. It is resized automatically. - -.. seealso:: :ocv:func:`normalize` - - - gpu::meanStdDev -------------------- +--------------- Computes a mean value and a standard deviation of matrix elements. -.. ocv:function:: void gpu::meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev) -.. ocv:function:: void gpu::meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev, GpuMat& buf) +.. ocv:function:: void gpu::meanStdDev(InputArray mtx, Scalar& mean, Scalar& stddev) +.. ocv:function:: void gpu::meanStdDev(InputArray mtx, Scalar& mean, Scalar& stddev, GpuMat& buf) :param mtx: Source matrix. ``CV_8UC1`` matrices are supported for now. @@ -239,10 +211,10 @@ Computes a mean value and a standard deviation of matrix elements. gpu::rectStdDev -------------------- +--------------- Computes a standard deviation of integral images. -.. ocv:function:: void gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, const Rect& rect, Stream& stream = Stream::Null()) +.. ocv:function:: void gpu::rectStdDev(InputArray src, InputArray sqr, OutputArray dst, Rect rect, Stream& stream = Stream::Null()) :param src: Source image. Only the ``CV_32SC1`` type is supported. @@ -253,3 +225,71 @@ Computes a standard deviation of integral images. 
:param rect: Rectangular window. :param stream: Stream for the asynchronous version. + + + +gpu::normalize +-------------- +Normalizes the norm or value range of an array. + +.. ocv:function:: void gpu::normalize(InputArray src, OutputArray dst, double alpha = 1, double beta = 0, int norm_type = NORM_L2, int dtype = -1, InputArray mask = noArray()) + +.. ocv:function:: void gpu::normalize(InputArray src, OutputArray dst, double alpha, double beta, int norm_type, int dtype, InputArray mask, GpuMat& norm_buf, GpuMat& cvt_buf) + + :param src: Input array. + + :param dst: Output array of the same size as ``src`` . + + :param alpha: Norm value to normalize to or the lower range boundary in case of the range normalization. + + :param beta: Upper range boundary in case of the range normalization; it is not used for the norm normalization. + + :param normType: Normalization type ( ``NORM_MINMAX`` , ``NORM_L2`` , ``NORM_L1`` or ``NORM_INF`` ). + + :param dtype: When negative, the output array has the same type as ``src``; otherwise, it has the same number of channels as ``src`` and the depth ``=CV_MAT_DEPTH(dtype)``. + + :param mask: Optional operation mask. + + :param norm_buf: Optional buffer to avoid extra memory allocations. It is resized automatically. + + :param cvt_buf: Optional buffer to avoid extra memory allocations. It is resized automatically. + +.. seealso:: :ocv:func:`normalize` + + + +gpu::integral +------------- +Computes an integral image. + +.. ocv:function:: void gpu::integral(InputArray src, OutputArray sum, Stream& stream = Stream::Null()) + +.. ocv:function:: void gpu::integral(InputArray src, OutputArray sum, GpuMat& buffer, Stream& stream = Stream::Null()) + + :param src: Source image. Only ``CV_8UC1`` images are supported for now. + + :param sum: Integral image containing 32-bit unsigned integer values packed into ``CV_32SC1`` . + + :param buffer: Optional buffer to avoid extra memory allocations. It is resized automatically. 
+ + :param stream: Stream for the asynchronous version. + +.. seealso:: :ocv:func:`integral` + + + +gpu::sqrIntegral +---------------- +Computes a squared integral image. + +.. ocv:function:: void gpu::sqrIntegral(InputArray src, OutputArray sqsum, Stream& stream = Stream::Null()) + +.. ocv:function:: void gpu::sqrIntegral(InputArray src, OutputArray sqsum, GpuMat& buf, Stream& stream = Stream::Null()) + + :param src: Source image. Only ``CV_8UC1`` images are supported for now. + + :param sqsum: Squared integral image containing 64-bit unsigned integer values packed into ``CV_64FC1`` . + + :param buf: Optional buffer to avoid extra memory allocations. It is resized automatically. + + :param stream: Stream for the asynchronous version. diff --git a/modules/gpufilters/doc/filtering.rst b/modules/gpufilters/doc/filtering.rst index 348a42510..79c2ea51c 100644 --- a/modules/gpufilters/doc/filtering.rst +++ b/modules/gpufilters/doc/filtering.rst @@ -381,7 +381,7 @@ Creates a non-separable linear filter. :param dstType: Output image type. The same type as ``src`` is supported. - :param kernel: 2D array of filter coefficients. Floating-point coefficients will be converted to fixed-point representation before the actual processing. Supports size up to 16. For larger kernels use :ocv:func:`gpu::convolve`. + :param kernel: 2D array of filter coefficients. Floating-point coefficients will be converted to fixed-point representation before the actual processing. Supports size up to 16. For larger kernels use :ocv:class:`gpu::Convolution`. :param anchor: Anchor point. The default value Point(-1, -1) means that the anchor is at the kernel center. @@ -411,7 +411,7 @@ Applies the non-separable 2D linear filter to an image. :param stream: Stream for the asynchronous version. -.. seealso:: :ocv:func:`filter2D`, :ocv:func:`gpu::convolve` +.. 
seealso:: :ocv:func:`filter2D`, :ocv:class:`gpu::Convolution` From 8eb6decb251f74f1a68f982b2057c599fc5545c0 Mon Sep 17 00:00:00 2001 From: Andrew Senin Date: Tue, 11 Jun 2013 21:17:31 +0400 Subject: [PATCH 048/121] Fixed Ximea cameras support --- cmake/OpenCVFindXimea.cmake | 14 ++-- modules/highgui/CMakeLists.txt | 6 +- modules/highgui/src/cap_ximea.cpp | 129 ++++++++++++++++++------------ 3 files changed, 93 insertions(+), 56 deletions(-) diff --git a/cmake/OpenCVFindXimea.cmake b/cmake/OpenCVFindXimea.cmake index 5600275f4..27e2a78ad 100644 --- a/cmake/OpenCVFindXimea.cmake +++ b/cmake/OpenCVFindXimea.cmake @@ -9,6 +9,7 @@ # # Created: 5 Aug 2011 by Marian Zajko (marian.zajko@ximea.com) # Updated: 25 June 2012 by Igor Kuzmin (parafin@ximea.com) +# Updated: 22 October 2012 by Marian Zajko (marian.zajko@ximea.com) # set(XIMEA_FOUND) @@ -18,11 +19,15 @@ set(XIMEA_LIBRARY_DIR) if(WIN32) # Try to find the XIMEA API path in registry. GET_FILENAME_COMPONENT(XIMEA_PATH "[HKEY_CURRENT_USER\\Software\\XIMEA\\CamSupport\\API;Path]" ABSOLUTE) - - if(EXISTS XIMEA_PATH) + + if(EXISTS ${XIMEA_PATH}) set(XIMEA_FOUND 1) # set LIB folders - set(XIMEA_LIBRARY_DIR "${XIMEA_PATH}/x86") + if(CMAKE_CL_64) + set(XIMEA_LIBRARY_DIR "${XIMEA_PATH}/x64") + else() + set(XIMEA_LIBRARY_DIR "${XIMEA_PATH}/x86") + endif() else() set(XIMEA_FOUND 0) endif() @@ -38,5 +43,4 @@ endif() mark_as_advanced(FORCE XIMEA_FOUND) mark_as_advanced(FORCE XIMEA_PATH) -mark_as_advanced(FORCE XIMEA_LIBRARY_DIR) - +mark_as_advanced(FORCE XIMEA_LIBRARY_DIR) \ No newline at end of file diff --git a/modules/highgui/CMakeLists.txt b/modules/highgui/CMakeLists.txt index fad2562c8..05ab99a78 100644 --- a/modules/highgui/CMakeLists.txt +++ b/modules/highgui/CMakeLists.txt @@ -179,7 +179,11 @@ if(HAVE_XIMEA) if(XIMEA_LIBRARY_DIR) link_directories(${XIMEA_LIBRARY_DIR}) endif() - list(APPEND HIGHGUI_LIBRARIES m3api) + if(CMAKE_CL_64) + list(APPEND HIGHGUI_LIBRARIES m3apiX64) + else() + list(APPEND HIGHGUI_LIBRARIES m3api) + 
endif() endif(HAVE_XIMEA) if(HAVE_FFMPEG) diff --git a/modules/highgui/src/cap_ximea.cpp b/modules/highgui/src/cap_ximea.cpp index dbb8f5868..5acf2c09d 100644 --- a/modules/highgui/src/cap_ximea.cpp +++ b/modules/highgui/src/cap_ximea.cpp @@ -20,25 +20,24 @@ public: virtual IplImage* retrieveFrame(int); virtual int getCaptureDomain() { return CV_CAP_XIAPI; } // Return the type of the capture object: CV_CAP_VFW, etc... -protected: +private: void init(); void errMsg(const char* msg, int errNum); + void resetCvImage(); + int getBpp(); IplImage* frame; HANDLE hmv; DWORD numDevices; - XI_IMG image; - int width; - int height; - int format; int timeout; + XI_IMG image; }; /**********************************************************************************/ CvCapture* cvCreateCameraCapture_XIMEA( int index ) { - CvCaptureCAM_XIMEA* capture = new CvCaptureCAM_XIMEA; + CvCaptureCAM_XIMEA* capture = new CvCaptureCAM_XIMEA; if( capture->open( index )) return capture; @@ -79,18 +78,19 @@ bool CvCaptureCAM_XIMEA::open( int wIndex ) // always use auto white ballance mvret = xiSetParamInt( hmv, XI_PRM_AUTO_WB, 1); if(mvret != XI_OK) goto error; + + // default image format RGB24 + mvret = xiSetParamInt( hmv, XI_PRM_IMAGE_DATA_FORMAT, XI_RGB24); + if(mvret != XI_OK) goto error; + int width = 0; mvret = xiGetParamInt( hmv, XI_PRM_WIDTH, &width); if(mvret != XI_OK) goto error; + int height = 0; mvret = xiGetParamInt( hmv, XI_PRM_HEIGHT, &height); if(mvret != XI_OK) goto error; - // default image format RGB24 - format = XI_RGB24; - mvret = xiSetParamInt( hmv, XI_PRM_IMAGE_DATA_FORMAT, format); - if(mvret != XI_OK) goto error; - // allocate frame buffer for RGB24 image frame = cvCreateImage(cvSize( width, height), IPL_DEPTH_8U, 3); @@ -103,10 +103,10 @@ bool CvCaptureCAM_XIMEA::open( int wIndex ) errMsg("StartAcquisition XI_DEVICE failed", mvret); goto error; } - return true; error: + errMsg("Open XI_DEVICE failed", mvret); xiCloseDevice(hmv); hmv = NULL; return false; @@ -116,18 +116,19 
@@ error: void CvCaptureCAM_XIMEA::close() { - if(hmv) - { - xiStopAcquisition(hmv); - xiCloseDevice(hmv); - hmv = NULL; - } + if(frame) + cvReleaseImage(&frame); + + xiStopAcquisition(hmv); + xiCloseDevice(hmv); + hmv = NULL; } /**********************************************************************************/ bool CvCaptureCAM_XIMEA::grabFrame() { + memset(&image, 0, sizeof(XI_IMG)); image.size = sizeof(XI_IMG); int mvret = xiGetImage( hmv, timeout, &image); @@ -151,31 +152,18 @@ bool CvCaptureCAM_XIMEA::grabFrame() IplImage* CvCaptureCAM_XIMEA::retrieveFrame(int) { // update cvImage after format has changed - if( (int)image.width != width || (int)image.height != height || image.frm != (XI_IMG_FORMAT)format) - { - cvReleaseImage(&frame); - switch( image.frm) - { - case XI_MONO8 : frame = cvCreateImage(cvSize( image.width, image.height), IPL_DEPTH_8U, 1); break; - case XI_MONO16 : frame = cvCreateImage(cvSize( image.width, image.height), IPL_DEPTH_16U, 1); break; - case XI_RGB24 : frame = cvCreateImage(cvSize( image.width, image.height), IPL_DEPTH_8U, 3); break; - case XI_RGB32 : frame = cvCreateImage(cvSize( image.width, image.height), IPL_DEPTH_8U, 4); break; - default : - return frame; - } - // update global image format - format = image.frm; - width = image.width; - height = image.height; - } - + resetCvImage(); + // copy pixel data switch( image.frm) { - case XI_MONO8 : memcpy( frame->imageData, image.bp, image.width*image.height); break; - case XI_MONO16 : memcpy( frame->imageData, image.bp, image.width*image.height*sizeof(WORD)); break; - case XI_RGB24 : memcpy( frame->imageData, image.bp, image.width*image.height*3); break; - case XI_RGB32 : memcpy( frame->imageData, image.bp, image.width*image.height*sizeof(DWORD)); break; + case XI_MONO8 : + case XI_RAW8 : memcpy( frame->imageData, image.bp, image.width*image.height); break; + case XI_MONO16 : + case XI_RAW16 : memcpy( frame->imageData, image.bp, image.width*image.height*sizeof(WORD)); break; + case 
XI_RGB24 : + case XI_RGB_PLANAR : memcpy( frame->imageData, image.bp, image.width*image.height*3); break; + case XI_RGB32 : memcpy( frame->imageData, image.bp, image.width*image.height*4); break; default: break; } return frame; @@ -183,6 +171,35 @@ IplImage* CvCaptureCAM_XIMEA::retrieveFrame(int) /**********************************************************************************/ +void CvCaptureCAM_XIMEA::resetCvImage() +{ + int width = 0, height = 0, format = 0; + xiGetParamInt( hmv, XI_PRM_WIDTH, &width); + xiGetParamInt( hmv, XI_PRM_HEIGHT, &height); + xiGetParamInt( hmv, XI_PRM_IMAGE_DATA_FORMAT, &format); + + if( (int)image.width != width || (int)image.height != height || image.frm != (XI_IMG_FORMAT)format) + { + if(frame) cvReleaseImage(&frame); + frame = NULL; + + switch( image.frm) + { + case XI_MONO8 : + case XI_RAW8 : frame = cvCreateImage(cvSize( image.width, image.height), IPL_DEPTH_8U, 1); break; + case XI_MONO16 : + case XI_RAW16 : frame = cvCreateImage(cvSize( image.width, image.height), IPL_DEPTH_16U, 1); break; + case XI_RGB24 : + case XI_RGB_PLANAR : frame = cvCreateImage(cvSize( image.width, image.height), IPL_DEPTH_8U, 3); break; + case XI_RGB32 : frame = cvCreateImage(cvSize( image.width, image.height), IPL_DEPTH_8U, 4); break; + default : + return; + } + } + cvZero(frame); +} +/**********************************************************************************/ + double CvCaptureCAM_XIMEA::getProperty( int property_id ) { if(hmv == NULL) @@ -238,20 +255,14 @@ bool CvCaptureCAM_XIMEA::setProperty( int property_id, double value ) switch(property_id) { // OCV parameters - case CV_CAP_PROP_FRAME_WIDTH : mvret = xiSetParamInt( hmv, XI_PRM_WIDTH, ival); - if(mvret == XI_OK) width = ival; - break; - case CV_CAP_PROP_FRAME_HEIGHT : mvret = xiSetParamInt( hmv, XI_PRM_HEIGHT, ival); - if(mvret == XI_OK) height = ival; - break; + case CV_CAP_PROP_FRAME_WIDTH : mvret = xiSetParamInt( hmv, XI_PRM_WIDTH, ival); break; + case CV_CAP_PROP_FRAME_HEIGHT : mvret 
= xiSetParamInt( hmv, XI_PRM_HEIGHT, ival); break; case CV_CAP_PROP_FPS : mvret = xiSetParamFloat( hmv, XI_PRM_FRAMERATE, fval); break; case CV_CAP_PROP_GAIN : mvret = xiSetParamFloat( hmv, XI_PRM_GAIN, fval); break; case CV_CAP_PROP_EXPOSURE : mvret = xiSetParamInt( hmv, XI_PRM_EXPOSURE, ival); break; // XIMEA camera properties case CV_CAP_PROP_XI_DOWNSAMPLING : mvret = xiSetParamInt( hmv, XI_PRM_DOWNSAMPLING, ival); break; - case CV_CAP_PROP_XI_DATA_FORMAT : mvret = xiSetParamInt( hmv, XI_PRM_IMAGE_DATA_FORMAT, ival); - if(mvret == XI_OK) format = ival; - break; + case CV_CAP_PROP_XI_DATA_FORMAT : mvret = xiSetParamInt( hmv, XI_PRM_IMAGE_DATA_FORMAT, ival); break; case CV_CAP_PROP_XI_OFFSET_X : mvret = xiSetParamInt( hmv, XI_PRM_OFFSET_X, ival); break; case CV_CAP_PROP_XI_OFFSET_Y : mvret = xiSetParamInt( hmv, XI_PRM_OFFSET_Y, ival); break; case CV_CAP_PROP_XI_TRG_SOURCE : mvret = xiSetParamInt( hmv, XI_PRM_TRG_SOURCE, ival); break; @@ -288,7 +299,7 @@ bool CvCaptureCAM_XIMEA::setProperty( int property_id, double value ) void CvCaptureCAM_XIMEA::errMsg(const char* msg, int errNum) { #if defined WIN32 || defined _WIN32 - char buf[512]; + char buf[512]=""; sprintf( buf, "%s : %d\n", msg, errNum); OutputDebugString(buf); #else @@ -296,4 +307,22 @@ void CvCaptureCAM_XIMEA::errMsg(const char* msg, int errNum) #endif } +/**********************************************************************************/ + +int CvCaptureCAM_XIMEA::getBpp() +{ + switch( image.frm) + { + case XI_MONO8 : + case XI_RAW8 : return 1; + case XI_MONO16 : + case XI_RAW16 : return 2; + case XI_RGB24 : + case XI_RGB_PLANAR : return 3; + case XI_RGB32 : return 4; + default : + return 0; + } +} + /**********************************************************************************/ \ No newline at end of file From d9ab22e4ed63ca53634c74d7c022d60106879f23 Mon Sep 17 00:00:00 2001 From: Peng Xiao Date: Wed, 12 Jun 2013 13:55:20 +0800 Subject: [PATCH 049/121] Fix two bugs related to opencl context. 1. 
As getDevice will implicitly call setDevice, in getContext we should not need to call it again. 2. Fix an incorrect type casting. --- modules/ocl/src/initialization.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/modules/ocl/src/initialization.cpp b/modules/ocl/src/initialization.cpp index a9cd08b9f..71289f621 100644 --- a/modules/ocl/src/initialization.cpp +++ b/modules/ocl/src/initialization.cpp @@ -930,8 +930,6 @@ namespace cv clCxt.reset(new Context); std::vector oclinfo; CV_Assert(getDevice(oclinfo, CVCL_DEVICE_TYPE_ALL) > 0); - oclinfo[0].impl->setDevice(0, 0, 0); - clCxt.get()->impl = oclinfo[0].impl->copy(); *((volatile int*)&val) = 1; } @@ -1056,7 +1054,7 @@ BOOL WINAPI DllMain( HINSTANCE, DWORD fdwReason, LPVOID ) Context* cv_ctx = Context::getContext(); if(cv_ctx) { - cl_context ctx = (cl_context)&(cv_ctx->impl->oclcontext); + cl_context ctx = cv_ctx->impl->oclcontext; if(ctx) openCLSafeCall(clReleaseContext(ctx)); } From 6d66d11046bb526d508e9543ecc37cfee91f4435 Mon Sep 17 00:00:00 2001 From: Dirk Van Haerenborgh Date: Wed, 12 Jun 2013 16:58:16 +0200 Subject: [PATCH 050/121] gstreamer: cleaning up resources --- modules/highgui/src/cap_gstreamer.cpp | 42 +++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/modules/highgui/src/cap_gstreamer.cpp b/modules/highgui/src/cap_gstreamer.cpp index b8f4eb83f..a347a7436 100644 --- a/modules/highgui/src/cap_gstreamer.cpp +++ b/modules/highgui/src/cap_gstreamer.cpp @@ -1030,6 +1030,19 @@ void CvVideoWriter_GStreamer::close() handleMessage(pipeline); gst_object_unref (GST_OBJECT (pipeline)); + + if (source) + gst_object_unref (GST_OBJECT (source)); + + if (encodebin) + gst_object_unref (GST_OBJECT (encodebin)); + + if (file) + gst_object_unref (GST_OBJECT (file)); + + if (buffer) + gst_object_unref (GST_OBJECT (buffer)); + } } @@ -1155,6 +1168,35 @@ bool CvVideoWriter_GStreamer::open( const char * filename, int fourcc, source = gst_bin_get_by_name(GST_BIN(encodebin), 
"appsrc0"); } +// GstIterator *it = gst_bin_iterate_sources (GST_BIN(encodebin)); + + +// gboolean done = FALSE; +// GstElement *item = NULL; + +// while (!done) { +// switch (gst_iterator_next (it, &item)) { +// case GST_ITERATOR_OK: +// source = item; +// gst_object_unref (item); +// done = TRUE; +// break; +// case GST_ITERATOR_RESYNC: +// gst_iterator_resync (it); +// break; +// case GST_ITERATOR_ERROR: +// done = TRUE; +// break; +// case GST_ITERATOR_DONE: +// done = TRUE; +// break; +// } +// } +// gst_iterator_free (it); + + + + if (!source){ CV_ERROR(CV_StsError, "GStreamer: cannot find appsrc in manual pipeline\n"); return false; From 5fd724b54a1143bed2d3aa0ff8f5a1ec0bc61e30 Mon Sep 17 00:00:00 2001 From: peng xiao Date: Thu, 13 Jun 2013 10:46:12 +0800 Subject: [PATCH 051/121] Add a function to query if global OpenCL context is initialized. --- modules/ocl/include/opencv2/ocl/ocl.hpp | 5 ++++- modules/ocl/src/initialization.cpp | 8 ++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp index 4a5debf50..29021278f 100644 --- a/modules/ocl/include/opencv2/ocl/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl/ocl.hpp @@ -134,6 +134,9 @@ namespace cv //getDevice also need to be called before this function CV_EXPORTS void setDeviceEx(Info &oclinfo, void *ctx, void *qu, int devnum = 0); + //returns true when global OpenCL context is initialized + CV_EXPORTS bool initialized(); + //////////////////////////////// Error handling //////////////////////// CV_EXPORTS void error(const char *error_string, const char *file, const int line, const char *func); @@ -144,7 +147,7 @@ namespace cv protected: Context(); friend class auto_ptr; - + friend bool initialized(); private: static auto_ptr clCxt; static int val; diff --git a/modules/ocl/src/initialization.cpp b/modules/ocl/src/initialization.cpp index a9cd08b9f..78a956719 100644 --- a/modules/ocl/src/initialization.cpp +++ 
b/modules/ocl/src/initialization.cpp @@ -917,6 +917,14 @@ namespace cv int Context::val = 0; static Mutex cs; static volatile int context_tear_down = 0; + + bool initialized() + { + return *((volatile int*)&Context::val) != 0 && + Context::clCxt->impl->clCmdQueue != NULL&& + Context::clCxt->impl->oclcontext != NULL; + } + Context* Context::getContext() { if(*((volatile int*)&val) != 1) From e433145b7e15f7ff8056d109fe17de3bf423e916 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Thu, 13 Jun 2013 10:22:56 +0400 Subject: [PATCH 052/121] fix for Bug #3085: weights array is only allocated for (l_count+1) elements, but then weights[l_count+1] element is accessed. --- modules/ml/src/ann_mlp.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ml/src/ann_mlp.cpp b/modules/ml/src/ann_mlp.cpp index bf85425b9..7323ab57a 100644 --- a/modules/ml/src/ann_mlp.cpp +++ b/modules/ml/src/ann_mlp.cpp @@ -251,7 +251,7 @@ void CvANN_MLP::create( const CvMat* _layer_sizes, int _activ_func, buf_sz += (l_dst[0] + l_dst[l_count-1]*2)*2; CV_CALL( wbuf = cvCreateMat( 1, buf_sz, CV_64F )); - CV_CALL( weights = (double**)cvAlloc( (l_count+1)*sizeof(weights[0]) )); + CV_CALL( weights = (double**)cvAlloc( (l_count+2)*sizeof(weights[0]) )); weights[0] = wbuf->data.db; weights[1] = weights[0] + l_dst[0]*2; From 982ef83f807c7c2e4285f3d24894d2e251a66fa2 Mon Sep 17 00:00:00 2001 From: Sergei Nosov Date: Thu, 13 Jun 2013 11:51:45 +0400 Subject: [PATCH 053/121] Fixes bug #3071. If we have perfect matches (min_dist == 0.0), then strict comparison fails. Making it non-strict results in treating perfect matches as good. 
--- .../feature_flann_matcher/feature_flann_matcher.rst | 5 +---- samples/cpp/tutorial_code/features2D/SURF_FlannMatcher.cpp | 2 +- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/doc/tutorials/features2d/feature_flann_matcher/feature_flann_matcher.rst b/doc/tutorials/features2d/feature_flann_matcher/feature_flann_matcher.rst index 47eafedbc..54d28890a 100644 --- a/doc/tutorials/features2d/feature_flann_matcher/feature_flann_matcher.rst +++ b/doc/tutorials/features2d/feature_flann_matcher/feature_flann_matcher.rst @@ -85,7 +85,7 @@ This tutorial code's is shown lines below. You can also download it from `here < std::vector< DMatch > good_matches; for( int i = 0; i < descriptors_1.rows; i++ ) - { if( matches[i].distance < 2*min_dist ) + { if( matches[i].distance <= 2*min_dist ) { good_matches.push_back( matches[i]); } } @@ -127,6 +127,3 @@ Result .. image:: images/Feature_FlannMatcher_Keypoints_Result.jpg :align: center :height: 250pt - - - diff --git a/samples/cpp/tutorial_code/features2D/SURF_FlannMatcher.cpp b/samples/cpp/tutorial_code/features2D/SURF_FlannMatcher.cpp index f4cde9b2e..ead7fd718 100644 --- a/samples/cpp/tutorial_code/features2D/SURF_FlannMatcher.cpp +++ b/samples/cpp/tutorial_code/features2D/SURF_FlannMatcher.cpp @@ -70,7 +70,7 @@ int main( int argc, char** argv ) std::vector< DMatch > good_matches; for( int i = 0; i < descriptors_1.rows; i++ ) - { if( matches[i].distance < 2*min_dist ) + { if( matches[i].distance <= 2*min_dist ) { good_matches.push_back( matches[i]); } } From 83e9b0a87a41e8f70de0810e5ce769ebb2ebd9c5 Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Thu, 13 Jun 2013 12:40:14 +0400 Subject: [PATCH 054/121] Javadoc waring fix. 
--- .../java/generator/src/java/android+CameraBridgeViewBase.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/java/generator/src/java/android+CameraBridgeViewBase.java b/modules/java/generator/src/java/android+CameraBridgeViewBase.java index b15ae2bd8..c0c9f5bde 100644 --- a/modules/java/generator/src/java/android+CameraBridgeViewBase.java +++ b/modules/java/generator/src/java/android+CameraBridgeViewBase.java @@ -80,10 +80,10 @@ public abstract class CameraBridgeViewBase extends SurfaceView implements Surfac mMaxHeight = MAX_UNSPECIFIED; styledAttrs.recycle(); } - + /** * Sets the camera index - * @param camera index + * @param cameraIndex new camera index */ public void setCameraIndex(int cameraIndex) { this.mCameraIndex = cameraIndex; From 30f7f9717f1f0a8c11ba88d4f04b0c7cf26bba70 Mon Sep 17 00:00:00 2001 From: Dirk Van Haerenborgh Date: Thu, 13 Jun 2013 11:16:33 +0200 Subject: [PATCH 055/121] allow for arbitraty number of sources and sinks --- modules/highgui/src/cap_gstreamer.cpp | 110 +++++++++++++++----------- 1 file changed, 65 insertions(+), 45 deletions(-) diff --git a/modules/highgui/src/cap_gstreamer.cpp b/modules/highgui/src/cap_gstreamer.cpp index a347a7436..4d4dc711b 100644 --- a/modules/highgui/src/cap_gstreamer.cpp +++ b/modules/highgui/src/cap_gstreamer.cpp @@ -651,17 +651,47 @@ bool CvCapture_GStreamer::open( int type, const char* filename ) if(manualpipeline) { + GstIterator *it = NULL; #if GST_VERSION_MAJOR == 0 - GstIterator *it = gst_bin_iterate_sinks(GST_BIN(uridecodebin)); + it = gst_bin_iterate_sinks(GST_BIN(uridecodebin)); if(gst_iterator_next(it, (gpointer *)&sink) != GST_ITERATOR_OK) { CV_ERROR(CV_StsError, "GStreamer: cannot find appsink in manual pipeline\n"); return false; } #else - sink = gst_bin_get_by_name(GST_BIN(uridecodebin), "opencvsink"); - if (!sink){ - sink = gst_bin_get_by_name(GST_BIN(uridecodebin), "appsink0"); + it = gst_bin_iterate_sinks (GST_BIN(uridecodebin)); + + gboolean done = 
FALSE; + GstElement *element = NULL; + gchar* name = NULL; + GValue value = G_VALUE_INIT; + + while (!done) { + switch (gst_iterator_next (it, &value)) { + case GST_ITERATOR_OK: + element = GST_ELEMENT (g_value_get_object (&value)); + name = gst_element_get_name(element); + if (name){ + if(strstr(name, "opencvsink") != NULL || strstr(name, "appsink") != NULL) { + sink = GST_ELEMENT ( gst_object_ref (element) ); + done = TRUE; + } + g_free(name); + } + g_value_unset (&value); + + break; + case GST_ITERATOR_RESYNC: + gst_iterator_resync (it); + break; + case GST_ITERATOR_ERROR: + case GST_ITERATOR_DONE: + done = TRUE; + break; + } } + gst_iterator_free (it); + if (!sink){ CV_ERROR(CV_StsError, "GStreamer: cannot find appsink in manual pipeline\n"); @@ -1034,15 +1064,8 @@ void CvVideoWriter_GStreamer::close() if (source) gst_object_unref (GST_OBJECT (source)); - if (encodebin) - gst_object_unref (GST_OBJECT (encodebin)); - if (file) gst_object_unref (GST_OBJECT (file)); - - if (buffer) - gst_object_unref (GST_OBJECT (buffer)); - } } @@ -1140,9 +1163,7 @@ bool CvVideoWriter_GStreamer::open( const char * filename, int fourcc, GstEncodingVideoProfile* videoprofile = NULL; #endif -#if GST_VERSION_MAJOR == 0 GstIterator *it = NULL; -#endif // we first try to construct a pipeline from the given string. 
// if that fails, we assume it is an ordinary filename @@ -1163,39 +1184,38 @@ bool CvVideoWriter_GStreamer::open( const char * filename, int fourcc, return false; } #else - source = gst_bin_get_by_name(GST_BIN(encodebin), "opencvsrc"); - if (!source){ - source = gst_bin_get_by_name(GST_BIN(encodebin), "appsrc0"); + it = gst_bin_iterate_sources (GST_BIN(encodebin)); + + gboolean done = FALSE; + GstElement *element = NULL; + gchar* name = NULL; + GValue value = G_VALUE_INIT; + + while (!done) { + switch (gst_iterator_next (it, &value)) { + case GST_ITERATOR_OK: + element = GST_ELEMENT (g_value_get_object (&value)); + name = gst_element_get_name(element); + if (name){ + if(strstr(name, "opencvsrc") != NULL || strstr(name, "appsrc") != NULL) { + source = GST_ELEMENT ( gst_object_ref (element) ); + done = TRUE; + } + g_free(name); + } + g_value_unset (&value); + + break; + case GST_ITERATOR_RESYNC: + gst_iterator_resync (it); + break; + case GST_ITERATOR_ERROR: + case GST_ITERATOR_DONE: + done = TRUE; + break; + } } - -// GstIterator *it = gst_bin_iterate_sources (GST_BIN(encodebin)); - - -// gboolean done = FALSE; -// GstElement *item = NULL; - -// while (!done) { -// switch (gst_iterator_next (it, &item)) { -// case GST_ITERATOR_OK: -// source = item; -// gst_object_unref (item); -// done = TRUE; -// break; -// case GST_ITERATOR_RESYNC: -// gst_iterator_resync (it); -// break; -// case GST_ITERATOR_ERROR: -// done = TRUE; -// break; -// case GST_ITERATOR_DONE: -// done = TRUE; -// break; -// } -// } -// gst_iterator_free (it); - - - + gst_iterator_free (it); if (!source){ CV_ERROR(CV_StsError, "GStreamer: cannot find appsrc in manual pipeline\n"); From de4c3f01788dbe72bd5a51714d85bfc753b700a5 Mon Sep 17 00:00:00 2001 From: Ivan Korolev Date: Thu, 13 Jun 2013 13:41:43 +0400 Subject: [PATCH 056/121] Fixed a bug related to video stabilization crashes with a blank video (Bug #3023) --- modules/videostab/src/global_motion.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff 
--git a/modules/videostab/src/global_motion.cpp b/modules/videostab/src/global_motion.cpp index 484b598dc..de93d5c5a 100644 --- a/modules/videostab/src/global_motion.cpp +++ b/modules/videostab/src/global_motion.cpp @@ -205,6 +205,9 @@ Mat estimateGlobalMotionRobust( estimateGlobMotionLeastSquaresAffine }; const int npoints = static_cast(points0.size()); + if (npoints < params.size) + return Mat::eye(3, 3, CV_32F); + const int niters = static_cast(ceil(log(1 - params.prob) / log(1 - pow(1 - params.eps, params.size)))); @@ -300,6 +303,8 @@ PyrLkRobustMotionEstimator::PyrLkRobustMotionEstimator() Mat PyrLkRobustMotionEstimator::estimate(const Mat &frame0, const Mat &frame1) { detector_->detect(frame0, keypointsPrev_); + if (keypointsPrev_.empty()) + return Mat::eye(3, 3, CV_32F); pointsPrev_.resize(keypointsPrev_.size()); for (size_t i = 0; i < keypointsPrev_.size(); ++i) From 37b67fa50d83cbc17b85936e02dabee5b87654b7 Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Thu, 13 Jun 2013 12:19:25 +0400 Subject: [PATCH 057/121] Fix compiler error due to missing pthread header (Issue #2953) --- modules/highgui/src/cap_ffmpeg_impl.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/modules/highgui/src/cap_ffmpeg_impl.hpp b/modules/highgui/src/cap_ffmpeg_impl.hpp index f5d6b4806..99da45f4c 100644 --- a/modules/highgui/src/cap_ffmpeg_impl.hpp +++ b/modules/highgui/src/cap_ffmpeg_impl.hpp @@ -41,6 +41,9 @@ //M*/ #include "cap_ffmpeg_api.hpp" +#if !(defined(WIN32) || defined(_WIN32) || defined(WINCE)) +# include +#endif #include #include #include From 055137582c40a9756f35185d31368b92cb27143d Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Thu, 13 Jun 2013 16:13:46 +0400 Subject: [PATCH 058/121] fix for bug #2985: OPENCLAMDBLAS and OPENCLAMDFFT never detected under linux. lib64/import and lib32/import is the path on Windows but not Linux. 
for CLAMDBLAS library we should use CLAMDBLAS_PATH (not CLAMDFFT_PATH) --- cmake/OpenCVDetectOpenCL.cmake | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/cmake/OpenCVDetectOpenCL.cmake b/cmake/OpenCVDetectOpenCL.cmake index 014066bc7..a1e8bbac7 100644 --- a/cmake/OpenCVDetectOpenCL.cmake +++ b/cmake/OpenCVDetectOpenCL.cmake @@ -44,12 +44,18 @@ if(OPENCL_FOUND) set(OPENCL_INCLUDE_DIRS ${OPENCL_INCLUDE_DIR}) set(OPENCL_LIBRARIES ${OPENCL_LIBRARY}) - if (X86_64) + if(WIN64) set(CLAMD_POSSIBLE_LIB_SUFFIXES lib64/import) - elseif (X86) + elseif(WIN32) set(CLAMD_POSSIBLE_LIB_SUFFIXES lib32/import) endif() + if(X86_64 AND UNIX) + set(CLAMD_POSSIBLE_LIB_SUFFIXES lib64) + elseif(X86 AND UNIX) + set(CLAMD_POSSIBLE_LIB_SUFFIXES lib32) + endif() + if(WITH_OPENCLAMDFFT) find_path(CLAMDFFT_ROOT_DIR NAMES include/clAmdFft.h @@ -80,7 +86,7 @@ if(OPENCL_FOUND) if(WITH_OPENCLAMDBLAS) find_path(CLAMDBLAS_ROOT_DIR NAMES include/clAmdBlas.h - PATHS ENV CLAMDFFT_PATH ENV ProgramFiles + PATHS ENV CLAMDBLAS_PATH ENV ProgramFiles PATH_SUFFIXES clAmdBlas AMD/clAmdBlas DOC "AMD FFT root directory" NO_DEFAULT_PATH) From 0367a7f992bc111ae93060da39dd769b5cdc71c6 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Thu, 13 Jun 2013 16:46:34 +0400 Subject: [PATCH 059/121] link with nvcuvenc and ffmpeg libraries only if WITH_NVCUVID is enabled --- modules/gpu/CMakeLists.txt | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/modules/gpu/CMakeLists.txt b/modules/gpu/CMakeLists.txt index a471da008..0062944ba 100644 --- a/modules/gpu/CMakeLists.txt +++ b/modules/gpu/CMakeLists.txt @@ -46,15 +46,15 @@ if(HAVE_CUDA) if(WITH_NVCUVID) set(cuda_link_libs ${cuda_link_libs} ${CUDA_CUDA_LIBRARY} ${CUDA_nvcuvid_LIBRARY}) - endif() - if(WIN32) - find_cuda_helper_libs(nvcuvenc) - set(cuda_link_libs ${cuda_link_libs} ${CUDA_nvcuvenc_LIBRARY}) - endif() + if(WIN32) + find_cuda_helper_libs(nvcuvenc) + set(cuda_link_libs ${cuda_link_libs} 
${CUDA_nvcuvenc_LIBRARY}) + endif() - if(WITH_FFMPEG) - set(cuda_link_libs ${cuda_link_libs} ${HIGHGUI_LIBRARIES}) + if(WITH_FFMPEG) + set(cuda_link_libs ${cuda_link_libs} ${HIGHGUI_LIBRARIES}) + endif() endif() else() set(lib_cuda "") From 80f6ede2336d1e1b18718e360169b3c2813e557a Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Thu, 13 Jun 2013 16:51:45 +0400 Subject: [PATCH 060/121] Fix build problems on MIPS and Linaro NDK. Merged android.toolchain.cmake from project https://github.com/taka-no-me/android-cmake commit: fd1f7055f8b7338644d58d9a2015a784dfa3a5ca --- android/android.toolchain.cmake | 237 ++++++++++++++++------ platforms/android/android.toolchain.cmake | 236 +++++++++++++++------ 2 files changed, 349 insertions(+), 124 deletions(-) diff --git a/android/android.toolchain.cmake b/android/android.toolchain.cmake index df365fc2c..9db174a13 100644 --- a/android/android.toolchain.cmake +++ b/android/android.toolchain.cmake @@ -1,6 +1,7 @@ message(STATUS "Android toolchain was moved to platfroms/android!") message(STATUS "This file is depricated and will be removed!") +# Copyright (c) 2010-2011, Ethan Rublee # Copyright (c) 2011-2013, Andrey Kamaev # All rights reserved. 
# @@ -291,6 +292,9 @@ message(STATUS "This file is depricated and will be removed!") # - March 2013 # [+] updated for NDK r8e (x86 version) # [+] support x86_64 version of NDK +# - April 2013 +# [+] support non-release NDK layouts (from Linaro git and Android git) +# [~] automatically detect if explicit link to crtbegin_*.o is needed # ------------------------------------------------------------------------------ cmake_minimum_required( VERSION 2.6.3 ) @@ -518,24 +522,19 @@ if( NOT ANDROID_NDK ) endif( ANDROID_NDK ) endif( NOT ANDROID_STANDALONE_TOOLCHAIN ) endif( NOT ANDROID_NDK ) + # remember found paths if( ANDROID_NDK ) get_filename_component( ANDROID_NDK "${ANDROID_NDK}" ABSOLUTE ) - # try to detect change - if( CMAKE_AR ) - string( LENGTH "${ANDROID_NDK}" __length ) - string( SUBSTRING "${CMAKE_AR}" 0 ${__length} __androidNdkPreviousPath ) - if( NOT __androidNdkPreviousPath STREQUAL ANDROID_NDK ) - message( FATAL_ERROR "It is not possible to change the path to the NDK on subsequent CMake run. You must remove all generated files from your build folder first. - " ) - endif() - unset( __androidNdkPreviousPath ) - unset( __length ) - endif() set( ANDROID_NDK "${ANDROID_NDK}" CACHE INTERNAL "Path of the Android NDK" FORCE ) set( BUILD_WITH_ANDROID_NDK True ) - file( STRINGS "${ANDROID_NDK}/RELEASE.TXT" ANDROID_NDK_RELEASE_FULL LIMIT_COUNT 1 REGEX r[0-9]+[a-z]? ) - string( REGEX MATCH r[0-9]+[a-z]? ANDROID_NDK_RELEASE "${ANDROID_NDK_RELEASE_FULL}" ) + if( EXISTS "${ANDROID_NDK}/RELEASE.TXT" ) + file( STRINGS "${ANDROID_NDK}/RELEASE.TXT" ANDROID_NDK_RELEASE_FULL LIMIT_COUNT 1 REGEX r[0-9]+[a-z]? ) + string( REGEX MATCH r[0-9]+[a-z]? 
ANDROID_NDK_RELEASE "${ANDROID_NDK_RELEASE_FULL}" ) + else() + set( ANDROID_NDK_RELEASE "r1x" ) + set( ANDROID_NDK_RELEASE_FULL "unreleased" ) + endif() elseif( ANDROID_STANDALONE_TOOLCHAIN ) get_filename_component( ANDROID_STANDALONE_TOOLCHAIN "${ANDROID_STANDALONE_TOOLCHAIN}" ABSOLUTE ) # try to detect change @@ -562,6 +561,51 @@ else() sudo ln -s ~/my-android-toolchain ${ANDROID_STANDALONE_TOOLCHAIN_SEARCH_PATH}" ) endif() +# android NDK layout +if( BUILD_WITH_ANDROID_NDK ) + if( NOT DEFINED ANDROID_NDK_LAYOUT ) + # try to automatically detect the layout + if( EXISTS "${ANDROID_NDK}/RELEASE.TXT") + set( ANDROID_NDK_LAYOUT "RELEASE" ) + elseif( EXISTS "${ANDROID_NDK}/../../linux-x86/toolchain/" ) + set( ANDROID_NDK_LAYOUT "LINARO" ) + elseif( EXISTS "${ANDROID_NDK}/../../gcc/" ) + set( ANDROID_NDK_LAYOUT "ANDROID" ) + endif() + endif() + set( ANDROID_NDK_LAYOUT "${ANDROID_NDK_LAYOUT}" CACHE STRING "The inner layout of NDK" ) + mark_as_advanced( ANDROID_NDK_LAYOUT ) + if( ANDROID_NDK_LAYOUT STREQUAL "LINARO" ) + set( ANDROID_NDK_HOST_SYSTEM_NAME ${ANDROID_NDK_HOST_SYSTEM_NAME2} ) # only 32-bit at the moment + set( ANDROID_NDK_TOOLCHAINS_PATH "${ANDROID_NDK}/../../${ANDROID_NDK_HOST_SYSTEM_NAME}/toolchain" ) + set( ANDROID_NDK_TOOLCHAINS_SUBPATH "" ) + set( ANDROID_NDK_TOOLCHAINS_SUBPATH2 "" ) + elseif( ANDROID_NDK_LAYOUT STREQUAL "ANDROID" ) + set( ANDROID_NDK_HOST_SYSTEM_NAME ${ANDROID_NDK_HOST_SYSTEM_NAME2} ) # only 32-bit at the moment + set( ANDROID_NDK_TOOLCHAINS_PATH "${ANDROID_NDK}/../../gcc/${ANDROID_NDK_HOST_SYSTEM_NAME}/arm" ) + set( ANDROID_NDK_TOOLCHAINS_SUBPATH "" ) + set( ANDROID_NDK_TOOLCHAINS_SUBPATH2 "" ) + else() # ANDROID_NDK_LAYOUT STREQUAL "RELEASE" + set( ANDROID_NDK_TOOLCHAINS_PATH "${ANDROID_NDK}/toolchains" ) + set( ANDROID_NDK_TOOLCHAINS_SUBPATH "/prebuilt/${ANDROID_NDK_HOST_SYSTEM_NAME}" ) + set( ANDROID_NDK_TOOLCHAINS_SUBPATH2 "/prebuilt/${ANDROID_NDK_HOST_SYSTEM_NAME2}" ) + endif() + get_filename_component( ANDROID_NDK_TOOLCHAINS_PATH 
"${ANDROID_NDK_TOOLCHAINS_PATH}" ABSOLUTE ) + + # try to detect change of NDK + if( CMAKE_AR ) + string( LENGTH "${ANDROID_NDK_TOOLCHAINS_PATH}" __length ) + string( SUBSTRING "${CMAKE_AR}" 0 ${__length} __androidNdkPreviousPath ) + if( NOT __androidNdkPreviousPath STREQUAL ANDROID_NDK_TOOLCHAINS_PATH ) + message( FATAL_ERROR "It is not possible to change the path to the NDK on subsequent CMake run. You must remove all generated files from your build folder first. + " ) + endif() + unset( __androidNdkPreviousPath ) + unset( __length ) + endif() +endif() + + # get all the details about standalone toolchain if( BUILD_WITH_STANDALONE_TOOLCHAIN ) __DETECT_NATIVE_API_LEVEL( ANDROID_SUPPORTED_NATIVE_API_LEVELS "${ANDROID_STANDALONE_TOOLCHAIN}/sysroot/usr/include/android/api-level.h" ) @@ -589,17 +633,23 @@ if( BUILD_WITH_STANDALONE_TOOLCHAIN ) endif() endif() -macro( __GLOB_NDK_TOOLCHAINS __availableToolchainsVar __availableToolchainsLst __host_system_name ) +macro( __GLOB_NDK_TOOLCHAINS __availableToolchainsVar __availableToolchainsLst __toolchain_subpath ) foreach( __toolchain ${${__availableToolchainsLst}} ) - if( "${__toolchain}" MATCHES "-clang3[.][0-9]$" AND NOT EXISTS "${ANDROID_NDK}/toolchains/${__toolchain}/prebuilt/" ) + if( "${__toolchain}" MATCHES "-clang3[.][0-9]$" AND NOT EXISTS "${ANDROID_NDK_TOOLCHAINS_PATH}/${__toolchain}${__toolchain_subpath}" ) string( REGEX REPLACE "-clang3[.][0-9]$" "-4.6" __gcc_toolchain "${__toolchain}" ) else() set( __gcc_toolchain "${__toolchain}" ) endif() - __DETECT_TOOLCHAIN_MACHINE_NAME( __machine "${ANDROID_NDK}/toolchains/${__gcc_toolchain}/prebuilt/${__host_system_name}" ) + __DETECT_TOOLCHAIN_MACHINE_NAME( __machine "${ANDROID_NDK_TOOLCHAINS_PATH}/${__gcc_toolchain}${__toolchain_subpath}" ) if( __machine ) - string( REGEX MATCH "[0-9]+[.][0-9]+([.][0-9]+)?$" __version "${__gcc_toolchain}" ) - string( REGEX MATCH "^[^-]+" __arch "${__gcc_toolchain}" ) + string( REGEX MATCH "[0-9]+[.][0-9]+([.][0-9x]+)?$" __version 
"${__gcc_toolchain}" ) + if( __machine MATCHES i686 ) + set( __arch "x86" ) + elseif( __machine MATCHES arm ) + set( __arch "arm" ) + elseif( __machine MATCHES mipsel ) + set( __arch "mipsel" ) + endif() list( APPEND __availableToolchainMachines "${__machine}" ) list( APPEND __availableToolchainArchs "${__arch}" ) list( APPEND __availableToolchainCompilerVersions "${__version}" ) @@ -617,29 +667,29 @@ if( BUILD_WITH_ANDROID_NDK ) set( __availableToolchainMachines "" ) set( __availableToolchainArchs "" ) set( __availableToolchainCompilerVersions "" ) - if( ANDROID_TOOLCHAIN_NAME AND EXISTS "${ANDROID_NDK}/toolchains/${ANDROID_TOOLCHAIN_NAME}/" ) + if( ANDROID_TOOLCHAIN_NAME AND EXISTS "${ANDROID_NDK_TOOLCHAINS_PATH}/${ANDROID_TOOLCHAIN_NAME}/" ) # do not go through all toolchains if we know the name set( __availableToolchainsLst "${ANDROID_TOOLCHAIN_NAME}" ) - __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst ${ANDROID_NDK_HOST_SYSTEM_NAME} ) - if( NOT __availableToolchains AND NOT ANDROID_NDK_HOST_SYSTEM_NAME STREQUAL ANDROID_NDK_HOST_SYSTEM_NAME2 ) - __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst ${ANDROID_NDK_HOST_SYSTEM_NAME2} ) + __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst "${ANDROID_NDK_TOOLCHAINS_SUBPATH}" ) + if( NOT __availableToolchains AND NOT ANDROID_NDK_TOOLCHAINS_SUBPATH STREQUAL ANDROID_NDK_TOOLCHAINS_SUBPATH2 ) + __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst "${ANDROID_NDK_TOOLCHAINS_SUBPATH2}" ) if( __availableToolchains ) - set( ANDROID_NDK_HOST_SYSTEM_NAME ${ANDROID_NDK_HOST_SYSTEM_NAME2} ) + set( ANDROID_NDK_TOOLCHAINS_SUBPATH ${ANDROID_NDK_TOOLCHAINS_SUBPATH2} ) endif() endif() endif() if( NOT __availableToolchains ) - file( GLOB __availableToolchainsLst RELATIVE "${ANDROID_NDK}/toolchains" "${ANDROID_NDK}/toolchains/*" ) + file( GLOB __availableToolchainsLst RELATIVE "${ANDROID_NDK_TOOLCHAINS_PATH}" "${ANDROID_NDK_TOOLCHAINS_PATH}/*" ) if( 
__availableToolchains ) list(SORT __availableToolchainsLst) # we need clang to go after gcc endif() __LIST_FILTER( __availableToolchainsLst "^[.]" ) __LIST_FILTER( __availableToolchainsLst "llvm" ) - __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst ${ANDROID_NDK_HOST_SYSTEM_NAME} ) - if( NOT __availableToolchains AND NOT ANDROID_NDK_HOST_SYSTEM_NAME STREQUAL ANDROID_NDK_HOST_SYSTEM_NAME2 ) - __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst ${ANDROID_NDK_HOST_SYSTEM_NAME2} ) + __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst "${ANDROID_NDK_TOOLCHAINS_SUBPATH}" ) + if( NOT __availableToolchains AND NOT ANDROID_NDK_TOOLCHAINS_SUBPATH STREQUAL ANDROID_NDK_TOOLCHAINS_SUBPATH2 ) + __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst "${ANDROID_NDK_TOOLCHAINS_SUBPATH2}" ) if( __availableToolchains ) - set( ANDROID_NDK_HOST_SYSTEM_NAME ${ANDROID_NDK_HOST_SYSTEM_NAME2} ) + set( ANDROID_NDK_TOOLCHAINS_SUBPATH ${ANDROID_NDK_TOOLCHAINS_SUBPATH2} ) endif() endif() endif() @@ -770,6 +820,7 @@ else() list( GET __availableToolchainArchs ${__idx} __toolchainArch ) if( __toolchainArch STREQUAL ANDROID_ARCH_FULLNAME ) list( GET __availableToolchainCompilerVersions ${__idx} __toolchainVersion ) + string( REPLACE "x" "99" __toolchainVersion "${__toolchainVersion}") if( __toolchainVersion VERSION_GREATER __toolchainMaxVersion ) set( __toolchainMaxVersion "${__toolchainVersion}" ) set( __toolchainIdx ${__idx} ) @@ -973,11 +1024,11 @@ if( "${ANDROID_TOOLCHAIN_NAME}" STREQUAL "standalone-clang" ) elseif( "${ANDROID_TOOLCHAIN_NAME}" MATCHES "-clang3[.][0-9]?$" ) string( REGEX MATCH "3[.][0-9]$" ANDROID_CLANG_VERSION "${ANDROID_TOOLCHAIN_NAME}") string( REGEX REPLACE "-clang${ANDROID_CLANG_VERSION}$" "-4.6" ANDROID_GCC_TOOLCHAIN_NAME "${ANDROID_TOOLCHAIN_NAME}" ) - if( NOT EXISTS "${ANDROID_NDK}/toolchains/llvm-${ANDROID_CLANG_VERSION}/prebuilt/${ANDROID_NDK_HOST_SYSTEM_NAME}/bin/clang${TOOL_OS_SUFFIX}" ) + if( 
NOT EXISTS "${ANDROID_NDK_TOOLCHAINS_PATH}/llvm-${ANDROID_CLANG_VERSION}${ANDROID_NDK_TOOLCHAINS_SUBPATH}/bin/clang${TOOL_OS_SUFFIX}" ) message( FATAL_ERROR "Could not find the Clang compiler driver" ) endif() set( ANDROID_COMPILER_IS_CLANG 1 ) - set( ANDROID_CLANG_TOOLCHAIN_ROOT "${ANDROID_NDK}/toolchains/llvm-${ANDROID_CLANG_VERSION}/prebuilt/${ANDROID_NDK_HOST_SYSTEM_NAME}" ) + set( ANDROID_CLANG_TOOLCHAIN_ROOT "${ANDROID_NDK_TOOLCHAINS_PATH}/llvm-${ANDROID_CLANG_VERSION}${ANDROID_NDK_TOOLCHAINS_SUBPATH}" ) else() set( ANDROID_GCC_TOOLCHAIN_NAME "${ANDROID_TOOLCHAIN_NAME}" ) unset( ANDROID_COMPILER_IS_CLANG CACHE ) @@ -991,7 +1042,7 @@ endif() # setup paths and STL for NDK if( BUILD_WITH_ANDROID_NDK ) - set( ANDROID_TOOLCHAIN_ROOT "${ANDROID_NDK}/toolchains/${ANDROID_GCC_TOOLCHAIN_NAME}/prebuilt/${ANDROID_NDK_HOST_SYSTEM_NAME}" ) + set( ANDROID_TOOLCHAIN_ROOT "${ANDROID_NDK_TOOLCHAINS_PATH}/${ANDROID_GCC_TOOLCHAIN_NAME}${ANDROID_NDK_TOOLCHAINS_SUBPATH}" ) set( ANDROID_SYSROOT "${ANDROID_NDK}/platforms/android-${ANDROID_NATIVE_API_LEVEL}/arch-${ANDROID_ARCH_NAME}" ) if( ANDROID_STL STREQUAL "none" ) @@ -1050,11 +1101,11 @@ if( BUILD_WITH_ANDROID_NDK ) endif() # find libsupc++.a - rtti & exceptions if( ANDROID_STL STREQUAL "system_re" OR ANDROID_STL MATCHES "gnustl" ) - if( ANDROID_NDK_RELEASE STRGREATER "r8" ) # r8b - set( __libsupcxx "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/${ANDROID_COMPILER_VERSION}/libs/${ANDROID_NDK_ABI_NAME}/libsupc++.a" ) - elseif( NOT ANDROID_NDK_RELEASE STRLESS "r7" AND ANDROID_NDK_RELEASE STRLESS "r8b") - set( __libsupcxx "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/libs/${ANDROID_NDK_ABI_NAME}/libsupc++.a" ) - else( ANDROID_NDK_RELEASE STRLESS "r7" ) + set( __libsupcxx "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/${ANDROID_COMPILER_VERSION}/libs/${ANDROID_NDK_ABI_NAME}/libsupc++.a" ) # r8b or newer + if( NOT EXISTS "${__libsupcxx}" ) + set( __libsupcxx 
"${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/libs/${ANDROID_NDK_ABI_NAME}/libsupc++.a" ) # r7-r8 + endif() + if( NOT EXISTS "${__libsupcxx}" ) # before r7 if( ARMEABI_V7A ) if( ANDROID_FORCE_ARM_BUILD ) set( __libsupcxx "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/libsupc++.a" ) @@ -1104,7 +1155,7 @@ unset( _ndk_ccache ) # setup the cross-compiler if( NOT CMAKE_C_COMPILER ) - if( NDK_CCACHE ) + if( NDK_CCACHE AND NOT ANDROID_SYSROOT MATCHES "[ ;\"]" ) set( CMAKE_C_COMPILER "${NDK_CCACHE}" CACHE PATH "ccache as C compiler" ) set( CMAKE_CXX_COMPILER "${NDK_CCACHE}" CACHE PATH "ccache as C++ compiler" ) if( ANDROID_COMPILER_IS_CLANG ) @@ -1176,11 +1227,25 @@ set( CMAKE_ASM_SOURCE_FILE_EXTENSIONS s S asm ) remove_definitions( -DANDROID ) add_definitions( -DANDROID ) -if(ANDROID_SYSROOT MATCHES "[ ;\"]") - set( ANDROID_CXX_FLAGS "--sysroot=\"${ANDROID_SYSROOT}\"" ) +if( ANDROID_SYSROOT MATCHES "[ ;\"]" ) + if( CMAKE_HOST_WIN32 ) + # try to convert path to 8.3 form + file( WRITE "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/cvt83.cmd" "@echo %~s1" ) + execute_process( COMMAND "$ENV{ComSpec}" /c "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/cvt83.cmd" "${ANDROID_SYSROOT}" + OUTPUT_VARIABLE __path OUTPUT_STRIP_TRAILING_WHITESPACE + RESULT_VARIABLE __result ERROR_QUIET ) + if( __result EQUAL 0 ) + file( TO_CMAKE_PATH "${__path}" ANDROID_SYSROOT ) + set( ANDROID_CXX_FLAGS "--sysroot=${ANDROID_SYSROOT}" ) + else() + set( ANDROID_CXX_FLAGS "--sysroot=\"${ANDROID_SYSROOT}\"" ) + endif() + else() + set( ANDROID_CXX_FLAGS "'--sysroot=${ANDROID_SYSROOT}'" ) + endif() if( NOT _CMAKE_IN_TRY_COMPILE ) - # quotes will break try_compile and compiler identification - message(WARNING "Your Android system root has non-alphanumeric symbols. 
It can break compiler features detection and the whole build.") + # quotes can break try_compile and compiler identification + message(WARNING "Path to your Android NDK (or toolchain) has non-alphanumeric symbols.\nThe build might be broken.\n") endif() else() set( ANDROID_CXX_FLAGS "--sysroot=${ANDROID_SYSROOT}" ) @@ -1251,22 +1316,18 @@ elseif( ARMEABI ) set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -march=armv5te -mtune=xscale -msoft-float" ) endif() +if( ANDROID_STL MATCHES "gnustl" AND (EXISTS "${__libstl}" OR EXISTS "${__libsupcxx}") ) + set( CMAKE_CXX_CREATE_SHARED_LIBRARY " -o " ) + set( CMAKE_CXX_CREATE_SHARED_MODULE " -o " ) + set( CMAKE_CXX_LINK_EXECUTABLE " -o " ) +else() + set( CMAKE_CXX_CREATE_SHARED_LIBRARY " -o " ) + set( CMAKE_CXX_CREATE_SHARED_MODULE " -o " ) + set( CMAKE_CXX_LINK_EXECUTABLE " -o " ) +endif() + # STL if( EXISTS "${__libstl}" OR EXISTS "${__libsupcxx}" ) - if( ANDROID_STL MATCHES "gnustl" ) - set( CMAKE_CXX_CREATE_SHARED_LIBRARY " -o " ) - set( CMAKE_CXX_CREATE_SHARED_MODULE " -o " ) - set( CMAKE_CXX_LINK_EXECUTABLE " -o " ) - else() - set( CMAKE_CXX_CREATE_SHARED_LIBRARY " -o " ) - set( CMAKE_CXX_CREATE_SHARED_MODULE " -o " ) - set( CMAKE_CXX_LINK_EXECUTABLE " -o " ) - endif() - if ( X86 AND ANDROID_STL MATCHES "gnustl" AND ANDROID_NDK_RELEASE STREQUAL "r6" ) - # workaround "undefined reference to `__dso_handle'" problem - set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} \"${ANDROID_SYSROOT}/usr/lib/crtbegin_so.o\"" ) - set( CMAKE_CXX_CREATE_SHARED_MODULE "${CMAKE_CXX_CREATE_SHARED_MODULE} \"${ANDROID_SYSROOT}/usr/lib/crtbegin_so.o\"" ) - endif() if( EXISTS "${__libstl}" ) set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} \"${__libstl}\"" ) set( CMAKE_CXX_CREATE_SHARED_MODULE "${CMAKE_CXX_CREATE_SHARED_MODULE} \"${__libstl}\"" ) @@ -1285,9 +1346,12 @@ if( EXISTS "${__libstl}" OR EXISTS "${__libsupcxx}" ) set( CMAKE_C_LINK_EXECUTABLE "${CMAKE_C_LINK_EXECUTABLE} \"${__libsupcxx}\"" ) 
endif() if( ANDROID_STL MATCHES "gnustl" ) - set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} -lm" ) - set( CMAKE_CXX_CREATE_SHARED_MODULE "${CMAKE_CXX_CREATE_SHARED_MODULE} -lm" ) - set( CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -lm" ) + if( NOT EXISTS "${ANDROID_LIBM_PATH}" ) + set( ANDROID_LIBM_PATH -lm ) + endif() + set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} ${ANDROID_LIBM_PATH}" ) + set( CMAKE_CXX_CREATE_SHARED_MODULE "${CMAKE_CXX_CREATE_SHARED_MODULE} ${ANDROID_LIBM_PATH}" ) + set( CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} ${ANDROID_LIBM_PATH}" ) endif() endif() @@ -1323,7 +1387,14 @@ if( ARMEABI_V7A ) endif() if( ANDROID_NO_UNDEFINED ) - set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,--no-undefined" ) + if( MIPS ) + # there is some sysroot-related problem in mips linker... + if( NOT ANDROID_SYSROOT MATCHES "[ ;\"]" ) + set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,--no-undefined -Wl,-rpath-link,${ANDROID_SYSROOT}/usr/lib" ) + endif() + else() + set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,--no-undefined" ) + endif() endif() if( ANDROID_SO_UNDEFINED ) @@ -1403,9 +1474,9 @@ set( CMAKE_MODULE_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} ${CMAKE_MODULE_LINKER_FL set( CMAKE_EXE_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} ${CMAKE_EXE_LINKER_FLAGS}" ) if( MIPS AND BUILD_WITH_ANDROID_NDK AND ANDROID_NDK_RELEASE STREQUAL "r8" ) - set( CMAKE_SHARED_LINKER_FLAGS "-Wl,-T,${ANDROID_NDK}/toolchains/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.xsc ${CMAKE_SHARED_LINKER_FLAGS}" ) - set( CMAKE_MODULE_LINKER_FLAGS "-Wl,-T,${ANDROID_NDK}/toolchains/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.xsc ${CMAKE_MODULE_LINKER_FLAGS}" ) - set( CMAKE_EXE_LINKER_FLAGS "-Wl,-T,${ANDROID_NDK}/toolchains/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.x ${CMAKE_EXE_LINKER_FLAGS}" ) + set( CMAKE_SHARED_LINKER_FLAGS "-Wl,-T,${ANDROID_NDK_TOOLCHAINS_PATH}/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.xsc 
${CMAKE_SHARED_LINKER_FLAGS}" ) + set( CMAKE_MODULE_LINKER_FLAGS "-Wl,-T,${ANDROID_NDK_TOOLCHAINS_PATH}/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.xsc ${CMAKE_MODULE_LINKER_FLAGS}" ) + set( CMAKE_EXE_LINKER_FLAGS "-Wl,-T,${ANDROID_NDK_TOOLCHAINS_PATH}/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.x ${CMAKE_EXE_LINKER_FLAGS}" ) endif() # configure rtti @@ -1432,6 +1503,43 @@ endif() include_directories( SYSTEM "${ANDROID_SYSROOT}/usr/include" ${ANDROID_STL_INCLUDE_DIRS} ) link_directories( "${CMAKE_INSTALL_PREFIX}/libs/${ANDROID_NDK_ABI_NAME}" ) +# detect if need link crtbegin_so.o explicitly +if( NOT DEFINED ANDROID_EXPLICIT_CRT_LINK ) + set( __cmd "${CMAKE_CXX_CREATE_SHARED_LIBRARY}" ) + string( REPLACE "" "${CMAKE_CXX_COMPILER} ${CMAKE_CXX_COMPILER_ARG1}" __cmd "${__cmd}" ) + string( REPLACE "" "${CMAKE_C_COMPILER} ${CMAKE_C_COMPILER_ARG1}" __cmd "${__cmd}" ) + string( REPLACE "" "${CMAKE_CXX_FLAGS}" __cmd "${__cmd}" ) + string( REPLACE "" "" __cmd "${__cmd}" ) + string( REPLACE "" "${CMAKE_SHARED_LINKER_FLAGS}" __cmd "${__cmd}" ) + string( REPLACE "" "-shared" __cmd "${__cmd}" ) + string( REPLACE "" "" __cmd "${__cmd}" ) + string( REPLACE "" "" __cmd "${__cmd}" ) + string( REPLACE "" "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/toolchain_crtlink_test.so" __cmd "${__cmd}" ) + string( REPLACE "" "\"${ANDROID_SYSROOT}/usr/lib/crtbegin_so.o\"" __cmd "${__cmd}" ) + string( REPLACE "" "" __cmd "${__cmd}" ) + separate_arguments( __cmd ) + foreach( __var ANDROID_NDK ANDROID_NDK_TOOLCHAINS_PATH ANDROID_STANDALONE_TOOLCHAIN ) + if( ${__var} ) + set( __tmp "${${__var}}" ) + separate_arguments( __tmp ) + string( REPLACE "${__tmp}" "${${__var}}" __cmd "${__cmd}") + endif() + endforeach() + string( REPLACE "'" "" __cmd "${__cmd}" ) + string( REPLACE "\"" "" __cmd "${__cmd}" ) + execute_process( COMMAND ${__cmd} RESULT_VARIABLE __cmd_result OUTPUT_QUIET ERROR_QUIET ) + if( __cmd_result EQUAL 0 ) + set( ANDROID_EXPLICIT_CRT_LINK ON ) + else() + set( ANDROID_EXPLICIT_CRT_LINK OFF ) + endif() 
+endif() + +if( ANDROID_EXPLICIT_CRT_LINK ) + set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} \"${ANDROID_SYSROOT}/usr/lib/crtbegin_so.o\"" ) + set( CMAKE_CXX_CREATE_SHARED_MODULE "${CMAKE_CXX_CREATE_SHARED_MODULE} \"${ANDROID_SYSROOT}/usr/lib/crtbegin_so.o\"" ) +endif() + # setup output directories set( LIBRARY_OUTPUT_PATH_ROOT ${CMAKE_SOURCE_DIR} CACHE PATH "root for library output, set this to change where android libs are installed to" ) set( CMAKE_INSTALL_PREFIX "${ANDROID_TOOLCHAIN_ROOT}/user" CACHE STRING "path for installing" ) @@ -1523,6 +1631,7 @@ if( NOT PROJECT_NAME STREQUAL "CMAKE_TRY_COMPILE" ) foreach( __var NDK_CCACHE LIBRARY_OUTPUT_PATH_ROOT ANDROID_FORBID_SYGWIN ANDROID_SET_OBSOLETE_VARIABLES ANDROID_NDK_HOST_X64 ANDROID_NDK + ANDROID_NDK_LAYOUT ANDROID_STANDALONE_TOOLCHAIN ANDROID_TOOLCHAIN_NAME ANDROID_ABI @@ -1536,6 +1645,8 @@ if( NOT PROJECT_NAME STREQUAL "CMAKE_TRY_COMPILE" ) ANDROID_GOLD_LINKER ANDROID_NOEXECSTACK ANDROID_RELRO + ANDROID_LIBM_PATH + ANDROID_EXPLICIT_CRT_LINK ) if( DEFINED ${__var} ) if( "${__var}" MATCHES " ") @@ -1579,6 +1690,7 @@ endif() # ANDROID_STANDALONE_TOOLCHAIN # ANDROID_TOOLCHAIN_NAME : the NDK name of compiler toolchain # ANDROID_NDK_HOST_X64 : try to use x86_64 toolchain (default for x64 host systems) +# ANDROID_NDK_LAYOUT : the inner NDK structure (RELEASE, LINARO, ANDROID) # LIBRARY_OUTPUT_PATH_ROOT : # NDK_CCACHE : # Obsolete: @@ -1624,6 +1736,7 @@ endif() # ANDROID_EXCEPTIONS : if exceptions are enabled by the runtime # ANDROID_GCC_TOOLCHAIN_NAME : read-only, differs from ANDROID_TOOLCHAIN_NAME only if clang is used # ANDROID_CLANG_VERSION : version of clang compiler if clang is used +# ANDROID_LIBM_PATH : path to libm.so (set to something like $(TOP)/out/target/product//obj/lib/libm.so) to workaround unresolved `sincos` # # Defaults: # ANDROID_DEFAULT_NDK_API_LEVEL diff --git a/platforms/android/android.toolchain.cmake b/platforms/android/android.toolchain.cmake index 
0f7e34067..d7f09c788 100644 --- a/platforms/android/android.toolchain.cmake +++ b/platforms/android/android.toolchain.cmake @@ -289,6 +289,9 @@ # - March 2013 # [+] updated for NDK r8e (x86 version) # [+] support x86_64 version of NDK +# - April 2013 +# [+] support non-release NDK layouts (from Linaro git and Android git) +# [~] automatically detect if explicit link to crtbegin_*.o is needed # ------------------------------------------------------------------------------ cmake_minimum_required( VERSION 2.6.3 ) @@ -516,24 +519,19 @@ if( NOT ANDROID_NDK ) endif( ANDROID_NDK ) endif( NOT ANDROID_STANDALONE_TOOLCHAIN ) endif( NOT ANDROID_NDK ) + # remember found paths if( ANDROID_NDK ) get_filename_component( ANDROID_NDK "${ANDROID_NDK}" ABSOLUTE ) - # try to detect change - if( CMAKE_AR ) - string( LENGTH "${ANDROID_NDK}" __length ) - string( SUBSTRING "${CMAKE_AR}" 0 ${__length} __androidNdkPreviousPath ) - if( NOT __androidNdkPreviousPath STREQUAL ANDROID_NDK ) - message( FATAL_ERROR "It is not possible to change the path to the NDK on subsequent CMake run. You must remove all generated files from your build folder first. - " ) - endif() - unset( __androidNdkPreviousPath ) - unset( __length ) - endif() set( ANDROID_NDK "${ANDROID_NDK}" CACHE INTERNAL "Path of the Android NDK" FORCE ) set( BUILD_WITH_ANDROID_NDK True ) - file( STRINGS "${ANDROID_NDK}/RELEASE.TXT" ANDROID_NDK_RELEASE_FULL LIMIT_COUNT 1 REGEX r[0-9]+[a-z]? ) - string( REGEX MATCH r[0-9]+[a-z]? ANDROID_NDK_RELEASE "${ANDROID_NDK_RELEASE_FULL}" ) + if( EXISTS "${ANDROID_NDK}/RELEASE.TXT" ) + file( STRINGS "${ANDROID_NDK}/RELEASE.TXT" ANDROID_NDK_RELEASE_FULL LIMIT_COUNT 1 REGEX r[0-9]+[a-z]? ) + string( REGEX MATCH r[0-9]+[a-z]? 
ANDROID_NDK_RELEASE "${ANDROID_NDK_RELEASE_FULL}" ) + else() + set( ANDROID_NDK_RELEASE "r1x" ) + set( ANDROID_NDK_RELEASE_FULL "unreleased" ) + endif() elseif( ANDROID_STANDALONE_TOOLCHAIN ) get_filename_component( ANDROID_STANDALONE_TOOLCHAIN "${ANDROID_STANDALONE_TOOLCHAIN}" ABSOLUTE ) # try to detect change @@ -560,6 +558,51 @@ else() sudo ln -s ~/my-android-toolchain ${ANDROID_STANDALONE_TOOLCHAIN_SEARCH_PATH}" ) endif() +# android NDK layout +if( BUILD_WITH_ANDROID_NDK ) + if( NOT DEFINED ANDROID_NDK_LAYOUT ) + # try to automatically detect the layout + if( EXISTS "${ANDROID_NDK}/RELEASE.TXT") + set( ANDROID_NDK_LAYOUT "RELEASE" ) + elseif( EXISTS "${ANDROID_NDK}/../../linux-x86/toolchain/" ) + set( ANDROID_NDK_LAYOUT "LINARO" ) + elseif( EXISTS "${ANDROID_NDK}/../../gcc/" ) + set( ANDROID_NDK_LAYOUT "ANDROID" ) + endif() + endif() + set( ANDROID_NDK_LAYOUT "${ANDROID_NDK_LAYOUT}" CACHE STRING "The inner layout of NDK" ) + mark_as_advanced( ANDROID_NDK_LAYOUT ) + if( ANDROID_NDK_LAYOUT STREQUAL "LINARO" ) + set( ANDROID_NDK_HOST_SYSTEM_NAME ${ANDROID_NDK_HOST_SYSTEM_NAME2} ) # only 32-bit at the moment + set( ANDROID_NDK_TOOLCHAINS_PATH "${ANDROID_NDK}/../../${ANDROID_NDK_HOST_SYSTEM_NAME}/toolchain" ) + set( ANDROID_NDK_TOOLCHAINS_SUBPATH "" ) + set( ANDROID_NDK_TOOLCHAINS_SUBPATH2 "" ) + elseif( ANDROID_NDK_LAYOUT STREQUAL "ANDROID" ) + set( ANDROID_NDK_HOST_SYSTEM_NAME ${ANDROID_NDK_HOST_SYSTEM_NAME2} ) # only 32-bit at the moment + set( ANDROID_NDK_TOOLCHAINS_PATH "${ANDROID_NDK}/../../gcc/${ANDROID_NDK_HOST_SYSTEM_NAME}/arm" ) + set( ANDROID_NDK_TOOLCHAINS_SUBPATH "" ) + set( ANDROID_NDK_TOOLCHAINS_SUBPATH2 "" ) + else() # ANDROID_NDK_LAYOUT STREQUAL "RELEASE" + set( ANDROID_NDK_TOOLCHAINS_PATH "${ANDROID_NDK}/toolchains" ) + set( ANDROID_NDK_TOOLCHAINS_SUBPATH "/prebuilt/${ANDROID_NDK_HOST_SYSTEM_NAME}" ) + set( ANDROID_NDK_TOOLCHAINS_SUBPATH2 "/prebuilt/${ANDROID_NDK_HOST_SYSTEM_NAME2}" ) + endif() + get_filename_component( ANDROID_NDK_TOOLCHAINS_PATH 
"${ANDROID_NDK_TOOLCHAINS_PATH}" ABSOLUTE ) + + # try to detect change of NDK + if( CMAKE_AR ) + string( LENGTH "${ANDROID_NDK_TOOLCHAINS_PATH}" __length ) + string( SUBSTRING "${CMAKE_AR}" 0 ${__length} __androidNdkPreviousPath ) + if( NOT __androidNdkPreviousPath STREQUAL ANDROID_NDK_TOOLCHAINS_PATH ) + message( FATAL_ERROR "It is not possible to change the path to the NDK on subsequent CMake run. You must remove all generated files from your build folder first. + " ) + endif() + unset( __androidNdkPreviousPath ) + unset( __length ) + endif() +endif() + + # get all the details about standalone toolchain if( BUILD_WITH_STANDALONE_TOOLCHAIN ) __DETECT_NATIVE_API_LEVEL( ANDROID_SUPPORTED_NATIVE_API_LEVELS "${ANDROID_STANDALONE_TOOLCHAIN}/sysroot/usr/include/android/api-level.h" ) @@ -587,17 +630,23 @@ if( BUILD_WITH_STANDALONE_TOOLCHAIN ) endif() endif() -macro( __GLOB_NDK_TOOLCHAINS __availableToolchainsVar __availableToolchainsLst __host_system_name ) +macro( __GLOB_NDK_TOOLCHAINS __availableToolchainsVar __availableToolchainsLst __toolchain_subpath ) foreach( __toolchain ${${__availableToolchainsLst}} ) - if( "${__toolchain}" MATCHES "-clang3[.][0-9]$" AND NOT EXISTS "${ANDROID_NDK}/toolchains/${__toolchain}/prebuilt/" ) + if( "${__toolchain}" MATCHES "-clang3[.][0-9]$" AND NOT EXISTS "${ANDROID_NDK_TOOLCHAINS_PATH}/${__toolchain}${__toolchain_subpath}" ) string( REGEX REPLACE "-clang3[.][0-9]$" "-4.6" __gcc_toolchain "${__toolchain}" ) else() set( __gcc_toolchain "${__toolchain}" ) endif() - __DETECT_TOOLCHAIN_MACHINE_NAME( __machine "${ANDROID_NDK}/toolchains/${__gcc_toolchain}/prebuilt/${__host_system_name}" ) + __DETECT_TOOLCHAIN_MACHINE_NAME( __machine "${ANDROID_NDK_TOOLCHAINS_PATH}/${__gcc_toolchain}${__toolchain_subpath}" ) if( __machine ) - string( REGEX MATCH "[0-9]+[.][0-9]+([.][0-9]+)?$" __version "${__gcc_toolchain}" ) - string( REGEX MATCH "^[^-]+" __arch "${__gcc_toolchain}" ) + string( REGEX MATCH "[0-9]+[.][0-9]+([.][0-9x]+)?$" __version 
"${__gcc_toolchain}" ) + if( __machine MATCHES i686 ) + set( __arch "x86" ) + elseif( __machine MATCHES arm ) + set( __arch "arm" ) + elseif( __machine MATCHES mipsel ) + set( __arch "mipsel" ) + endif() list( APPEND __availableToolchainMachines "${__machine}" ) list( APPEND __availableToolchainArchs "${__arch}" ) list( APPEND __availableToolchainCompilerVersions "${__version}" ) @@ -615,29 +664,29 @@ if( BUILD_WITH_ANDROID_NDK ) set( __availableToolchainMachines "" ) set( __availableToolchainArchs "" ) set( __availableToolchainCompilerVersions "" ) - if( ANDROID_TOOLCHAIN_NAME AND EXISTS "${ANDROID_NDK}/toolchains/${ANDROID_TOOLCHAIN_NAME}/" ) + if( ANDROID_TOOLCHAIN_NAME AND EXISTS "${ANDROID_NDK_TOOLCHAINS_PATH}/${ANDROID_TOOLCHAIN_NAME}/" ) # do not go through all toolchains if we know the name set( __availableToolchainsLst "${ANDROID_TOOLCHAIN_NAME}" ) - __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst ${ANDROID_NDK_HOST_SYSTEM_NAME} ) - if( NOT __availableToolchains AND NOT ANDROID_NDK_HOST_SYSTEM_NAME STREQUAL ANDROID_NDK_HOST_SYSTEM_NAME2 ) - __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst ${ANDROID_NDK_HOST_SYSTEM_NAME2} ) + __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst "${ANDROID_NDK_TOOLCHAINS_SUBPATH}" ) + if( NOT __availableToolchains AND NOT ANDROID_NDK_TOOLCHAINS_SUBPATH STREQUAL ANDROID_NDK_TOOLCHAINS_SUBPATH2 ) + __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst "${ANDROID_NDK_TOOLCHAINS_SUBPATH2}" ) if( __availableToolchains ) - set( ANDROID_NDK_HOST_SYSTEM_NAME ${ANDROID_NDK_HOST_SYSTEM_NAME2} ) + set( ANDROID_NDK_TOOLCHAINS_SUBPATH ${ANDROID_NDK_TOOLCHAINS_SUBPATH2} ) endif() endif() endif() if( NOT __availableToolchains ) - file( GLOB __availableToolchainsLst RELATIVE "${ANDROID_NDK}/toolchains" "${ANDROID_NDK}/toolchains/*" ) + file( GLOB __availableToolchainsLst RELATIVE "${ANDROID_NDK_TOOLCHAINS_PATH}" "${ANDROID_NDK_TOOLCHAINS_PATH}/*" ) if( 
__availableToolchains ) list(SORT __availableToolchainsLst) # we need clang to go after gcc endif() __LIST_FILTER( __availableToolchainsLst "^[.]" ) __LIST_FILTER( __availableToolchainsLst "llvm" ) - __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst ${ANDROID_NDK_HOST_SYSTEM_NAME} ) - if( NOT __availableToolchains AND NOT ANDROID_NDK_HOST_SYSTEM_NAME STREQUAL ANDROID_NDK_HOST_SYSTEM_NAME2 ) - __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst ${ANDROID_NDK_HOST_SYSTEM_NAME2} ) + __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst "${ANDROID_NDK_TOOLCHAINS_SUBPATH}" ) + if( NOT __availableToolchains AND NOT ANDROID_NDK_TOOLCHAINS_SUBPATH STREQUAL ANDROID_NDK_TOOLCHAINS_SUBPATH2 ) + __GLOB_NDK_TOOLCHAINS( __availableToolchains __availableToolchainsLst "${ANDROID_NDK_TOOLCHAINS_SUBPATH2}" ) if( __availableToolchains ) - set( ANDROID_NDK_HOST_SYSTEM_NAME ${ANDROID_NDK_HOST_SYSTEM_NAME2} ) + set( ANDROID_NDK_TOOLCHAINS_SUBPATH ${ANDROID_NDK_TOOLCHAINS_SUBPATH2} ) endif() endif() endif() @@ -768,6 +817,7 @@ else() list( GET __availableToolchainArchs ${__idx} __toolchainArch ) if( __toolchainArch STREQUAL ANDROID_ARCH_FULLNAME ) list( GET __availableToolchainCompilerVersions ${__idx} __toolchainVersion ) + string( REPLACE "x" "99" __toolchainVersion "${__toolchainVersion}") if( __toolchainVersion VERSION_GREATER __toolchainMaxVersion ) set( __toolchainMaxVersion "${__toolchainVersion}" ) set( __toolchainIdx ${__idx} ) @@ -971,11 +1021,11 @@ if( "${ANDROID_TOOLCHAIN_NAME}" STREQUAL "standalone-clang" ) elseif( "${ANDROID_TOOLCHAIN_NAME}" MATCHES "-clang3[.][0-9]?$" ) string( REGEX MATCH "3[.][0-9]$" ANDROID_CLANG_VERSION "${ANDROID_TOOLCHAIN_NAME}") string( REGEX REPLACE "-clang${ANDROID_CLANG_VERSION}$" "-4.6" ANDROID_GCC_TOOLCHAIN_NAME "${ANDROID_TOOLCHAIN_NAME}" ) - if( NOT EXISTS "${ANDROID_NDK}/toolchains/llvm-${ANDROID_CLANG_VERSION}/prebuilt/${ANDROID_NDK_HOST_SYSTEM_NAME}/bin/clang${TOOL_OS_SUFFIX}" ) + if( 
NOT EXISTS "${ANDROID_NDK_TOOLCHAINS_PATH}/llvm-${ANDROID_CLANG_VERSION}${ANDROID_NDK_TOOLCHAINS_SUBPATH}/bin/clang${TOOL_OS_SUFFIX}" ) message( FATAL_ERROR "Could not find the Clang compiler driver" ) endif() set( ANDROID_COMPILER_IS_CLANG 1 ) - set( ANDROID_CLANG_TOOLCHAIN_ROOT "${ANDROID_NDK}/toolchains/llvm-${ANDROID_CLANG_VERSION}/prebuilt/${ANDROID_NDK_HOST_SYSTEM_NAME}" ) + set( ANDROID_CLANG_TOOLCHAIN_ROOT "${ANDROID_NDK_TOOLCHAINS_PATH}/llvm-${ANDROID_CLANG_VERSION}${ANDROID_NDK_TOOLCHAINS_SUBPATH}" ) else() set( ANDROID_GCC_TOOLCHAIN_NAME "${ANDROID_TOOLCHAIN_NAME}" ) unset( ANDROID_COMPILER_IS_CLANG CACHE ) @@ -989,7 +1039,7 @@ endif() # setup paths and STL for NDK if( BUILD_WITH_ANDROID_NDK ) - set( ANDROID_TOOLCHAIN_ROOT "${ANDROID_NDK}/toolchains/${ANDROID_GCC_TOOLCHAIN_NAME}/prebuilt/${ANDROID_NDK_HOST_SYSTEM_NAME}" ) + set( ANDROID_TOOLCHAIN_ROOT "${ANDROID_NDK_TOOLCHAINS_PATH}/${ANDROID_GCC_TOOLCHAIN_NAME}${ANDROID_NDK_TOOLCHAINS_SUBPATH}" ) set( ANDROID_SYSROOT "${ANDROID_NDK}/platforms/android-${ANDROID_NATIVE_API_LEVEL}/arch-${ANDROID_ARCH_NAME}" ) if( ANDROID_STL STREQUAL "none" ) @@ -1048,11 +1098,11 @@ if( BUILD_WITH_ANDROID_NDK ) endif() # find libsupc++.a - rtti & exceptions if( ANDROID_STL STREQUAL "system_re" OR ANDROID_STL MATCHES "gnustl" ) - if( ANDROID_NDK_RELEASE STRGREATER "r8" ) # r8b - set( __libsupcxx "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/${ANDROID_COMPILER_VERSION}/libs/${ANDROID_NDK_ABI_NAME}/libsupc++.a" ) - elseif( NOT ANDROID_NDK_RELEASE STRLESS "r7" AND ANDROID_NDK_RELEASE STRLESS "r8b") - set( __libsupcxx "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/libs/${ANDROID_NDK_ABI_NAME}/libsupc++.a" ) - else( ANDROID_NDK_RELEASE STRLESS "r7" ) + set( __libsupcxx "${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/${ANDROID_COMPILER_VERSION}/libs/${ANDROID_NDK_ABI_NAME}/libsupc++.a" ) # r8b or newer + if( NOT EXISTS "${__libsupcxx}" ) + set( __libsupcxx 
"${ANDROID_NDK}/sources/cxx-stl/gnu-libstdc++/libs/${ANDROID_NDK_ABI_NAME}/libsupc++.a" ) # r7-r8 + endif() + if( NOT EXISTS "${__libsupcxx}" ) # before r7 if( ARMEABI_V7A ) if( ANDROID_FORCE_ARM_BUILD ) set( __libsupcxx "${ANDROID_TOOLCHAIN_ROOT}/${ANDROID_TOOLCHAIN_MACHINE_NAME}/lib/${CMAKE_SYSTEM_PROCESSOR}/libsupc++.a" ) @@ -1102,7 +1152,7 @@ unset( _ndk_ccache ) # setup the cross-compiler if( NOT CMAKE_C_COMPILER ) - if( NDK_CCACHE ) + if( NDK_CCACHE AND NOT ANDROID_SYSROOT MATCHES "[ ;\"]" ) set( CMAKE_C_COMPILER "${NDK_CCACHE}" CACHE PATH "ccache as C compiler" ) set( CMAKE_CXX_COMPILER "${NDK_CCACHE}" CACHE PATH "ccache as C++ compiler" ) if( ANDROID_COMPILER_IS_CLANG ) @@ -1174,11 +1224,25 @@ set( CMAKE_ASM_SOURCE_FILE_EXTENSIONS s S asm ) remove_definitions( -DANDROID ) add_definitions( -DANDROID ) -if(ANDROID_SYSROOT MATCHES "[ ;\"]") - set( ANDROID_CXX_FLAGS "--sysroot=\"${ANDROID_SYSROOT}\"" ) +if( ANDROID_SYSROOT MATCHES "[ ;\"]" ) + if( CMAKE_HOST_WIN32 ) + # try to convert path to 8.3 form + file( WRITE "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/cvt83.cmd" "@echo %~s1" ) + execute_process( COMMAND "$ENV{ComSpec}" /c "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/cvt83.cmd" "${ANDROID_SYSROOT}" + OUTPUT_VARIABLE __path OUTPUT_STRIP_TRAILING_WHITESPACE + RESULT_VARIABLE __result ERROR_QUIET ) + if( __result EQUAL 0 ) + file( TO_CMAKE_PATH "${__path}" ANDROID_SYSROOT ) + set( ANDROID_CXX_FLAGS "--sysroot=${ANDROID_SYSROOT}" ) + else() + set( ANDROID_CXX_FLAGS "--sysroot=\"${ANDROID_SYSROOT}\"" ) + endif() + else() + set( ANDROID_CXX_FLAGS "'--sysroot=${ANDROID_SYSROOT}'" ) + endif() if( NOT _CMAKE_IN_TRY_COMPILE ) - # quotes will break try_compile and compiler identification - message(WARNING "Your Android system root has non-alphanumeric symbols. 
It can break compiler features detection and the whole build.") + # quotes can break try_compile and compiler identification + message(WARNING "Path to your Android NDK (or toolchain) has non-alphanumeric symbols.\nThe build might be broken.\n") endif() else() set( ANDROID_CXX_FLAGS "--sysroot=${ANDROID_SYSROOT}" ) @@ -1249,22 +1313,18 @@ elseif( ARMEABI ) set( ANDROID_CXX_FLAGS "${ANDROID_CXX_FLAGS} -march=armv5te -mtune=xscale -msoft-float" ) endif() +if( ANDROID_STL MATCHES "gnustl" AND (EXISTS "${__libstl}" OR EXISTS "${__libsupcxx}") ) + set( CMAKE_CXX_CREATE_SHARED_LIBRARY " -o " ) + set( CMAKE_CXX_CREATE_SHARED_MODULE " -o " ) + set( CMAKE_CXX_LINK_EXECUTABLE " -o " ) +else() + set( CMAKE_CXX_CREATE_SHARED_LIBRARY " -o " ) + set( CMAKE_CXX_CREATE_SHARED_MODULE " -o " ) + set( CMAKE_CXX_LINK_EXECUTABLE " -o " ) +endif() + # STL if( EXISTS "${__libstl}" OR EXISTS "${__libsupcxx}" ) - if( ANDROID_STL MATCHES "gnustl" ) - set( CMAKE_CXX_CREATE_SHARED_LIBRARY " -o " ) - set( CMAKE_CXX_CREATE_SHARED_MODULE " -o " ) - set( CMAKE_CXX_LINK_EXECUTABLE " -o " ) - else() - set( CMAKE_CXX_CREATE_SHARED_LIBRARY " -o " ) - set( CMAKE_CXX_CREATE_SHARED_MODULE " -o " ) - set( CMAKE_CXX_LINK_EXECUTABLE " -o " ) - endif() - if ( X86 AND ANDROID_STL MATCHES "gnustl" AND ANDROID_NDK_RELEASE STREQUAL "r6" ) - # workaround "undefined reference to `__dso_handle'" problem - set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} \"${ANDROID_SYSROOT}/usr/lib/crtbegin_so.o\"" ) - set( CMAKE_CXX_CREATE_SHARED_MODULE "${CMAKE_CXX_CREATE_SHARED_MODULE} \"${ANDROID_SYSROOT}/usr/lib/crtbegin_so.o\"" ) - endif() if( EXISTS "${__libstl}" ) set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} \"${__libstl}\"" ) set( CMAKE_CXX_CREATE_SHARED_MODULE "${CMAKE_CXX_CREATE_SHARED_MODULE} \"${__libstl}\"" ) @@ -1283,9 +1343,12 @@ if( EXISTS "${__libstl}" OR EXISTS "${__libsupcxx}" ) set( CMAKE_C_LINK_EXECUTABLE "${CMAKE_C_LINK_EXECUTABLE} \"${__libsupcxx}\"" ) 
endif() if( ANDROID_STL MATCHES "gnustl" ) - set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} -lm" ) - set( CMAKE_CXX_CREATE_SHARED_MODULE "${CMAKE_CXX_CREATE_SHARED_MODULE} -lm" ) - set( CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} -lm" ) + if( NOT EXISTS "${ANDROID_LIBM_PATH}" ) + set( ANDROID_LIBM_PATH -lm ) + endif() + set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} ${ANDROID_LIBM_PATH}" ) + set( CMAKE_CXX_CREATE_SHARED_MODULE "${CMAKE_CXX_CREATE_SHARED_MODULE} ${ANDROID_LIBM_PATH}" ) + set( CMAKE_CXX_LINK_EXECUTABLE "${CMAKE_CXX_LINK_EXECUTABLE} ${ANDROID_LIBM_PATH}" ) endif() endif() @@ -1321,7 +1384,14 @@ if( ARMEABI_V7A ) endif() if( ANDROID_NO_UNDEFINED ) - set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,--no-undefined" ) + if( MIPS ) + # there is some sysroot-related problem in mips linker... + if( NOT ANDROID_SYSROOT MATCHES "[ ;\"]" ) + set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,--no-undefined -Wl,-rpath-link,${ANDROID_SYSROOT}/usr/lib" ) + endif() + else() + set( ANDROID_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} -Wl,--no-undefined" ) + endif() endif() if( ANDROID_SO_UNDEFINED ) @@ -1401,9 +1471,9 @@ set( CMAKE_MODULE_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} ${CMAKE_MODULE_LINKER_FL set( CMAKE_EXE_LINKER_FLAGS "${ANDROID_LINKER_FLAGS} ${CMAKE_EXE_LINKER_FLAGS}" ) if( MIPS AND BUILD_WITH_ANDROID_NDK AND ANDROID_NDK_RELEASE STREQUAL "r8" ) - set( CMAKE_SHARED_LINKER_FLAGS "-Wl,-T,${ANDROID_NDK}/toolchains/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.xsc ${CMAKE_SHARED_LINKER_FLAGS}" ) - set( CMAKE_MODULE_LINKER_FLAGS "-Wl,-T,${ANDROID_NDK}/toolchains/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.xsc ${CMAKE_MODULE_LINKER_FLAGS}" ) - set( CMAKE_EXE_LINKER_FLAGS "-Wl,-T,${ANDROID_NDK}/toolchains/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.x ${CMAKE_EXE_LINKER_FLAGS}" ) + set( CMAKE_SHARED_LINKER_FLAGS "-Wl,-T,${ANDROID_NDK_TOOLCHAINS_PATH}/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.xsc 
${CMAKE_SHARED_LINKER_FLAGS}" ) + set( CMAKE_MODULE_LINKER_FLAGS "-Wl,-T,${ANDROID_NDK_TOOLCHAINS_PATH}/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.xsc ${CMAKE_MODULE_LINKER_FLAGS}" ) + set( CMAKE_EXE_LINKER_FLAGS "-Wl,-T,${ANDROID_NDK_TOOLCHAINS_PATH}/${ANDROID_GCC_TOOLCHAIN_NAME}/mipself.x ${CMAKE_EXE_LINKER_FLAGS}" ) endif() # configure rtti @@ -1430,6 +1500,43 @@ endif() include_directories( SYSTEM "${ANDROID_SYSROOT}/usr/include" ${ANDROID_STL_INCLUDE_DIRS} ) link_directories( "${CMAKE_INSTALL_PREFIX}/libs/${ANDROID_NDK_ABI_NAME}" ) +# detect if need link crtbegin_so.o explicitly +if( NOT DEFINED ANDROID_EXPLICIT_CRT_LINK ) + set( __cmd "${CMAKE_CXX_CREATE_SHARED_LIBRARY}" ) + string( REPLACE "" "${CMAKE_CXX_COMPILER} ${CMAKE_CXX_COMPILER_ARG1}" __cmd "${__cmd}" ) + string( REPLACE "" "${CMAKE_C_COMPILER} ${CMAKE_C_COMPILER_ARG1}" __cmd "${__cmd}" ) + string( REPLACE "" "${CMAKE_CXX_FLAGS}" __cmd "${__cmd}" ) + string( REPLACE "" "" __cmd "${__cmd}" ) + string( REPLACE "" "${CMAKE_SHARED_LINKER_FLAGS}" __cmd "${__cmd}" ) + string( REPLACE "" "-shared" __cmd "${__cmd}" ) + string( REPLACE "" "" __cmd "${__cmd}" ) + string( REPLACE "" "" __cmd "${__cmd}" ) + string( REPLACE "" "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/toolchain_crtlink_test.so" __cmd "${__cmd}" ) + string( REPLACE "" "\"${ANDROID_SYSROOT}/usr/lib/crtbegin_so.o\"" __cmd "${__cmd}" ) + string( REPLACE "" "" __cmd "${__cmd}" ) + separate_arguments( __cmd ) + foreach( __var ANDROID_NDK ANDROID_NDK_TOOLCHAINS_PATH ANDROID_STANDALONE_TOOLCHAIN ) + if( ${__var} ) + set( __tmp "${${__var}}" ) + separate_arguments( __tmp ) + string( REPLACE "${__tmp}" "${${__var}}" __cmd "${__cmd}") + endif() + endforeach() + string( REPLACE "'" "" __cmd "${__cmd}" ) + string( REPLACE "\"" "" __cmd "${__cmd}" ) + execute_process( COMMAND ${__cmd} RESULT_VARIABLE __cmd_result OUTPUT_QUIET ERROR_QUIET ) + if( __cmd_result EQUAL 0 ) + set( ANDROID_EXPLICIT_CRT_LINK ON ) + else() + set( ANDROID_EXPLICIT_CRT_LINK OFF ) + endif() 
+endif() + +if( ANDROID_EXPLICIT_CRT_LINK ) + set( CMAKE_CXX_CREATE_SHARED_LIBRARY "${CMAKE_CXX_CREATE_SHARED_LIBRARY} \"${ANDROID_SYSROOT}/usr/lib/crtbegin_so.o\"" ) + set( CMAKE_CXX_CREATE_SHARED_MODULE "${CMAKE_CXX_CREATE_SHARED_MODULE} \"${ANDROID_SYSROOT}/usr/lib/crtbegin_so.o\"" ) +endif() + # setup output directories set( LIBRARY_OUTPUT_PATH_ROOT ${CMAKE_SOURCE_DIR} CACHE PATH "root for library output, set this to change where android libs are installed to" ) set( CMAKE_INSTALL_PREFIX "${ANDROID_TOOLCHAIN_ROOT}/user" CACHE STRING "path for installing" ) @@ -1521,6 +1628,7 @@ if( NOT PROJECT_NAME STREQUAL "CMAKE_TRY_COMPILE" ) foreach( __var NDK_CCACHE LIBRARY_OUTPUT_PATH_ROOT ANDROID_FORBID_SYGWIN ANDROID_SET_OBSOLETE_VARIABLES ANDROID_NDK_HOST_X64 ANDROID_NDK + ANDROID_NDK_LAYOUT ANDROID_STANDALONE_TOOLCHAIN ANDROID_TOOLCHAIN_NAME ANDROID_ABI @@ -1534,6 +1642,8 @@ if( NOT PROJECT_NAME STREQUAL "CMAKE_TRY_COMPILE" ) ANDROID_GOLD_LINKER ANDROID_NOEXECSTACK ANDROID_RELRO + ANDROID_LIBM_PATH + ANDROID_EXPLICIT_CRT_LINK ) if( DEFINED ${__var} ) if( "${__var}" MATCHES " ") @@ -1577,6 +1687,7 @@ endif() # ANDROID_STANDALONE_TOOLCHAIN # ANDROID_TOOLCHAIN_NAME : the NDK name of compiler toolchain # ANDROID_NDK_HOST_X64 : try to use x86_64 toolchain (default for x64 host systems) +# ANDROID_NDK_LAYOUT : the inner NDK structure (RELEASE, LINARO, ANDROID) # LIBRARY_OUTPUT_PATH_ROOT : # NDK_CCACHE : # Obsolete: @@ -1622,6 +1733,7 @@ endif() # ANDROID_EXCEPTIONS : if exceptions are enabled by the runtime # ANDROID_GCC_TOOLCHAIN_NAME : read-only, differs from ANDROID_TOOLCHAIN_NAME only if clang is used # ANDROID_CLANG_VERSION : version of clang compiler if clang is used +# ANDROID_LIBM_PATH : path to libm.so (set to something like $(TOP)/out/target/product//obj/lib/libm.so) to workaround unresolved `sincos` # # Defaults: # ANDROID_DEFAULT_NDK_API_LEVEL From c90abb6a037eb838099322721860ef5c732ca5a2 Mon Sep 17 00:00:00 2001 From: Sergei Nosov Date: Thu, 13 Jun 2013 
21:14:42 +0400 Subject: [PATCH 061/121] add multiruns to fix "unreliable results" error --- modules/core/perf/perf_reduce.cpp | 4 ++-- modules/imgproc/perf/perf_cvt_color.cpp | 6 ++++-- modules/imgproc/perf/perf_morph.cpp | 3 ++- modules/imgproc/perf/perf_remap.cpp | 3 ++- modules/imgproc/perf/perf_threshold.cpp | 2 +- modules/video/perf/perf_optflowpyrlk.cpp | 5 +++-- 6 files changed, 14 insertions(+), 9 deletions(-) diff --git a/modules/core/perf/perf_reduce.cpp b/modules/core/perf/perf_reduce.cpp index 93d3a1416..7b74b0e7e 100644 --- a/modules/core/perf/perf_reduce.cpp +++ b/modules/core/perf/perf_reduce.cpp @@ -34,7 +34,8 @@ PERF_TEST_P(Size_MatType_ROp, reduceR, declare.in(src, WARMUP_RNG).out(vec); declare.time(100); - TEST_CYCLE() reduce(src, vec, 0, reduceOp, ddepth); + int runs = 15; + TEST_CYCLE_MULTIRUN(runs) reduce(src, vec, 0, reduceOp, ddepth); SANITY_CHECK(vec, 1); } @@ -65,4 +66,3 @@ PERF_TEST_P(Size_MatType_ROp, reduceC, SANITY_CHECK(vec, 1); } - diff --git a/modules/imgproc/perf/perf_cvt_color.cpp b/modules/imgproc/perf/perf_cvt_color.cpp index 9b87afe99..89c7c6916 100644 --- a/modules/imgproc/perf/perf_cvt_color.cpp +++ b/modules/imgproc/perf/perf_cvt_color.cpp @@ -258,7 +258,8 @@ PERF_TEST_P(Size_CvtMode, cvtColor8u, declare.time(100); declare.in(src, WARMUP_RNG).out(dst); - TEST_CYCLE() cvtColor(src, dst, mode, ch.dcn); + int runs = sz.width <= 320 ? 70 : 1; + TEST_CYCLE_MULTIRUN(runs) cvtColor(src, dst, mode, ch.dcn); SANITY_CHECK(dst, 1); } @@ -334,7 +335,8 @@ PERF_TEST_P(Size_CvtMode3, cvtColorRGB2YUV420p, declare.time(100); declare.in(src, WARMUP_RNG).out(dst); - TEST_CYCLE() cvtColor(src, dst, mode, ch.dcn); + int runs = (sz.width <= 640) ? 
10 : 1; + TEST_CYCLE_MULTIRUN(runs) cvtColor(src, dst, mode, ch.dcn); SANITY_CHECK(dst, 1); } diff --git a/modules/imgproc/perf/perf_morph.cpp b/modules/imgproc/perf/perf_morph.cpp index 9aadeaff5..d3dbba38f 100644 --- a/modules/imgproc/perf/perf_morph.cpp +++ b/modules/imgproc/perf/perf_morph.cpp @@ -19,7 +19,8 @@ PERF_TEST_P(Size_MatType, erode, TYPICAL_MATS_MORPH) declare.in(src, WARMUP_RNG).out(dst); - TEST_CYCLE() erode(src, dst, noArray()); + int runs = (sz.width <= 320) ? 15 : 1; + TEST_CYCLE_MULTIRUN(runs) erode(src, dst, noArray()); SANITY_CHECK(dst); } diff --git a/modules/imgproc/perf/perf_remap.cpp b/modules/imgproc/perf/perf_remap.cpp index 334c5ff96..92c6007a2 100644 --- a/modules/imgproc/perf/perf_remap.cpp +++ b/modules/imgproc/perf/perf_remap.cpp @@ -63,7 +63,8 @@ PERF_TEST_P( TestRemap, Remap, declare.in(src, WARMUP_RNG).out(dst).time(20); - TEST_CYCLE() remap(src, dst, map1, map2, inter_type); + int runs = (sz.width <= 640) ? 3 : 1; + TEST_CYCLE_MULTIRUN(runs) remap(src, dst, map1, map2, inter_type); SANITY_CHECK(dst); } diff --git a/modules/imgproc/perf/perf_threshold.cpp b/modules/imgproc/perf/perf_threshold.cpp index 61255e228..01fff2e8c 100644 --- a/modules/imgproc/perf/perf_threshold.cpp +++ b/modules/imgproc/perf/perf_threshold.cpp @@ -32,7 +32,7 @@ PERF_TEST_P(Size_MatType_ThreshType, threshold, declare.in(src, WARMUP_RNG).out(dst); - int runs = (sz.width <= 640) ? 8 : 1; + int runs = (sz.width <= 640) ? 
40 : 1; TEST_CYCLE_MULTIRUN(runs) threshold(src, dst, thresh, maxval, threshType); SANITY_CHECK(dst); diff --git a/modules/video/perf/perf_optflowpyrlk.cpp b/modules/video/perf/perf_optflowpyrlk.cpp index 12005f8ff..8c53db03a 100644 --- a/modules/video/perf/perf_optflowpyrlk.cpp +++ b/modules/video/perf/perf_optflowpyrlk.cpp @@ -165,7 +165,8 @@ PERF_TEST_P(Path_Idx_Cn_NPoints_WSize_Deriv, OpticalFlowPyrLK_self, testing::Com declare.in(pyramid1, pyramid2, inPoints).out(outPoints); declare.time(400); - TEST_CYCLE() + int runs = 3; + TEST_CYCLE_MULTIRUN(runs) { calcOpticalFlowPyrLK(pyramid1, pyramid2, inPoints, outPoints, status, err, Size(winSize, winSize), maxLevel, criteria, @@ -217,4 +218,4 @@ PERF_TEST_P(Path_Win_Deriv_Border_Reuse, OpticalFlowPyrLK_pyr, testing::Combine( } SANITY_CHECK(pyramid); -} \ No newline at end of file +} From 931ebab822da3b7924921d5210b28cb0d9486bde Mon Sep 17 00:00:00 2001 From: Gabe Schwartz Date: Wed, 12 Jun 2013 16:02:30 -0400 Subject: [PATCH 062/121] Updated generator and parser to support Python 3. 
--- modules/python/src2/gen2.py | 79 ++++++++++++++++++------------- modules/python/src2/hdr_parser.py | 51 ++++++++++---------- 2 files changed, 71 insertions(+), 59 deletions(-) diff --git a/modules/python/src2/gen2.py b/modules/python/src2/gen2.py index 0fed1838b..5ef123af6 100755 --- a/modules/python/src2/gen2.py +++ b/modules/python/src2/gen2.py @@ -1,8 +1,14 @@ #!/usr/bin/env python -import hdr_parser, sys, re, os, cStringIO +from __future__ import print_function +import hdr_parser, sys, re, os from string import Template +if sys.version_info[0] >= 3: + from io import StringIO +else: + from cStringIO import StringIO + ignored_arg_types = ["RNG*"] gen_template_check_self = Template(""" if(!PyObject_TypeCheck(self, &pyopencv_${name}_Type)) @@ -33,6 +39,13 @@ gen_template_func_body = Template("""$code_decl } """) +py_major_version = sys.version_info[0] +if py_major_version >= 3: + head_init_str = "PyVarObject_HEAD_INIT(&PyType_Type, 0)" +else: + head_init_str = """PyObject_HEAD_INIT(&PyType_Type) +0,""" + gen_template_simple_type_decl = Template(""" struct pyopencv_${name}_t { @@ -42,8 +55,7 @@ struct pyopencv_${name}_t static PyTypeObject pyopencv_${name}_Type = { - PyObject_HEAD_INIT(&PyType_Type) - 0, + %s MODULESTR".$wname", sizeof(pyopencv_${name}_t), }; @@ -66,13 +78,13 @@ template<> bool pyopencv_to(PyObject* src, ${cname}& dst, const char* name) return true; if(!PyObject_TypeCheck(src, &pyopencv_${name}_Type)) { - failmsg("Expected ${cname} for argument '%s'", name); + failmsg("Expected ${cname} for argument '%%s'", name); return false; } dst = ((pyopencv_${name}_t*)src)->v; return true; } -""") +""" % head_init_str) gen_template_type_decl = Template(""" @@ -84,8 +96,7 @@ struct pyopencv_${name}_t static PyTypeObject pyopencv_${name}_Type = { - PyObject_HEAD_INIT(&PyType_Type) - 0, + %s MODULESTR".$wname", sizeof(pyopencv_${name}_t), }; @@ -110,14 +121,14 @@ template<> bool pyopencv_to(PyObject* src, Ptr<${cname}>& dst, const char* name) return true; 
if(!PyObject_TypeCheck(src, &pyopencv_${name}_Type)) { - failmsg("Expected ${cname} for argument '%s'", name); + failmsg("Expected ${cname} for argument '%%s'", name); return false; } dst = ((pyopencv_${name}_t*)src)->v; return true; } -""") +""" % head_init_str) gen_template_map_type_cvt = Template(""" template<> bool pyopencv_to(PyObject* src, ${cname}& dst, const char* name); @@ -245,9 +256,9 @@ class ClassInfo(object): if decl: self.bases = decl[1].split()[1:] if len(self.bases) > 1: - print "Note: Class %s has more than 1 base class (not supported by Python C extensions)" % (self.name,) - print " Bases: ", " ".join(self.bases) - print " Only the first base class will be used" + print("Note: Class %s has more than 1 base class (not supported by Python C extensions)" % (self.name,)) + print(" Bases: ", " ".join(self.bases)) + print(" Only the first base class will be used") self.bases = [self.bases[0].strip(",")] #return sys.exit(-1) if self.bases and self.bases[0].startswith("cv::"): @@ -280,8 +291,8 @@ class ClassInfo(object): if self.ismap: return self.gen_map_code(all_classes) - getset_code = cStringIO.StringIO() - getset_inits = cStringIO.StringIO() + getset_code = StringIO() + getset_inits = StringIO() sorted_props = [(p.name, p) for p in self.props] sorted_props.sort() @@ -304,10 +315,10 @@ class ClassInfo(object): getset_code.write(gen_template_set_prop.substitute(name=self.name, member=pname, membertype=p.tp, access=access_op)) getset_inits.write(gen_template_rw_prop_init.substitute(name=self.name, member=pname)) - methods_code = cStringIO.StringIO() - methods_inits = cStringIO.StringIO() + methods_code = StringIO() + methods_inits = StringIO() - sorted_methods = self.methods.items() + sorted_methods = list(self.methods.items()) sorted_methods.sort() for mname, m in sorted_methods: @@ -315,7 +326,7 @@ class ClassInfo(object): methods_inits.write(m.get_tab_entry()) baseptr = "NULL" - if self.bases and all_classes.has_key(self.bases[0]): + if self.bases 
and self.bases[0] in all_classes: baseptr = "&pyopencv_" + all_classes[self.bases[0]].name + "_Type" code = gen_template_type_impl.substitute(name=self.name, wname=self.wname, cname=self.cname, @@ -609,7 +620,7 @@ class FuncInfo(object): defval0 = "0" tp1 = tp.replace("*", "_ptr") if tp1.endswith("*"): - print "Error: type with star: a.tp=%s, tp=%s, tp1=%s" % (a.tp, tp, tp1) + print("Error: type with star: a.tp=%s, tp=%s, tp1=%s" % (a.tp, tp, tp1)) sys.exit(-1) amapping = simple_argtype_mapping.get(tp, (tp, "O", defval0)) @@ -715,11 +726,11 @@ class PythonWrapperGenerator(object): self.classes = {} self.funcs = {} self.consts = {} - self.code_types = cStringIO.StringIO() - self.code_funcs = cStringIO.StringIO() - self.code_func_tab = cStringIO.StringIO() - self.code_type_reg = cStringIO.StringIO() - self.code_const_reg = cStringIO.StringIO() + self.code_types = StringIO() + self.code_funcs = StringIO() + self.code_func_tab = StringIO() + self.code_type_reg = StringIO() + self.code_const_reg = StringIO() self.class_idx = 0 def add_class(self, stype, name, decl): @@ -727,9 +738,9 @@ class PythonWrapperGenerator(object): classinfo.decl_idx = self.class_idx self.class_idx += 1 - if self.classes.has_key(classinfo.name): - print "Generator error: class %s (cname=%s) already exists" \ - % (classinfo.name, classinfo.cname) + if classinfo.name in self.classes: + print("Generator error: class %s (cname=%s) already exists" \ + % (classinfo.name, classinfo.cname)) sys.exit(-1) self.classes[classinfo.name] = classinfo if classinfo.bases and not classinfo.isalgorithm: @@ -738,9 +749,9 @@ class PythonWrapperGenerator(object): def add_const(self, name, decl): constinfo = ConstInfo(name, decl[1]) - if self.consts.has_key(constinfo.name): - print "Generator error: constant %s (cname=%s) already exists" \ - % (constinfo.name, constinfo.cname) + if constinfo.name in self.consts: + print("Generator error: constant %s (cname=%s) already exists" \ + % (constinfo.name, constinfo.cname)) 
sys.exit(-1) self.consts[constinfo.name] = constinfo @@ -779,7 +790,7 @@ class PythonWrapperGenerator(object): else: classinfo = self.classes.get(classname, ClassInfo("")) if not classinfo.name: - print "Generator error: the class for method %s is missing" % (name,) + print("Generator error: the class for method %s is missing" % (name,)) sys.exit(-1) func_map = classinfo.methods @@ -819,7 +830,7 @@ class PythonWrapperGenerator(object): self.add_func(decl) # step 2: generate code for the classes and their methods - classlist = self.classes.items() + classlist = list(self.classes.items()) classlist.sort() for name, classinfo in classlist: if classinfo.ismap: @@ -844,7 +855,7 @@ class PythonWrapperGenerator(object): self.code_type_reg.write("MKTYPE2(%s);\n" % (classinfo.name,) ) # step 3: generate the code for all the global functions - funclist = self.funcs.items() + funclist = list(self.funcs.items()) funclist.sort() for name, func in funclist: code = func.gen_code(self.classes) @@ -852,7 +863,7 @@ class PythonWrapperGenerator(object): self.code_func_tab.write(func.get_tab_entry()) # step 4: generate the code for constants - constlist = self.consts.items() + constlist = list(self.consts.items()) constlist.sort() for name, constinfo in constlist: self.gen_const_reg(constinfo) diff --git a/modules/python/src2/hdr_parser.py b/modules/python/src2/hdr_parser.py index b13fe8cf8..b6f21c31e 100755 --- a/modules/python/src2/hdr_parser.py +++ b/modules/python/src2/hdr_parser.py @@ -1,5 +1,6 @@ #!/usr/bin/env python +from __future__ import print_function import os, sys, re, string # the list only for debugging. 
The real list, used in the real OpenCV build, is specified in CMakeLists.txt @@ -43,13 +44,13 @@ class CppHeaderParser(object): def get_macro_arg(self, arg_str, npos): npos2 = npos3 = arg_str.find("(", npos) if npos2 < 0: - print "Error: no arguments for the macro at %d" % (self.lineno,) + print("Error: no arguments for the macro at %d" % (self.lineno,)) sys.exit(-1) balance = 1 while 1: t, npos3 = self.find_next_token(arg_str, ['(', ')'], npos3+1) if npos3 < 0: - print "Error: no matching ')' in the macro call at %d" % (self.lineno,) + print("Error: no matching ')' in the macro call at %d" % (self.lineno,)) sys.exit(-1) if t == '(': balance += 1 @@ -143,13 +144,13 @@ class CppHeaderParser(object): angle_stack.append(0) elif w == "," or w == '>': if not angle_stack: - print "Error at %d: argument contains ',' or '>' not within template arguments" % (self.lineno,) + print("Error at %d: argument contains ',' or '>' not within template arguments" % (self.lineno,)) sys.exit(-1) if w == ",": arg_type += "_and_" elif w == ">": if angle_stack[0] == 0: - print "Error at %s:%d: template has no arguments" % (self.hname, self.lineno) + print("Error at %s:%d: template has no arguments" % (self.hname, self.lineno)) sys.exit(-1) if angle_stack[0] > 1: arg_type += "_end_" @@ -173,7 +174,7 @@ class CppHeaderParser(object): p1 = arg_name.find("[") p2 = arg_name.find("]",p1+1) if p2 < 0: - print "Error at %d: no closing ]" % (self.lineno,) + print("Error at %d: no closing ]" % (self.lineno,)) sys.exit(-1) counter_str = arg_name[p1+1:p2].strip() if counter_str == "": @@ -358,7 +359,7 @@ class CppHeaderParser(object): if bool(re.match(r".*\)\s*const(\s*=\s*0)?", decl_str)): decl[2].append("/C") if "virtual" in decl_str: - print decl_str + print(decl_str) return decl def parse_func_decl(self, decl_str): @@ -412,12 +413,12 @@ class CppHeaderParser(object): if decl_str.startswith("CVAPI"): rtype_end = decl_str.find(")", args_begin+1) if rtype_end < 0: - print "Error at %d. 
no terminating ) in CVAPI() macro: %s" % (self.lineno, decl_str) + print("Error at %d. no terminating ) in CVAPI() macro: %s" % (self.lineno, decl_str)) sys.exit(-1) decl_str = decl_str[args_begin+1:rtype_end] + " " + decl_str[rtype_end+1:] args_begin = decl_str.find("(") if args_begin < 0: - print "Error at %d: no args in '%s'" % (self.lineno, decl_str) + print("Error at %d: no args in '%s'" % (self.lineno, decl_str)) sys.exit(-1) decl_start = decl_str[:args_begin].strip() @@ -425,7 +426,7 @@ class CppHeaderParser(object): if decl_start.endswith("operator"): args_begin = decl_str.find("(", args_begin+1) if args_begin < 0: - print "Error at %d: no args in '%s'" % (self.lineno, decl_str) + print("Error at %d: no args in '%s'" % (self.lineno, decl_str)) sys.exit(-1) decl_start = decl_str[:args_begin].strip() # TODO: normalize all type of operators @@ -455,7 +456,7 @@ class CppHeaderParser(object): return [] # exotic - dynamic 2d array else: #print rettype, funcname, modlist, argno - print "Error at %s:%d the function/method name is missing: '%s'" % (self.hname, self.lineno, decl_start) + print("Error at %s:%d the function/method name is missing: '%s'" % (self.hname, self.lineno, decl_start)) sys.exit(-1) if self.wrap_mode and (("::" in funcname) or funcname.startswith("~")): @@ -486,9 +487,9 @@ class CppHeaderParser(object): npos += 1 t, npos = self.find_next_token(decl_str, ["(", ")", ",", "<", ">"], npos) if not t: - print "Error: no closing ')' at %d" % (self.lineno,) - print decl_str - print decl_str[arg_start:] + print("Error: no closing ')' at %d" % (self.lineno,)) + print(decl_str) + print(decl_str[arg_start:]) sys.exit(-1) if t == "<": angle_balance += 1 @@ -583,7 +584,7 @@ class CppHeaderParser(object): if block_type in ["file", "enum"]: continue if block_type not in ["struct", "class", "namespace"]: - print "Error at %d: there are non-valid entries in the current block stack " % (self.lineno, self.block_stack) + print("Error at %d: there are non-valid 
entries in the current block stack " % (self.lineno, self.block_stack)) sys.exit(-1) if block_name: n += block_name + "." @@ -605,7 +606,7 @@ class CppHeaderParser(object): stmt_type = "block" if context == "block": - print "Error at %d: should not call parse_stmt inside blocks" % (self.lineno,) + print("Error at %d: should not call parse_stmt inside blocks" % (self.lineno,)) sys.exit(-1) if context == "class" or context == "struct": @@ -632,7 +633,7 @@ class CppHeaderParser(object): try: classname, bases, modlist = self.parse_class_decl(stmt[len("typedef "):]) except: - print "Error at %s:%d" % (self.hname, self.lineno) + print("Error at %s:%d" % (self.hname, self.lineno)) exit(1) if classname.startswith("_Ipl"): classname = classname[1:] @@ -647,7 +648,7 @@ class CppHeaderParser(object): try: classname, bases, modlist = self.parse_class_decl(stmt) except: - print "Error at %s:%d" % (self.hname, self.lineno) + print("Error at %s:%d" % (self.hname, self.lineno)) exit(1) decl = [] if ("CV_EXPORTS_W" in stmt) or ("CV_EXPORTS_AS" in stmt) or (not self.wrap_mode):# and ("CV_EXPORTS" in stmt)): @@ -767,7 +768,7 @@ class CppHeaderParser(object): state = SCAN if state != SCAN: - print "Error at %d: invlid state = %d" % (self.lineno, state) + print("Error at %d: invlid state = %d" % (self.lineno, state)) sys.exit(-1) while 1: @@ -795,7 +796,7 @@ class CppHeaderParser(object): while 1: t2, pos2 = self.find_next_token(l, ["\\", "\""], pos2) if t2 == "": - print "Error at %d: no terminating '\"'" % (self.lineno,) + print("Error at %d: no terminating '\"'" % (self.lineno,)) sys.exit(-1) if t2 == "\"": break @@ -836,7 +837,7 @@ class CppHeaderParser(object): if token == "}": if not self.block_stack: - print "Error at %d: the block stack is empty" % (self.lineno,) + print("Error at %d: the block stack is empty" % (self.lineno,)) self.block_stack[-1:] = [] if pos+1 < len(l) and l[pos+1] == ';': pos += 1 @@ -851,13 +852,13 @@ class CppHeaderParser(object): Prints the list of 
declarations, retrieived by the parse() method """ for d in decls: - print d[0], d[1], ";".join(d[2]) + print(d[0], d[1], ";".join(d[2])) for a in d[3]: - print " ", a[0], a[1], a[2], + print(" ", a[0], a[1], a[2], end="") if a[3]: - print "; ".join(a[3]) + print("; ".join(a[3])) else: - print + print() if __name__ == '__main__': parser = CppHeaderParser() @@ -867,4 +868,4 @@ if __name__ == '__main__': #for hname in sys.argv[1:]: #decls += parser.parse(hname, wmode=False) parser.print_decls(decls) - print len(decls) + print(len(decls)) From fb9781b97bb913c46b2036540072f6b38efdfd37 Mon Sep 17 00:00:00 2001 From: Gabe Schwartz Date: Wed, 12 Jun 2013 16:03:34 -0400 Subject: [PATCH 063/121] Updated cv2.cpp to support Python 3 API. Added a header with defines to enable the module to compile with either python 2 or 3 without changes. --- modules/python/src2/cv2.cpp | 30 ++++++++++++++- modules/python/src2/pycompat.hpp | 64 ++++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+), 1 deletion(-) create mode 100644 modules/python/src2/pycompat.hpp diff --git a/modules/python/src2/cv2.cpp b/modules/python/src2/cv2.cpp index c834b1f32..384207cd0 100644 --- a/modules/python/src2/cv2.cpp +++ b/modules/python/src2/cv2.cpp @@ -23,6 +23,8 @@ # include "opencv2/nonfree.hpp" #endif +#include "pycompat.hpp" + using cv::flann::IndexParams; using cv::flann::SearchParams; @@ -1176,7 +1178,11 @@ static int convert_to_char(PyObject *o, char *dst, const char *name = "no_name") } } +#if PY_MAJOR_VERSION >= 3 +#define MKTYPE2(NAME) pyopencv_##NAME##_specials(); if (!to_ok(&pyopencv_##NAME##_Type)) return NULL; +#else #define MKTYPE2(NAME) pyopencv_##NAME##_specials(); if (!to_ok(&pyopencv_##NAME##_Type)) return +#endif #ifdef __GNUC__ # pragma GCC diagnostic ignored "-Wunused-parameter" @@ -1205,15 +1211,35 @@ static int to_ok(PyTypeObject *to) return (PyType_Ready(to) == 0); } + +#if PY_MAJOR_VERSION >= 3 +extern "C" CV_EXPORTS PyObject* PyInit_cv2(); +static struct PyModuleDef 
cv2_moduledef = +{ + PyModuleDef_HEAD_INIT, + MODULESTR, + "Python wrapper for OpenCV.", + -1, /* size of per-interpreter state of the module, + or -1 if the module keeps state in global variables. */ + methods +}; + +PyObject* PyInit_cv2() +#else extern "C" CV_EXPORTS void initcv2(); void initcv2() +#endif { import_array(); #include "pyopencv_generated_type_reg.h" +#if PY_MAJOR_VERSION >= 3 + PyObject* m = PyModule_Create(&cv2_moduledef); +#else PyObject* m = Py_InitModule(MODULESTR, methods); +#endif PyObject* d = PyModule_GetDict(m); PyDict_SetItemString(d, "__version__", PyString_FromString(CV_VERSION)); @@ -1262,5 +1288,7 @@ void initcv2() PUBLISH(CV_64FC4); #include "pyopencv_generated_const_reg.h" - +#if PY_MAJOR_VERSION >= 3 + return m; +#endif } diff --git a/modules/python/src2/pycompat.hpp b/modules/python/src2/pycompat.hpp new file mode 100644 index 000000000..c473fffb2 --- /dev/null +++ b/modules/python/src2/pycompat.hpp @@ -0,0 +1,64 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. 
+// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + +// Defines for Python 2/3 compatibility. +#ifndef __PYCOMPAT_HPP__ +#define __PYCOMPAT_HPP__ + +#if PY_MAJOR_VERSION >= 3 +// Python3 treats all ints as longs, PyInt_X functions have been removed. +#define PyInt_Check PyLong_Check +#define PyInt_CheckExact PyLong_CheckExact +#define PyInt_AsLong PyLong_AsLong +#define PyInt_AS_LONG PyLong_AS_LONG +#define PyInt_FromLong PyLong_FromLong +#define PyNumber_Int PyNumber_Long + +// Python3 strings are unicode, these defines mimic the Python2 functionality. 
+#define PyString_Check PyUnicode_Check +#define PyString_FromString PyUnicode_FromString +#define PyString_AsString PyUnicode_AsUTF8 +#define PyString_FromStringAndSize PyUnicode_FromStringAndSize +#define PyString_Size PyUnicode_GET_SIZE +#endif + +#endif // END HEADER GUARD From bce1b352e7df82644f8232465e126f74d1792f07 Mon Sep 17 00:00:00 2001 From: Gabe Schwartz Date: Wed, 12 Jun 2013 16:04:34 -0400 Subject: [PATCH 064/121] Updated cmake files to use print(). In these cases, just using print() is enough to ensure py2/3 compatibility. --- cmake/OpenCVDetectPython.cmake | 6 +++--- modules/python/CMakeLists.txt | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cmake/OpenCVDetectPython.cmake b/cmake/OpenCVDetectPython.cmake index d606a650a..f27176d66 100644 --- a/cmake/OpenCVDetectPython.cmake +++ b/cmake/OpenCVDetectPython.cmake @@ -49,7 +49,7 @@ if(PYTHON_EXECUTABLE) if(NOT ANDROID AND NOT IOS) if(CMAKE_HOST_UNIX) - execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "from distutils.sysconfig import *; print get_python_lib()" + execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "from distutils.sysconfig import *; print(get_python_lib())" RESULT_VARIABLE PYTHON_CVPY_PROCESS OUTPUT_VARIABLE PYTHON_STD_PACKAGES_PATH OUTPUT_STRIP_TRAILING_WHITESPACE) @@ -80,7 +80,7 @@ if(PYTHON_EXECUTABLE) if(NOT PYTHON_NUMPY_INCLUDE_DIR) # Attempt to discover the NumPy include directory. 
If this succeeds, then build python API with NumPy - execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import os; os.environ['DISTUTILS_USE_SDK']='1'; import numpy.distutils; print numpy.distutils.misc_util.get_numpy_include_dirs()[0]" + execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import os; os.environ['DISTUTILS_USE_SDK']='1'; import numpy.distutils; print(numpy.distutils.misc_util.get_numpy_include_dirs()[0])" RESULT_VARIABLE PYTHON_NUMPY_PROCESS OUTPUT_VARIABLE PYTHON_NUMPY_INCLUDE_DIR OUTPUT_STRIP_TRAILING_WHITESPACE) @@ -92,7 +92,7 @@ if(PYTHON_EXECUTABLE) endif() if(PYTHON_NUMPY_INCLUDE_DIR) - execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import numpy; print numpy.version.version" + execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import numpy; print(numpy.version.version)" RESULT_VARIABLE PYTHON_NUMPY_PROCESS OUTPUT_VARIABLE PYTHON_NUMPY_VERSION OUTPUT_STRIP_TRAILING_WHITESPACE) diff --git a/modules/python/CMakeLists.txt b/modules/python/CMakeLists.txt index 119c8e1bd..0b4c59d63 100644 --- a/modules/python/CMakeLists.txt +++ b/modules/python/CMakeLists.txt @@ -67,7 +67,7 @@ else() endif() target_link_libraries(${the_module} ${OPENCV_MODULE_${the_module}_DEPS}) -execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import distutils.sysconfig; print distutils.sysconfig.get_config_var('SO')" +execute_process(COMMAND ${PYTHON_EXECUTABLE} -c "import distutils.sysconfig; print(distutils.sysconfig.get_config_var('SO'))" RESULT_VARIABLE PYTHON_CVPY_PROCESS OUTPUT_VARIABLE CVPY_SUFFIX OUTPUT_STRIP_TRAILING_WHITESPACE) From bb209193c98744e90148be6e9a006a46090d6de2 Mon Sep 17 00:00:00 2001 From: Gabe Schwartz Date: Mon, 10 Jun 2013 17:29:36 -0400 Subject: [PATCH 065/121] Updated usage of METH_X definitions for python 3. Python 3 requires METH_VARARGS | METH_KEYWORDS, not just METH_KEYWORDS. 
--- modules/python/src2/cv2.cpp | 2 +- modules/python/src2/gen2.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/python/src2/cv2.cpp b/modules/python/src2/cv2.cpp index 384207cd0..e68da59cf 100644 --- a/modules/python/src2/cv2.cpp +++ b/modules/python/src2/cv2.cpp @@ -1196,7 +1196,7 @@ static PyMethodDef methods[] = { #include "pyopencv_generated_func_tab.h" {"createTrackbar", pycvCreateTrackbar, METH_VARARGS, "createTrackbar(trackbarName, windowName, value, count, onChange) -> None"}, - {"setMouseCallback", (PyCFunction)pycvSetMouseCallback, METH_KEYWORDS, "setMouseCallback(windowName, onMouse [, param]) -> None"}, + {"setMouseCallback", (PyCFunction)pycvSetMouseCallback, METH_VARARGS | METH_KEYWORDS, "setMouseCallback(windowName, onMouse [, param]) -> None"}, {NULL, NULL}, }; diff --git a/modules/python/src2/gen2.py b/modules/python/src2/gen2.py index 5ef123af6..816a386c0 100755 --- a/modules/python/src2/gen2.py +++ b/modules/python/src2/gen2.py @@ -543,7 +543,7 @@ class FuncInfo(object): p2 = s.rfind(")") docstring_list = [s[:p1+1] + "[" + s[p1+1:p2] + "]" + s[p2:]] - return Template(' {"$py_funcname", (PyCFunction)$wrap_funcname, METH_KEYWORDS, "$py_docstring"},\n' + return Template(' {"$py_funcname", (PyCFunction)$wrap_funcname, METH_VARARGS | METH_KEYWORDS, "$py_docstring"},\n' ).substitute(py_funcname = self.variants[0].wname, wrap_funcname=self.get_wrapper_name(), py_docstring = " or ".join(docstring_list)) From 7d70399d7210da8afe7b3f62930c55a68721c746 Mon Sep 17 00:00:00 2001 From: Peter Minin Date: Fri, 14 Jun 2013 04:25:17 +0400 Subject: [PATCH 066/121] Add a new variant of detectMultiScale with output arguments 'objects' and 'numDetections'; factor most of detectMultiScale's code into 2 protected methods --- .../objdetect/doc/cascade_classification.rst | 3 + .../objdetect/include/opencv2/objdetect.hpp | 15 ++- modules/objdetect/src/cascadedetect.cpp | 115 ++++++++++++------ 3 files changed, 96 insertions(+), 37 
deletions(-) diff --git a/modules/objdetect/doc/cascade_classification.rst b/modules/objdetect/doc/cascade_classification.rst index 961cf0aa5..46272d2f0 100644 --- a/modules/objdetect/doc/cascade_classification.rst +++ b/modules/objdetect/doc/cascade_classification.rst @@ -189,6 +189,7 @@ CascadeClassifier::detectMultiScale Detects objects of different sizes in the input image. The detected objects are returned as a list of rectangles. .. ocv:function:: void CascadeClassifier::detectMultiScale( const Mat& image, vector& objects, double scaleFactor=1.1, int minNeighbors=3, int flags=0, Size minSize=Size(), Size maxSize=Size()) +.. ocv:function:: void CascadeClassifier::detectMultiScale( const Mat& image, vector& objects, vector& numDetections, double scaleFactor=1.1, int minNeighbors=3, int flags=0, Size minSize=Size(), Size maxSize=Size()) .. ocv:pyfunction:: cv2.CascadeClassifier.detectMultiScale(image[, scaleFactor[, minNeighbors[, flags[, minSize[, maxSize]]]]]) -> objects .. ocv:pyfunction:: cv2.CascadeClassifier.detectMultiScale(image[, scaleFactor[, minNeighbors[, flags[, minSize[, maxSize[, outputRejectLevels]]]]]]) -> objects, rejectLevels, levelWeights @@ -201,6 +202,8 @@ Detects objects of different sizes in the input image. The detected objects are :param objects: Vector of rectangles where each rectangle contains the detected object, the rectangles may be partially outside the original image. + :param numDetections: Vector of detection numbers for the corresponding objects. An object's number of detections is the number of neighboring positively classified rectangles that were joined together to form the object. + :param scaleFactor: Parameter specifying how much the image size is reduced at each image scale. :param minNeighbors: Parameter specifying how many neighbors each candidate rectangle should have to retain it. 
diff --git a/modules/objdetect/include/opencv2/objdetect.hpp b/modules/objdetect/include/opencv2/objdetect.hpp index 3ccb057e3..d263b2eb7 100644 --- a/modules/objdetect/include/opencv2/objdetect.hpp +++ b/modules/objdetect/include/opencv2/objdetect.hpp @@ -149,6 +149,14 @@ public: Size minSize = Size(), Size maxSize = Size() ); + CV_WRAP virtual void detectMultiScale( const Mat& image, + CV_OUT std::vector& objects, + CV_OUT std::vector& numDetections, + double scaleFactor=1.1, + int minNeighbors=3, int flags=0, + Size minSize=Size(), + Size maxSize=Size() ); + CV_WRAP virtual void detectMultiScale( const Mat& image, CV_OUT std::vector& objects, CV_OUT std::vector& rejectLevels, @@ -168,7 +176,12 @@ public: protected: virtual bool detectSingleScale( const Mat& image, int stripCount, Size processingRectSize, int stripSize, int yStep, double factor, std::vector& candidates, - std::vector& rejectLevels, std::vector& levelWeights, bool outputRejectLevels = false); + std::vector& rejectLevels, std::vector& levelWeights, bool outputRejectLevels = false ); + + virtual void detectMultiScaleNoGrouping( const Mat& image, std::vector& candidates, + std::vector& rejectLevels, std::vector& levelWeights, + double scaleFactor, Size minObjectSize, Size maxObjectSize, + bool outputRejectLevels = false ); protected: enum { BOOST = 0 diff --git a/modules/objdetect/src/cascadedetect.cpp b/modules/objdetect/src/cascadedetect.cpp index 6d1b287d8..13422b97e 100644 --- a/modules/objdetect/src/cascadedetect.cpp +++ b/modules/objdetect/src/cascadedetect.cpp @@ -1022,6 +1022,7 @@ public: }; struct getRect { Rect operator ()(const CvAvgComp& e) const { return e.rect; } }; +struct getNeighbors { int operator ()(const CvAvgComp& e) const { return e.neighbors; } }; bool CascadeClassifier::detectSingleScale( const Mat& image, int stripCount, Size processingRectSize, @@ -1086,39 +1087,33 @@ bool CascadeClassifier::setImage(const Mat& image) return featureEvaluator->setImage(image, 
data.origWinSize); } -void CascadeClassifier::detectMultiScale( const Mat& image, std::vector& objects, - std::vector& rejectLevels, - std::vector& levelWeights, - double scaleFactor, int minNeighbors, - int flags, Size minObjectSize, Size maxObjectSize, - bool outputRejectLevels ) +static void detectMultiScaleOldFormat( const Mat& image, Ptr oldCascade, + std::vector& objects, + std::vector& rejectLevels, + std::vector& levelWeights, + std::vector& vecAvgComp, + double scaleFactor, int minNeighbors, + int flags, Size minObjectSize, Size maxObjectSize, + bool outputRejectLevels = false ) { - const double GROUP_EPS = 0.2; + MemStorage storage(cvCreateMemStorage(0)); + CvMat _image = image; + CvSeq* _objects = cvHaarDetectObjectsForROC( &_image, oldCascade, storage, rejectLevels, levelWeights, scaleFactor, + minNeighbors, flags, minObjectSize, maxObjectSize, outputRejectLevels ); + Seq(_objects).copyTo(vecAvgComp); + objects.resize(vecAvgComp.size()); + std::transform(vecAvgComp.begin(), vecAvgComp.end(), objects.begin(), getRect()); +} - CV_Assert( scaleFactor > 1 && image.depth() == CV_8U ); +void CascadeClassifier::detectMultiScaleNoGrouping( const Mat& image, std::vector& candidates, + std::vector& rejectLevels, std::vector& levelWeights, + double scaleFactor, Size minObjectSize, Size maxObjectSize, + bool outputRejectLevels ) +{ + candidates.clear(); - if( empty() ) - return; - - if( isOldFormatCascade() ) - { - MemStorage storage(cvCreateMemStorage(0)); - CvMat _image = image; - CvSeq* _objects = cvHaarDetectObjectsForROC( &_image, oldCascade, storage, rejectLevels, levelWeights, scaleFactor, - minNeighbors, flags, minObjectSize, maxObjectSize, outputRejectLevels ); - std::vector vecAvgComp; - Seq(_objects).copyTo(vecAvgComp); - objects.resize(vecAvgComp.size()); - std::transform(vecAvgComp.begin(), vecAvgComp.end(), objects.begin(), getRect()); - return; - } - - objects.clear(); - - if (!maskGenerator.empty()) { + if (!maskGenerator.empty()) 
maskGenerator->initializeMask(image); - } - if( maxObjectSize.height == 0 || maxObjectSize.width == 0 ) maxObjectSize = image.size(); @@ -1132,7 +1127,6 @@ void CascadeClassifier::detectMultiScale( const Mat& image, std::vector& o } Mat imageBuffer(image.rows + 1, image.cols + 1, CV_8U); - std::vector candidates; for( double factor = 1; ; factor *= scaleFactor ) { @@ -1173,18 +1167,39 @@ void CascadeClassifier::detectMultiScale( const Mat& image, std::vector& o rejectLevels, levelWeights, outputRejectLevels ) ) break; } +} +void CascadeClassifier::detectMultiScale( const Mat& image, std::vector& objects, + std::vector& rejectLevels, + std::vector& levelWeights, + double scaleFactor, int minNeighbors, + int flags, Size minObjectSize, Size maxObjectSize, + bool outputRejectLevels ) +{ + CV_Assert( scaleFactor > 1 && image.depth() == CV_8U ); - objects.resize(candidates.size()); - std::copy(candidates.begin(), candidates.end(), objects.begin()); + if( empty() ) + return; - if( outputRejectLevels ) + if( isOldFormatCascade() ) { - groupRectangles( objects, rejectLevels, levelWeights, minNeighbors, GROUP_EPS ); + std::vector fakeVecAvgComp; + detectMultiScaleOldFormat( image, oldCascade, objects, rejectLevels, levelWeights, fakeVecAvgComp, scaleFactor, + minNeighbors, flags, minObjectSize, maxObjectSize, outputRejectLevels ); } else { - groupRectangles( objects, minNeighbors, GROUP_EPS ); + detectMultiScaleNoGrouping( image, objects, rejectLevels, levelWeights, scaleFactor, minObjectSize, maxObjectSize, + outputRejectLevels ); + const double GROUP_EPS = 0.2; + if( outputRejectLevels ) + { + groupRectangles( objects, rejectLevels, levelWeights, minNeighbors, GROUP_EPS ); + } + else + { + groupRectangles( objects, minNeighbors, GROUP_EPS ); + } } } @@ -1195,7 +1210,35 @@ void CascadeClassifier::detectMultiScale( const Mat& image, std::vector& o std::vector fakeLevels; std::vector fakeWeights; detectMultiScale( image, objects, fakeLevels, fakeWeights, scaleFactor, - 
minNeighbors, flags, minObjectSize, maxObjectSize, false ); + minNeighbors, flags, minObjectSize, maxObjectSize ); +} + +void CascadeClassifier::detectMultiScale( const Mat& image, std::vector& objects, + std::vector& numDetections, double scaleFactor, + int minNeighbors, int flags, Size minObjectSize, + Size maxObjectSize ) +{ + CV_Assert( scaleFactor > 1 && image.depth() == CV_8U ); + + if( empty() ) + return; + + std::vector fakeLevels; + std::vector fakeWeights; + if( isOldFormatCascade() ) + { + std::vector vecAvgComp; + detectMultiScaleOldFormat( image, oldCascade, objects, fakeLevels, fakeWeights, vecAvgComp, scaleFactor, + minNeighbors, flags, minObjectSize, maxObjectSize ); + numDetections.resize(vecAvgComp.size()); + std::transform(vecAvgComp.begin(), vecAvgComp.end(), numDetections.begin(), getNeighbors()); + } + else + { + detectMultiScaleNoGrouping( image, objects, fakeLevels, fakeWeights, scaleFactor, minObjectSize, maxObjectSize ); + const double GROUP_EPS = 0.2; + groupRectangles( objects, numDetections, minNeighbors, GROUP_EPS ); + } } bool CascadeClassifier::Data::read(const FileNode &root) From fc82150edc219dabf680561f26ae5c2bb8d3c040 Mon Sep 17 00:00:00 2001 From: Ivan Korolev Date: Fri, 14 Jun 2013 08:21:42 +0400 Subject: [PATCH 067/121] Fixed a bug #2892 --- modules/nonfree/src/sift.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/nonfree/src/sift.cpp b/modules/nonfree/src/sift.cpp index 58ebd3101..5a7fd8940 100644 --- a/modules/nonfree/src/sift.cpp +++ b/modules/nonfree/src/sift.cpp @@ -774,9 +774,6 @@ void SIFT::operator()(InputArray _image, InputArray _mask, findScaleSpaceExtrema(gpyr, dogpyr, keypoints); KeyPointsFilter::removeDuplicated( keypoints ); - if( !mask.empty() ) - KeyPointsFilter::runByPixelsMask( keypoints, mask ); - if( nfeatures > 0 ) KeyPointsFilter::retainBest(keypoints, nfeatures); //t = (double)getTickCount() - t; @@ -791,6 +788,9 @@ void SIFT::operator()(InputArray _image, InputArray 
_mask, kpt.pt *= scale; kpt.size *= scale; } + + if( !mask.empty() ) + KeyPointsFilter::runByPixelsMask( keypoints, mask ); } else { From 58fa401b4d0cefe763e4b307802f34c96a942a44 Mon Sep 17 00:00:00 2001 From: Ivan Korolev Date: Fri, 14 Jun 2013 10:43:20 +0400 Subject: [PATCH 068/121] Fixed a bug #2405 --- modules/stitching/src/motion_estimators.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/stitching/src/motion_estimators.cpp b/modules/stitching/src/motion_estimators.cpp index ab27a46a2..c873bc721 100644 --- a/modules/stitching/src/motion_estimators.cpp +++ b/modules/stitching/src/motion_estimators.cpp @@ -69,13 +69,13 @@ struct CalcRotation K_from(0,0) = cameras[edge.from].focal; K_from(1,1) = cameras[edge.from].focal * cameras[edge.from].aspect; K_from(0,2) = cameras[edge.from].ppx; - K_from(0,2) = cameras[edge.from].ppy; + K_from(1,2) = cameras[edge.from].ppy; Mat_ K_to = Mat::eye(3, 3, CV_64F); K_to(0,0) = cameras[edge.to].focal; K_to(1,1) = cameras[edge.to].focal * cameras[edge.to].aspect; K_to(0,2) = cameras[edge.to].ppx; - K_to(0,2) = cameras[edge.to].ppy; + K_to(1,2) = cameras[edge.to].ppy; Mat R = K_from.inv() * pairwise_matches[pair_idx].H.inv() * K_to; cameras[edge.to].R = cameras[edge.from].R * R; From e6b18fc492e9115043b375c8b005687b24b84746 Mon Sep 17 00:00:00 2001 From: peng xiao Date: Fri, 14 Jun 2013 16:37:00 +0800 Subject: [PATCH 069/121] Fix a bug caused by NDEBUG macro; it is now removed. Revise some descriptions of the enums. --- modules/ocl/include/opencv2/ocl/ocl.hpp | 21 +++++++++++++-------- modules/ocl/src/initialization.cpp | 2 +- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp index dc58f6f2e..308383b61 100644 --- a/modules/ocl/include/opencv2/ocl/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl/ocl.hpp @@ -179,16 +179,21 @@ namespace cv bool cleanUp = true); //! 
Enable or disable OpenCL program binary caching onto local disk - // After a program (*.cl files in opencl/ folder) is built at runtime, we allow the compiled program to be - // cached onto local disk automatically, which may accelerate subsequent runs. - // Caching mode is controlled by the following enum - // Note, the feature is by default enabled when OpenCV is built in release mode. + // After a program (*.cl files in opencl/ folder) is built at runtime, we allow the + // compiled OpenCL program to be cached to the path automatically as "path/*.clb" + // binary file, which will be reused when the OpenCV executable is started again. + // + // Caching mode is controlled by the following enums + // Notes + // 1. the feature is by default enabled when OpenCV is built in release mode. + // 2. the CACHE_DEBUG / CACHE_RELEASE flags only effectively work with MSVC compiler; + // for GNU compilers, the function always treats the build as release mode (enabled by default). enum { - CACHE_NONE = 0, - CACHE_DEBUG = 0x1 << 0, - CACHE_RELEASE = 0x1 << 1, - CACHE_ALL = CACHE_DEBUG | CACHE_RELEASE, + CACHE_NONE = 0, // do not cache OpenCL binary + CACHE_DEBUG = 0x1 << 0, // cache OpenCL binary when built in debug mode (only work with MSVC) + CACHE_RELEASE = 0x1 << 1, // default behavior, only cache when built in release mode (only work with MSVC) + CACHE_ALL = CACHE_DEBUG | CACHE_RELEASE, // always cache opencl binary CACHE_UPDATE = 0x1 << 2 // if the binary cache file with the same name is already on the disk, it will be updated. 
}; CV_EXPORTS void setBinaryDiskCache(int mode = CACHE_RELEASE, cv::String path = "./"); diff --git a/modules/ocl/src/initialization.cpp b/modules/ocl/src/initialization.cpp index 9a0915ce5..bdae7059e 100644 --- a/modules/ocl/src/initialization.cpp +++ b/modules/ocl/src/initialization.cpp @@ -508,7 +508,7 @@ namespace cv { impl->update_disk_cache = (mode & CACHE_UPDATE) == CACHE_UPDATE; impl->enable_disk_cache = -#if !defined(NDEBUG) || defined(_DEBUG) +#ifdef _DEBUG (mode & CACHE_DEBUG) == CACHE_DEBUG; #else (mode & CACHE_RELEASE) == CACHE_RELEASE; From a4750f49c62f6b3f97715f1b430ac97b7d88b3a7 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Fri, 14 Jun 2013 12:53:44 +0400 Subject: [PATCH 070/121] fix for bug #3068 (PCA::computeVar for double input): The matrix g can have CV_32F or CV_64F type, but g.at uses only float template. This fix adds specialization for double type. --- modules/core/src/matmul.cpp | 57 +++++++++++++++++++++++-------------- 1 file changed, 36 insertions(+), 21 deletions(-) diff --git a/modules/core/src/matmul.cpp b/modules/core/src/matmul.cpp index 5988363d3..05a0c5552 100644 --- a/modules/core/src/matmul.cpp +++ b/modules/core/src/matmul.cpp @@ -2855,9 +2855,9 @@ PCA& PCA::operator()(InputArray _data, InputArray __mean, int flags, int maxComp if( _mean.data ) { - CV_Assert( _mean.size() == mean_sz ); + CV_Assert( _mean.size() == mean_sz ); _mean.convertTo(mean, ctype); - covar_flags |= CV_COVAR_USE_AVG; + covar_flags |= CV_COVAR_USE_AVG; } calcCovarMatrix( data, covar, mean, covar_flags, ctype ); @@ -2901,6 +2901,36 @@ PCA& PCA::operator()(InputArray _data, InputArray __mean, int flags, int maxComp return *this; } +template +int computeCumulativeEnergy(const Mat& eigenvalues, double retainedVariance) +{ + CV_DbgAssert( eigenvalues.type() == DataType::type ); + + Mat g(eigenvalues.size(), DataType::type); + + for(int ig = 0; ig < g.rows; ig++) + { + g.at(ig, 0) = 0; + for(int im = 0; im <= ig; im++) + { + g.at(ig,0) += 
eigenvalues.at(im,0); + } + } + + int L; + + for(L = 0; L < eigenvalues.rows; L++) + { + double energy = g.at(L, 0) / g.at(g.rows - 1, 0); + if(energy > retainedVariance) + break; + } + + L = std::max(2, L); + + return L; +} + PCA& PCA::computeVar(InputArray _data, InputArray __mean, int flags, double retainedVariance) { Mat data = _data.getMat(), _mean = __mean.getMat(); @@ -2977,26 +3007,11 @@ PCA& PCA::computeVar(InputArray _data, InputArray __mean, int flags, double reta } // compute the cumulative energy content for each eigenvector - Mat g(eigenvalues.size(), ctype); - - for(int ig = 0; ig < g.rows; ig++) - { - g.at(ig,0) = 0; - for(int im = 0; im <= ig; im++) - { - g.at(ig,0) += eigenvalues.at(im,0); - } - } - int L; - for(L = 0; L < eigenvalues.rows; L++) - { - double energy = g.at(L, 0) / g.at(g.rows - 1, 0); - if(energy > retainedVariance) - break; - } - - L = std::max(2, L); + if (ctype == CV_32F) + L = computeCumulativeEnergy(eigenvalues, retainedVariance); + else + L = computeCumulativeEnergy(eigenvalues, retainedVariance); // use clone() to physically copy the data and thus deallocate the original matrices eigenvalues = eigenvalues.rowRange(0,L).clone(); From 93200922fd75921ee74fbc8a176d3d9a3fdd0bd8 Mon Sep 17 00:00:00 2001 From: Alexander Shishkov Date: Fri, 14 Jun 2013 13:12:35 +0400 Subject: [PATCH 071/121] Fix bug with indices --- .../calib3d/camera_calibration/camera_calibration.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/tutorials/calib3d/camera_calibration/camera_calibration.rst b/doc/tutorials/calib3d/camera_calibration/camera_calibration.rst index 9196c87d6..6637e2590 100644 --- a/doc/tutorials/calib3d/camera_calibration/camera_calibration.rst +++ b/doc/tutorials/calib3d/camera_calibration/camera_calibration.rst @@ -12,8 +12,8 @@ For the distortion OpenCV takes into account the radial and tangential factors. .. 
math:: - x_{corrected} = x( 1 + k_1 r^2 + k_2 r^4 + k^3 r^6) \\ - y_{corrected} = y( 1 + k_1 r^2 + k_2 r^4 + k^3 r^6) + x_{corrected} = x( 1 + k_1 r^2 + k_2 r^4 + k_3 r^6) \\ + y_{corrected} = y( 1 + k_1 r^2 + k_2 r^4 + k_3 r^6) So for an old pixel point at :math:`(x,y)` coordinate in the input image, for a corrected output image its position will be :math:`(x_{corrected} y_{corrected})` . The presence of the radial distortion manifests in form of the "barrel" or "fish-eye" effect. From 0cee15eb7f8e10361e008b0428f70e9a781a75d6 Mon Sep 17 00:00:00 2001 From: Alexander Shishkov Date: Fri, 14 Jun 2013 15:10:25 +0400 Subject: [PATCH 072/121] Updated iOS camera. Added rotation flag. Added functions to lock/unlock focus, white balance and exposure. --- .../highgui/include/opencv2/highgui/cap_ios.h | 12 ++- .../highgui/src/cap_ios_abstract_camera.mm | 85 +++++++++++++++++++ modules/highgui/src/cap_ios_photo_camera.mm | 2 +- modules/highgui/src/cap_ios_video_camera.mm | 53 +++++++++--- 4 files changed, 138 insertions(+), 14 deletions(-) diff --git a/modules/highgui/include/opencv2/highgui/cap_ios.h b/modules/highgui/include/opencv2/highgui/cap_ios.h index 5bd5fe3c6..db3928f13 100644 --- a/modules/highgui/include/opencv2/highgui/cap_ios.h +++ b/modules/highgui/include/opencv2/highgui/cap_ios.h @@ -1,6 +1,4 @@ -/* - * cap_ios.h - * For iOS video I/O +/* For iOS video I/O * by Eduard Feicho on 29/07/12 * Copyright 2012. All rights reserved. 
* @@ -90,6 +88,12 @@ - (void)createVideoPreviewLayer; - (void)updateOrientation; +- (void)lockFocus; +- (void)unlockFocus; +- (void)lockExposure; +- (void)unlockExposure; +- (void)lockBalance; +- (void)unlockBalance; @end @@ -116,6 +120,7 @@ BOOL grayscaleMode; BOOL recordVideo; + BOOL rotateVideo; AVAssetWriterInput* recordAssetWriterInput; AVAssetWriterInputPixelBufferAdaptor* recordPixelBufferAdaptor; AVAssetWriter* recordAssetWriter; @@ -128,6 +133,7 @@ @property (nonatomic, assign) BOOL grayscaleMode; @property (nonatomic, assign) BOOL recordVideo; +@property (nonatomic, assign) BOOL rotateVideo; @property (nonatomic, retain) AVAssetWriterInput* recordAssetWriterInput; @property (nonatomic, retain) AVAssetWriterInputPixelBufferAdaptor* recordPixelBufferAdaptor; @property (nonatomic, retain) AVAssetWriter* recordAssetWriter; diff --git a/modules/highgui/src/cap_ios_abstract_camera.mm b/modules/highgui/src/cap_ios_abstract_camera.mm index b6a7d944f..a0e8f3e8b 100644 --- a/modules/highgui/src/cap_ios_abstract_camera.mm +++ b/modules/highgui/src/cap_ios_abstract_camera.mm @@ -405,4 +405,89 @@ } } +- (void)lockFocus; +{ + AVCaptureDevice *device = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo]; + if ([device isFocusModeSupported:AVCaptureFocusModeLocked]) { + NSError *error = nil; + if ([device lockForConfiguration:&error]) { + device.focusMode = AVCaptureFocusModeLocked; + [device unlockForConfiguration]; + } else { + NSLog(@"unable to lock device for locked focus configuration %@", [error localizedDescription]); + } + } +} + +- (void) unlockFocus; +{ + AVCaptureDevice *device = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo]; + if ([device isFocusModeSupported:AVCaptureFocusModeContinuousAutoFocus]) { + NSError *error = nil; + if ([device lockForConfiguration:&error]) { + device.focusMode = AVCaptureFocusModeContinuousAutoFocus; + [device unlockForConfiguration]; + } else { + NSLog(@"unable to lock device for autofocus configuration 
%@", [error localizedDescription]); + } + } +} + +- (void)lockExposure; +{ + AVCaptureDevice *device = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo]; + if ([device isExposureModeSupported:AVCaptureExposureModeLocked]) { + NSError *error = nil; + if ([device lockForConfiguration:&error]) { + device.exposureMode = AVCaptureExposureModeLocked; + [device unlockForConfiguration]; + } else { + NSLog(@"unable to lock device for locked exposure configuration %@", [error localizedDescription]); + } + } +} + +- (void) unlockExposure; +{ + AVCaptureDevice *device = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo]; + if ([device isExposureModeSupported:AVCaptureExposureModeContinuousAutoExposure]) { + NSError *error = nil; + if ([device lockForConfiguration:&error]) { + device.exposureMode = AVCaptureExposureModeContinuousAutoExposure; + [device unlockForConfiguration]; + } else { + NSLog(@"unable to lock device for autoexposure configuration %@", [error localizedDescription]); + } + } +} + +- (void)lockBalance; +{ + AVCaptureDevice *device = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo]; + if ([device isWhiteBalanceModeSupported:AVCaptureWhiteBalanceModeLocked]) { + NSError *error = nil; + if ([device lockForConfiguration:&error]) { + device.whiteBalanceMode = AVCaptureWhiteBalanceModeLocked; + [device unlockForConfiguration]; + } else { + NSLog(@"unable to lock device for locked exposure configuration %@", [error localizedDescription]); + } + } +} + +- (void) unlockBalance; +{ + AVCaptureDevice *device = [AVCaptureDevice defaultDeviceWithMediaType:AVMediaTypeVideo]; + if ([device isWhiteBalanceModeSupported:AVCaptureWhiteBalanceModeContinuousAutoWhiteBalance]) { + NSError *error = nil; + if ([device lockForConfiguration:&error]) { + device.whiteBalanceMode = AVCaptureWhiteBalanceModeContinuousAutoWhiteBalance; + [device unlockForConfiguration]; + } else { + NSLog(@"unable to lock device for autoexposure configuration %@", [error 
localizedDescription]); + } + } +} + @end + diff --git a/modules/highgui/src/cap_ios_photo_camera.mm b/modules/highgui/src/cap_ios_photo_camera.mm index f05cfa5f8..f8891f227 100644 --- a/modules/highgui/src/cap_ios_photo_camera.mm +++ b/modules/highgui/src/cap_ios_photo_camera.mm @@ -32,7 +32,7 @@ #import "opencv2/highgui/cap_ios.h" #include "precomp.hpp" -#pragma mark - Private Interface +#pragma mark - Private Interface mark - Private Interface @interface CvPhotoCamera () diff --git a/modules/highgui/src/cap_ios_video_camera.mm b/modules/highgui/src/cap_ios_video_camera.mm index 1f9ea14bf..588adfc9c 100644 --- a/modules/highgui/src/cap_ios_video_camera.mm +++ b/modules/highgui/src/cap_ios_video_camera.mm @@ -30,7 +30,6 @@ #import "opencv2/highgui/cap_ios.h" #include "precomp.hpp" - #import @@ -70,6 +69,7 @@ static CGFloat DegreesToRadians(CGFloat degrees) {return degrees * M_PI / 180;}; @synthesize videoDataOutput; @synthesize recordVideo; +@synthesize rotateVideo; //@synthesize videoFileOutput; @synthesize recordAssetWriterInput; @synthesize recordPixelBufferAdaptor; @@ -85,6 +85,7 @@ static CGFloat DegreesToRadians(CGFloat degrees) {return degrees * M_PI / 180;}; if (self) { self.useAVCaptureVideoPreviewLayer = NO; self.recordVideo = NO; + self.rotateVideo = NO; } return self; } @@ -269,13 +270,8 @@ static CGFloat DegreesToRadians(CGFloat degrees) {return degrees * M_PI / 180;}; } - - - #pragma mark - Private Interface - - - (void)createVideoDataOutput; { // Make a video data output @@ -389,6 +385,38 @@ static CGFloat DegreesToRadians(CGFloat degrees) {return degrees * M_PI / 180;}; [self.parentView.layer addSublayer:self.customPreviewLayer]; } +- (CVPixelBufferRef) pixelBufferFromCGImage: (CGImageRef) image +{ + + CGSize frameSize = CGSizeMake(CGImageGetWidth(image), CGImageGetHeight(image)); + NSDictionary *options = [NSDictionary dictionaryWithObjectsAndKeys: + [NSNumber numberWithBool:NO], kCVPixelBufferCGImageCompatibilityKey, + [NSNumber 
numberWithBool:NO], kCVPixelBufferCGBitmapContextCompatibilityKey, + nil]; + CVPixelBufferRef pxbuffer = NULL; + CVReturn status = CVPixelBufferCreate(kCFAllocatorDefault, frameSize.width, + frameSize.height, kCVPixelFormatType_32ARGB, (CFDictionaryRef) CFBridgingRetain(options), + &pxbuffer); + NSParameterAssert(status == kCVReturnSuccess && pxbuffer != NULL); + + CVPixelBufferLockBaseAddress(pxbuffer, 0); + void *pxdata = CVPixelBufferGetBaseAddress(pxbuffer); + + + CGColorSpaceRef rgbColorSpace = CGColorSpaceCreateDeviceRGB(); + CGContextRef context = CGBitmapContextCreate(pxdata, frameSize.width, + frameSize.height, 8, 4*frameSize.width, rgbColorSpace, + kCGImageAlphaPremultipliedFirst); + + CGContextDrawImage(context, CGRectMake(0, 0, CGImageGetWidth(image), + CGImageGetHeight(image)), image); + CGColorSpaceRelease(rgbColorSpace); + CGContextRelease(context); + + CVPixelBufferUnlockBaseAddress(pxbuffer, 0); + + return pxbuffer; +} #pragma mark - Protocol AVCaptureVideoDataOutputSampleBufferDelegate @@ -522,7 +550,8 @@ static CGFloat DegreesToRadians(CGFloat degrees) {return degrees * M_PI / 180;}; } if (self.recordAssetWriterInput.readyForMoreMediaData) { - if (! [self.recordPixelBufferAdaptor appendPixelBuffer:imageBuffer + CVImageBufferRef pixelBuffer = [self pixelBufferFromCGImage:dstImage]; + if (! 
[self.recordPixelBufferAdaptor appendPixelBuffer:pixelBuffer withPresentationTime:lastSampleTime] ) { NSLog(@"Video Writing Error"); } @@ -543,9 +572,12 @@ static CGFloat DegreesToRadians(CGFloat degrees) {return degrees * M_PI / 180;}; - (void)updateOrientation; { - NSLog(@"rotate.."); - self.customPreviewLayer.bounds = CGRectMake(0, 0, self.parentView.frame.size.width, self.parentView.frame.size.height); - [self layoutPreviewLayer]; + if (self.rotateVideo == YES) + { + NSLog(@"rotate.."); + self.customPreviewLayer.bounds = CGRectMake(0, 0, self.parentView.frame.size.width, self.parentView.frame.size.height); + [self layoutPreviewLayer]; + } } @@ -583,3 +615,4 @@ static CGFloat DegreesToRadians(CGFloat degrees) {return degrees * M_PI / 180;}; } @end + From fee81210405ce01bccc810be59c957b8f9d227dc Mon Sep 17 00:00:00 2001 From: Ivan Korolev Date: Fri, 14 Jun 2013 17:03:15 +0400 Subject: [PATCH 073/121] Added regression tests for SURF/SIFT (related to #2892) --- modules/nonfree/test/test_features2d.cpp | 73 ++++++++++++++++++++++++ 1 file changed, 73 insertions(+) diff --git a/modules/nonfree/test/test_features2d.cpp b/modules/nonfree/test/test_features2d.cpp index 001d628aa..4cce77b9d 100644 --- a/modules/nonfree/test/test_features2d.cpp +++ b/modules/nonfree/test/test_features2d.cpp @@ -1146,3 +1146,76 @@ protected: TEST(Features2d_SIFTHomographyTest, regression) { CV_DetectPlanarTest test("SIFT", 80); test.safe_run(); } TEST(Features2d_SURFHomographyTest, regression) { CV_DetectPlanarTest test("SURF", 80); test.safe_run(); } +class FeatureDetectorUsingMaskTest : public cvtest::BaseTest +{ +public: + FeatureDetectorUsingMaskTest(const Ptr& featureDetector) : + featureDetector_(featureDetector) + { + CV_Assert(!featureDetector_.empty()); + } + +protected: + + void run(int) + { + const int nStepX = 2; + const int nStepY = 2; + + const string imageFilename = string(ts->get_data_path()) + "/features2d/tsukuba.png"; + + Mat image = imread(imageFilename); + 
if(image.empty()) + { + ts->printf(cvtest::TS::LOG, "Image %s can not be read.\n", imageFilename.c_str()); + ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_TEST_DATA); + return; + } + + Mat mask(image.size(), CV_8U); + + const int stepX = image.size().width / nStepX; + const int stepY = image.size().height / nStepY; + + vector keyPoints; + vector points; + for(int i=0; idetect(image, keyPoints, mask); + KeyPoint::convert(keyPoints, points); + + for(size_t k=0; kprintf(cvtest::TS::LOG, "The feature point is outside of the mask."); + ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT); + return; + } + } + } + + ts->set_failed_test_info( cvtest::TS::OK ); + } + + Ptr featureDetector_; +}; + +TEST(Features2d_SIFT_using_mask, regression) +{ + FeatureDetectorUsingMaskTest test(Algorithm::create("Feature2D.SIFT")); + test.safe_run(); +} + +TEST(DISABLED_Features2d_SURF_using_mask, regression) +{ + FeatureDetectorUsingMaskTest test(Algorithm::create("Feature2D.SURF")); + test.safe_run(); +} + From fcfcd4cbced5e188ffd01758150e07b2210b25ab Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Mon, 29 Apr 2013 11:46:49 +0400 Subject: [PATCH 074/121] refactored box filter --- .../gpubgsegm/include/opencv2/gpubgsegm.hpp | 2 +- modules/gpubgsegm/src/gmg.cpp | 4 +- modules/gpufilters/CMakeLists.txt | 2 +- .../gpufilters/include/opencv2/gpufilters.hpp | 70 +++++-- modules/gpufilters/perf/perf_filters.cpp | 4 +- modules/gpufilters/src/filtering.cpp | 196 +++++++++++------- modules/gpufilters/src/precomp.hpp | 7 +- modules/gpufilters/test/test_filters.cpp | 15 +- 8 files changed, 193 insertions(+), 107 deletions(-) diff --git a/modules/gpubgsegm/include/opencv2/gpubgsegm.hpp b/modules/gpubgsegm/include/opencv2/gpubgsegm.hpp index e7a29b576..3fe62ec94 100644 --- a/modules/gpubgsegm/include/opencv2/gpubgsegm.hpp +++ b/modules/gpubgsegm/include/opencv2/gpubgsegm.hpp @@ -321,7 +321,7 @@ private: GpuMat colors_; GpuMat weights_; - Ptr boxFilter_; + Ptr boxFilter_; GpuMat 
buf_; }; diff --git a/modules/gpubgsegm/src/gmg.cpp b/modules/gpubgsegm/src/gmg.cpp index a38cbffac..b97f0836f 100644 --- a/modules/gpubgsegm/src/gmg.cpp +++ b/modules/gpubgsegm/src/gmg.cpp @@ -100,7 +100,7 @@ void cv::gpu::GMG_GPU::initialize(cv::Size frameSize, float min, float max) nfeatures_.setTo(cv::Scalar::all(0)); if (smoothingRadius > 0) - boxFilter_ = cv::gpu::createBoxFilter_GPU(CV_8UC1, CV_8UC1, cv::Size(smoothingRadius, smoothingRadius)); + boxFilter_ = cv::gpu::createBoxFilter(CV_8UC1, -1, cv::Size(smoothingRadius, smoothingRadius)); loadConstants(frameSize_.width, frameSize_.height, minVal_, maxVal_, quantizationLevels, backgroundPrior, decisionThreshold, maxFeatures, numInitializationFrames); } @@ -141,7 +141,7 @@ void cv::gpu::GMG_GPU::operator ()(const cv::gpu::GpuMat& frame, cv::gpu::GpuMat // medianBlur if (smoothingRadius > 0) { - boxFilter_->apply(fgmask, buf_, cv::Rect(0,0,-1,-1), stream); + boxFilter_->apply(fgmask, buf_, stream); int minCount = (smoothingRadius * smoothingRadius + 1) / 2; double thresh = 255.0 * minCount / (smoothingRadius * smoothingRadius); cv::gpu::threshold(buf_, fgmask, thresh, 255.0, cv::THRESH_BINARY, stream); diff --git a/modules/gpufilters/CMakeLists.txt b/modules/gpufilters/CMakeLists.txt index 18f6d7f7b..640de8c11 100644 --- a/modules/gpufilters/CMakeLists.txt +++ b/modules/gpufilters/CMakeLists.txt @@ -6,4 +6,4 @@ set(the_description "GPU-accelerated Image Filtering") ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4324 /wd4512 -Wundef -Wmissing-declarations) -ocv_define_module(gpufilters opencv_imgproc OPTIONAL opencv_gpuarithm) +ocv_define_module(gpufilters opencv_imgproc opencv_gpuarithm) diff --git a/modules/gpufilters/include/opencv2/gpufilters.hpp b/modules/gpufilters/include/opencv2/gpufilters.hpp index 582c55d99..5cc2ac49a 100644 --- a/modules/gpufilters/include/opencv2/gpufilters.hpp +++ b/modules/gpufilters/include/opencv2/gpufilters.hpp @@ -48,10 +48,61 @@ #endif #include "opencv2/core/gpu.hpp" 
-#include "opencv2/core/base.hpp" + +#if defined __GNUC__ + #define __OPENCV_GPUFILTERS_DEPR_BEFORE__ + #define __OPENCV_GPUFILTERS_DEPR_AFTER__ __attribute__ ((deprecated)) +#elif (defined WIN32 || defined _WIN32) + #define __OPENCV_GPUFILTERS_DEPR_BEFORE__ __declspec(deprecated) + #define __OPENCV_GPUFILTERS_DEPR_AFTER__ +#else + #define __OPENCV_GPUFILTERS_DEPR_BEFORE__ + #define __OPENCV_GPUFILTERS_DEPR_AFTER__ +#endif namespace cv { namespace gpu { +class CV_EXPORTS Filter : public Algorithm +{ +public: + virtual void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null()) = 0; +}; + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Box Filter + +//! smooths the image using the normalized box filter +//! supports CV_8UC1, CV_8UC4 types +CV_EXPORTS Ptr createBoxFilter(int srcType, int dstType, Size ksize, Point anchor = Point(-1,-1), + int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0)); + +__OPENCV_GPUFILTERS_DEPR_BEFORE__ void boxFilter(InputArray src, OutputArray dst, int dstType, + Size ksize, Point anchor = Point(-1,-1), + Stream& stream = Stream::Null()) __OPENCV_GPUFILTERS_DEPR_AFTER__; + +inline void boxFilter(InputArray src, OutputArray dst, int dstType, Size ksize, Point anchor, Stream& stream) +{ + Ptr f = gpu::createBoxFilter(src.type(), dstType, ksize, anchor); + f->apply(src, dst, stream); +} + +__OPENCV_GPUFILTERS_DEPR_BEFORE__ void blur(InputArray src, OutputArray dst, Size ksize, + Point anchor = Point(-1,-1), + Stream& stream = Stream::Null()) __OPENCV_GPUFILTERS_DEPR_AFTER__; + +inline void blur(InputArray src, OutputArray dst, Size ksize, Point anchor, Stream& stream) +{ + Ptr f = gpu::createBoxFilter(src.type(), -1, ksize, anchor); + f->apply(src, dst, stream); +} + + + + + + + + /*! The Base Class for 1D or Row-wise Filters @@ -128,13 +179,7 @@ CV_EXPORTS Ptr getRowSumFilter_GPU(int srcType, int sumType, //! 
supports only CV_8UC1 sum type and CV_32FC1 dst type CV_EXPORTS Ptr getColumnSumFilter_GPU(int sumType, int dstType, int ksize, int anchor = -1); -//! returns 2D box filter -//! supports CV_8UC1 and CV_8UC4 source type, dst type must be the same as source type -CV_EXPORTS Ptr getBoxFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor = Point(-1, -1)); -//! returns box filter engine -CV_EXPORTS Ptr createBoxFilter_GPU(int srcType, int dstType, const Size& ksize, - const Point& anchor = Point(-1,-1)); //! returns 2D morphological filter //! only MORPH_ERODE and MORPH_DILATE are supported @@ -205,15 +250,7 @@ CV_EXPORTS Ptr getMaxFilter_GPU(int srcType, int dstType, const //! returns minimum filter CV_EXPORTS Ptr getMinFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor = Point(-1,-1)); -//! smooths the image using the normalized box filter -//! supports CV_8UC1, CV_8UC4 types -CV_EXPORTS void boxFilter(const GpuMat& src, GpuMat& dst, int ddepth, Size ksize, Point anchor = Point(-1,-1), Stream& stream = Stream::Null()); -//! a synonym for normalized box filter -static inline void blur(const GpuMat& src, GpuMat& dst, Size ksize, Point anchor = Point(-1,-1), Stream& stream = Stream::Null()) -{ - boxFilter(src, dst, -1, ksize, anchor, stream); -} //! 
erodes the image (applies the local minimum operator) CV_EXPORTS void erode(const GpuMat& src, GpuMat& dst, const Mat& kernel, Point anchor = Point(-1, -1), int iterations = 1); @@ -266,4 +303,7 @@ CV_EXPORTS void Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize }} // namespace cv { namespace gpu { +#undef __OPENCV_GPUFILTERS_DEPR_BEFORE__ +#undef __OPENCV_GPUFILTERS_DEPR_AFTER__ + #endif /* __OPENCV_GPUFILTERS_HPP__ */ diff --git a/modules/gpufilters/perf/perf_filters.cpp b/modules/gpufilters/perf/perf_filters.cpp index 64cf4cc5d..35c4a94fb 100644 --- a/modules/gpufilters/perf/perf_filters.cpp +++ b/modules/gpufilters/perf/perf_filters.cpp @@ -70,7 +70,9 @@ PERF_TEST_P(Sz_Type_KernelSz, Blur, const cv::gpu::GpuMat d_src(src); cv::gpu::GpuMat dst; - TEST_CYCLE() cv::gpu::blur(d_src, dst, cv::Size(ksize, ksize)); + cv::Ptr blurFilter = cv::gpu::createBoxFilter(d_src.type(), -1, cv::Size(ksize, ksize)); + + TEST_CYCLE() blurFilter->apply(d_src, dst); GPU_SANITY_CHECK(dst, 1); } diff --git a/modules/gpufilters/src/filtering.cpp b/modules/gpufilters/src/filtering.cpp index d40293d4a..35df05ec6 100644 --- a/modules/gpufilters/src/filtering.cpp +++ b/modules/gpufilters/src/filtering.cpp @@ -47,13 +47,13 @@ using namespace cv::gpu; #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) +Ptr cv::gpu::createBoxFilter(int, int, Size, Point, int, Scalar) { throw_no_cuda(); return Ptr(); } + Ptr cv::gpu::createFilter2D_GPU(const Ptr&, int, int) { throw_no_cuda(); return Ptr(0); } Ptr cv::gpu::createSeparableFilter_GPU(const Ptr&, const Ptr&, int, int, int) { throw_no_cuda(); return Ptr(0); } Ptr cv::gpu::createSeparableFilter_GPU(const Ptr&, const Ptr&, int, int, int, GpuMat&) { throw_no_cuda(); return Ptr(0); } Ptr cv::gpu::getRowSumFilter_GPU(int, int, int, int) { throw_no_cuda(); return Ptr(0); } Ptr cv::gpu::getColumnSumFilter_GPU(int, int, int, int) { throw_no_cuda(); return Ptr(0); } -Ptr cv::gpu::getBoxFilter_GPU(int, int, const Size&, Point) { 
throw_no_cuda(); return Ptr(0); } -Ptr cv::gpu::createBoxFilter_GPU(int, int, const Size&, const Point&) { throw_no_cuda(); return Ptr(0); } Ptr cv::gpu::getMorphologyFilter_GPU(int, int, const Mat&, const Size&, Point) { throw_no_cuda(); return Ptr(0); } Ptr cv::gpu::createMorphologyFilter_GPU(int, int, const Mat&, const Point&, int) { throw_no_cuda(); return Ptr(0); } Ptr cv::gpu::createMorphologyFilter_GPU(int, int, const Mat&, GpuMat&, const Point&, int) { throw_no_cuda(); return Ptr(0); } @@ -70,7 +70,6 @@ Ptr cv::gpu::createGaussianFilter_GPU(int, Size, GpuMat&, doub Ptr cv::gpu::getMaxFilter_GPU(int, int, const Size&, Point) { throw_no_cuda(); return Ptr(0); } Ptr cv::gpu::getMinFilter_GPU(int, int, const Size&, Point) { throw_no_cuda(); return Ptr(0); } -void cv::gpu::boxFilter(const GpuMat&, GpuMat&, int, Size, Point, Stream&) { throw_no_cuda(); } void cv::gpu::erode(const GpuMat&, GpuMat&, const Mat&, Point, int) { throw_no_cuda(); } void cv::gpu::erode(const GpuMat&, GpuMat&, const Mat&, GpuMat&, Point, int, Stream&) { throw_no_cuda(); } void cv::gpu::dilate(const GpuMat&, GpuMat&, const Mat&, Point, int) { throw_no_cuda(); } @@ -92,20 +91,135 @@ void cv::gpu::Laplacian(const GpuMat&, GpuMat&, int, int, double, int, Stream&) namespace { - inline void normalizeAnchor(int& anchor, int ksize) + void normalizeAnchor(int& anchor, int ksize) { if (anchor < 0) anchor = ksize >> 1; - CV_Assert(0 <= anchor && anchor < ksize); + CV_Assert( 0 <= anchor && anchor < ksize ); } - inline void normalizeAnchor(Point& anchor, const Size& ksize) + void normalizeAnchor(Point& anchor, Size ksize) { normalizeAnchor(anchor.x, ksize.width); normalizeAnchor(anchor.y, ksize.height); } +} +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Box Filter + +namespace +{ + class NPPBoxFilter : public Filter + { + public: + NPPBoxFilter(int srcType, int dstType, Size ksize, Point anchor, int borderMode, Scalar borderVal); + + void 
apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); + + private: + typedef NppStatus (*nppFilterBox_t)(const Npp8u* pSrc, Npp32s nSrcStep, Npp8u* pDst, Npp32s nDstStep, + NppiSize oSizeROI, NppiSize oMaskSize, NppiPoint oAnchor); + + Size ksize_; + Point anchor_; + int type_; + nppFilterBox_t func_; + int borderMode_; + Scalar borderVal_; + GpuMat srcBorder_; + }; + + NPPBoxFilter::NPPBoxFilter(int srcType, int dstType, Size ksize, Point anchor, int borderMode, Scalar borderVal) : + ksize_(ksize), anchor_(anchor), type_(srcType), borderMode_(borderMode), borderVal_(borderVal) + { + static const nppFilterBox_t funcs[] = {0, nppiFilterBox_8u_C1R, 0, 0, nppiFilterBox_8u_C4R}; + + CV_Assert( srcType == CV_8UC1 || srcType == CV_8UC4 ); + CV_Assert( dstType == srcType ); + + normalizeAnchor(anchor_, ksize); + + func_ = funcs[CV_MAT_CN(srcType)]; + } + + void NPPBoxFilter::apply(InputArray _src, OutputArray _dst, Stream& _stream) + { + GpuMat src = _src.getGpuMat(); + CV_Assert( src.type() == type_ ); + + gpu::copyMakeBorder(src, srcBorder_, ksize_.height, ksize_.height, ksize_.width, ksize_.width, borderMode_, borderVal_, _stream); + + _dst.create(src.size(), src.type()); + GpuMat dst = _dst.getGpuMat(); + + GpuMat srcRoi = srcBorder_(Rect(ksize_.width, ksize_.height, src.cols, src.rows)); + + cudaStream_t stream = StreamAccessor::getStream(_stream); + NppStreamHandler h(stream); + + NppiSize oSizeROI; + oSizeROI.width = src.cols; + oSizeROI.height = src.rows; + + NppiSize oMaskSize; + oMaskSize.height = ksize_.height; + oMaskSize.width = ksize_.width; + + NppiPoint oAnchor; + oAnchor.x = anchor_.x; + oAnchor.y = anchor_.y; + + nppSafeCall( func_(srcRoi.ptr(), static_cast(srcRoi.step), + dst.ptr(), static_cast(dst.step), + oSizeROI, oMaskSize, oAnchor) ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + } +} + +Ptr cv::gpu::createBoxFilter(int srcType, int dstType, Size ksize, Point anchor, int borderMode, Scalar borderVal) +{ + if 
(dstType < 0) + dstType = srcType; + + return new NPPBoxFilter(srcType, dstType, ksize, anchor, borderMode, borderVal); +} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +namespace +{ inline void normalizeROI(Rect& roi, const Size& ksize, const Point& anchor, const Size& src_size) { if (roi == Rect(0,0,-1,-1)) @@ -329,74 +443,6 @@ Ptr cv::gpu::getColumnSumFilter_GPU(int sumType, int dstTy return Ptr(new NppColumnSumFilter(ksize, anchor)); } -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Box Filter - -namespace -{ - typedef NppStatus (*nppFilterBox_t)(const Npp8u * pSrc, Npp32s nSrcStep, Npp8u * pDst, Npp32s nDstStep, NppiSize oSizeROI, - NppiSize oMaskSize, NppiPoint oAnchor); - - struct NPPBoxFilter : public BaseFilter_GPU - { - NPPBoxFilter(const Size& ksize_, const Point& anchor_, nppFilterBox_t func_) : BaseFilter_GPU(ksize_, anchor_), func(func_) {} - - virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null()) - { - NppiSize sz; - sz.width = src.cols; - sz.height = src.rows; - NppiSize oKernelSize; - oKernelSize.height = ksize.height; - oKernelSize.width = ksize.width; - NppiPoint oAnchor; - oAnchor.x = anchor.x; - oAnchor.y = anchor.y; - - cudaStream_t stream = StreamAccessor::getStream(s); - - NppStreamHandler h(stream); - - nppSafeCall( func(src.ptr(), static_cast(src.step), - dst.ptr(), static_cast(dst.step), sz, oKernelSize, oAnchor) ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - - nppFilterBox_t func; - }; -} - -Ptr cv::gpu::getBoxFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor) -{ - static const nppFilterBox_t nppFilterBox_callers[] = {0, nppiFilterBox_8u_C1R, 0, 0, nppiFilterBox_8u_C4R}; - - CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC4) && dstType == srcType); - - normalizeAnchor(anchor, ksize); - - return Ptr(new NPPBoxFilter(ksize, anchor, nppFilterBox_callers[CV_MAT_CN(srcType)])); -} - -Ptr 
cv::gpu::createBoxFilter_GPU(int srcType, int dstType, const Size& ksize, const Point& anchor) -{ - Ptr boxFilter = getBoxFilter_GPU(srcType, dstType, ksize, anchor); - return createFilter2D_GPU(boxFilter, srcType, dstType); -} - -void cv::gpu::boxFilter(const GpuMat& src, GpuMat& dst, int ddepth, Size ksize, Point anchor, Stream& stream) -{ - int sdepth = src.depth(), cn = src.channels(); - if( ddepth < 0 ) - ddepth = sdepth; - - dst.create(src.size(), CV_MAKETYPE(ddepth, cn)); - - Ptr f = createBoxFilter_GPU(src.type(), dst.type(), ksize, anchor); - f->apply(src, dst, Rect(0,0,-1,-1), stream); -} - //////////////////////////////////////////////////////////////////////////////////////////////////// // Morphology Filter @@ -633,7 +679,6 @@ void cv::gpu::morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat& ke erode(buf2, dst, kernel, buf1, anchor, iterations, stream); break; -#ifdef HAVE_OPENCV_GPUARITHM case MORPH_GRADIENT: erode(src, buf2, kernel, buf1, anchor, iterations, stream); dilate(src, dst, kernel, buf1, anchor, iterations, stream); @@ -651,7 +696,6 @@ void cv::gpu::morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat& ke erode(dst, buf2, kernel, buf1, anchor, iterations, stream); gpu::subtract(buf2, src, dst, GpuMat(), -1, stream); break; -#endif default: CV_Error(cv::Error::StsBadArg, "unknown morphological operation"); diff --git a/modules/gpufilters/src/precomp.hpp b/modules/gpufilters/src/precomp.hpp index 3add0f2af..c3d5e020d 100644 --- a/modules/gpufilters/src/precomp.hpp +++ b/modules/gpufilters/src/precomp.hpp @@ -46,14 +46,9 @@ #include #include "opencv2/gpufilters.hpp" +#include "opencv2/gpuarithm.hpp" #include "opencv2/imgproc.hpp" #include "opencv2/core/private.gpu.hpp" -#include "opencv2/opencv_modules.hpp" - -#ifdef HAVE_OPENCV_GPUARITHM -# include "opencv2/gpuarithm.hpp" -#endif - #endif /* __OPENCV_PRECOMP_H__ */ diff --git a/modules/gpufilters/test/test_filters.cpp b/modules/gpufilters/test/test_filters.cpp index 
5adcd87a4..a63d92b3d 100644 --- a/modules/gpufilters/test/test_filters.cpp +++ b/modules/gpufilters/test/test_filters.cpp @@ -70,13 +70,14 @@ namespace ///////////////////////////////////////////////////////////////////////////////////////////////// // Blur -PARAM_TEST_CASE(Blur, cv::gpu::DeviceInfo, cv::Size, MatType, KSize, Anchor, UseRoi) +PARAM_TEST_CASE(Blur, cv::gpu::DeviceInfo, cv::Size, MatType, KSize, Anchor, BorderType, UseRoi) { cv::gpu::DeviceInfo devInfo; cv::Size size; int type; cv::Size ksize; cv::Point anchor; + int borderType; bool useRoi; virtual void SetUp() @@ -86,7 +87,8 @@ PARAM_TEST_CASE(Blur, cv::gpu::DeviceInfo, cv::Size, MatType, KSize, Anchor, Use type = GET_PARAM(2); ksize = GET_PARAM(3); anchor = GET_PARAM(4); - useRoi = GET_PARAM(5); + borderType = GET_PARAM(5); + useRoi = GET_PARAM(6); cv::gpu::setDevice(devInfo.deviceID()); } @@ -96,13 +98,15 @@ GPU_TEST_P(Blur, Accuracy) { cv::Mat src = randomMat(size, type); + cv::Ptr blurFilter = cv::gpu::createBoxFilter(src.type(), -1, ksize, anchor, borderType); + cv::gpu::GpuMat dst = createMat(size, type, useRoi); - cv::gpu::blur(loadMat(src, useRoi), dst, ksize, anchor); + blurFilter->apply(loadMat(src, useRoi), dst); cv::Mat dst_gold; - cv::blur(src, dst_gold, ksize, anchor); + cv::blur(src, dst_gold, ksize, anchor, borderType); - EXPECT_MAT_NEAR(getInnerROI(dst_gold, ksize), getInnerROI(dst, ksize), 1.0); + EXPECT_MAT_NEAR(dst_gold, dst, 1.0); } INSTANTIATE_TEST_CASE_P(GPU_Filters, Blur, testing::Combine( @@ -111,6 +115,7 @@ INSTANTIATE_TEST_CASE_P(GPU_Filters, Blur, testing::Combine( testing::Values(MatType(CV_8UC1), MatType(CV_8UC4)), testing::Values(KSize(cv::Size(3, 3)), KSize(cv::Size(5, 5)), KSize(cv::Size(7, 7))), testing::Values(Anchor(cv::Point(-1, -1)), Anchor(cv::Point(0, 0)), Anchor(cv::Point(2, 2))), + testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT)), WHOLE_SUBMAT)); 
///////////////////////////////////////////////////////////////////////////////////////////////// From 1eedc6c42aaf7b5ac5cd5edca1b0a0367c7eb3f6 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Mon, 29 Apr 2013 11:51:47 +0400 Subject: [PATCH 075/121] refactored Linear Filter --- .../gpufilters/include/opencv2/gpufilters.hpp | 41 ++- modules/gpufilters/perf/perf_filters.cpp | 81 +++-- modules/gpufilters/src/cuda/filter2d.cu | 189 +++++------ modules/gpufilters/src/filtering.cpp | 319 ++++++++---------- modules/gpufilters/test/test_filters.cpp | 210 ++++++------ samples/gpu/performance/tests.cpp | 5 +- 6 files changed, 416 insertions(+), 429 deletions(-) diff --git a/modules/gpufilters/include/opencv2/gpufilters.hpp b/modules/gpufilters/include/opencv2/gpufilters.hpp index 5cc2ac49a..32d3403d5 100644 --- a/modules/gpufilters/include/opencv2/gpufilters.hpp +++ b/modules/gpufilters/include/opencv2/gpufilters.hpp @@ -96,6 +96,34 @@ inline void blur(InputArray src, OutputArray dst, Size ksize, Point anchor, Stre f->apply(src, dst, stream); } +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Linear Filter + +//! non-separable linear 2D filter +CV_EXPORTS Ptr createLinearFilter(int srcType, int dstType, InputArray kernel, Point anchor = Point(-1,-1), + int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0)); + +__OPENCV_GPUFILTERS_DEPR_BEFORE__ void filter2D(InputArray src, OutputArray dst, int ddepth, InputArray kernel, + Point anchor = Point(-1,-1), int borderType = BORDER_DEFAULT, + Stream& stream = Stream::Null()) __OPENCV_GPUFILTERS_DEPR_AFTER__; + +inline void filter2D(InputArray src, OutputArray dst, int ddepth, InputArray kernel, Point anchor, int borderType, Stream& stream) +{ + Ptr f = gpu::createLinearFilter(src.type(), ddepth, kernel, anchor, borderType); + f->apply(src, dst, stream); +} + + + + + + + +//! applies Laplacian operator to the image +//! 
supports only ksize = 1 and ksize = 3 +CV_EXPORTS void Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize = 1, double scale = 1, int borderType = BORDER_DEFAULT, Stream& stream = Stream::Null()); + + @@ -194,13 +222,7 @@ CV_EXPORTS Ptr createMorphologyFilter_GPU(int op, int type, co CV_EXPORTS Ptr createMorphologyFilter_GPU(int op, int type, const Mat& kernel, GpuMat& buf, const Point& anchor = Point(-1,-1), int iterations = 1); -//! returns 2D filter with the specified kernel -//! supports CV_8U, CV_16U and CV_32F one and four channel image -CV_EXPORTS Ptr getLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT); -//! returns the non-separable linear filter engine -CV_EXPORTS Ptr createLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, - Point anchor = Point(-1,-1), int borderType = BORDER_DEFAULT); //! returns the primitive row filter with the specified kernel. //! supports only CV_8UC1, CV_8UC4, CV_16SC1, CV_16SC2, CV_32SC1, CV_32FC1 source type. @@ -269,9 +291,6 @@ CV_EXPORTS void morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat& CV_EXPORTS void morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, GpuMat& buf1, GpuMat& buf2, Point anchor = Point(-1, -1), int iterations = 1, Stream& stream = Stream::Null()); -//! applies non-separable 2D linear filter to the image -CV_EXPORTS void filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernel, Point anchor=Point(-1,-1), int borderType = BORDER_DEFAULT, Stream& stream = Stream::Null()); - //! 
applies separable 2D linear filter to the image CV_EXPORTS void sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY, Point anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1); @@ -297,10 +316,6 @@ CV_EXPORTS void GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, double CV_EXPORTS void GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, GpuMat& buf, double sigma1, double sigma2 = 0, int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, Stream& stream = Stream::Null()); -//! applies Laplacian operator to the image -//! supports only ksize = 1 and ksize = 3 -CV_EXPORTS void Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize = 1, double scale = 1, int borderType = BORDER_DEFAULT, Stream& stream = Stream::Null()); - }} // namespace cv { namespace gpu { #undef __OPENCV_GPUFILTERS_DEPR_BEFORE__ diff --git a/modules/gpufilters/perf/perf_filters.cpp b/modules/gpufilters/perf/perf_filters.cpp index 35c4a94fb..3d3f58755 100644 --- a/modules/gpufilters/perf/perf_filters.cpp +++ b/modules/gpufilters/perf/perf_filters.cpp @@ -86,6 +86,51 @@ PERF_TEST_P(Sz_Type_KernelSz, Blur, } } +////////////////////////////////////////////////////////////////////// +// Filter2D + +PERF_TEST_P(Sz_Type_KernelSz, Filter2D, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(3, 5, 7, 9, 11, 13, 15))) +{ + declare.time(20.0); + + const cv::Size size = GET_PARAM(0); + const int type = GET_PARAM(1); + const int ksize = GET_PARAM(2); + + cv::Mat src(size, type); + declare.in(src, WARMUP_RNG); + + cv::Mat kernel(ksize, ksize, CV_32FC1); + declare.in(kernel, WARMUP_RNG); + + if (PERF_RUN_GPU()) + { + const cv::gpu::GpuMat d_src(src); + cv::gpu::GpuMat dst; + + cv::Ptr filter2D = cv::gpu::createLinearFilter(d_src.type(), -1, kernel); + + TEST_CYCLE() filter2D->apply(d_src, dst); + + GPU_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() 
cv::filter2D(src, dst, -1, kernel); + + CPU_SANITY_CHECK(dst); + } +} + + + + + + + + ////////////////////////////////////////////////////////////////////// // Sobel @@ -330,39 +375,3 @@ PERF_TEST_P(Sz_Type_Op, MorphologyEx, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8 CPU_SANITY_CHECK(dst); } } - -////////////////////////////////////////////////////////////////////// -// Filter2D - -PERF_TEST_P(Sz_Type_KernelSz, Filter2D, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(3, 5, 7, 9, 11, 13, 15))) -{ - declare.time(20.0); - - const cv::Size size = GET_PARAM(0); - const int type = GET_PARAM(1); - const int ksize = GET_PARAM(2); - - cv::Mat src(size, type); - declare.in(src, WARMUP_RNG); - - cv::Mat kernel(ksize, ksize, CV_32FC1); - declare.in(kernel, WARMUP_RNG); - - if (PERF_RUN_GPU()) - { - const cv::gpu::GpuMat d_src(src); - cv::gpu::GpuMat dst; - - TEST_CYCLE() cv::gpu::filter2D(d_src, dst, -1, kernel); - - GPU_SANITY_CHECK(dst); - } - else - { - cv::Mat dst; - - TEST_CYCLE() cv::filter2D(src, dst, -1, kernel); - - CPU_SANITY_CHECK(dst); - } -} diff --git a/modules/gpufilters/src/cuda/filter2d.cu b/modules/gpufilters/src/cuda/filter2d.cu index 80c93c54e..4e913124d 100644 --- a/modules/gpufilters/src/cuda/filter2d.cu +++ b/modules/gpufilters/src/cuda/filter2d.cu @@ -48,111 +48,104 @@ namespace cv { namespace gpu { namespace cudev { - namespace imgproc + template + __global__ void filter2D(const SrcPtr src, PtrStepSz dst, + const float* __restrict__ kernel, + const int kWidth, const int kHeight, + const int anchorX, const int anchorY) { - #define FILTER2D_MAX_KERNEL_SIZE 16 + typedef typename TypeVec::cn>::vec_type sum_t; - __constant__ float c_filter2DKernel[FILTER2D_MAX_KERNEL_SIZE * FILTER2D_MAX_KERNEL_SIZE]; + const int x = blockIdx.x * blockDim.x + threadIdx.x; + const int y = blockIdx.y * blockDim.y + threadIdx.y; - template - __global__ void filter2D(const SrcT src, PtrStepSz dst, const int kWidth, const int kHeight, const 
int anchorX, const int anchorY) + if (x >= dst.cols || y >= dst.rows) + return; + + sum_t res = VecTraits::all(0); + int kInd = 0; + + for (int i = 0; i < kHeight; ++i) { - typedef typename TypeVec::cn>::vec_type sum_t; - - const int x = blockIdx.x * blockDim.x + threadIdx.x; - const int y = blockIdx.y * blockDim.y + threadIdx.y; - - if (x >= dst.cols || y >= dst.rows) - return; - - sum_t res = VecTraits::all(0); - int kInd = 0; - - for (int i = 0; i < kHeight; ++i) - { - for (int j = 0; j < kWidth; ++j) - res = res + src(y - anchorY + i, x - anchorX + j) * c_filter2DKernel[kInd++]; - } - - dst(y, x) = saturate_cast(res); + for (int j = 0; j < kWidth; ++j) + res = res + src(y - anchorY + i, x - anchorX + j) * kernel[kInd++]; } - template class Brd> struct Filter2DCaller; - - #define IMPLEMENT_FILTER2D_TEX_READER(type) \ - texture< type , cudaTextureType2D, cudaReadModeElementType> tex_filter2D_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \ - struct tex_filter2D_ ## type ## _reader \ - { \ - typedef type elem_type; \ - typedef int index_type; \ - const int xoff; \ - const int yoff; \ - tex_filter2D_ ## type ## _reader (int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \ - __device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \ - { \ - return tex2D(tex_filter2D_ ## type , x + xoff, y + yoff); \ - } \ - }; \ - template class Brd> struct Filter2DCaller< type , D, Brd> \ - { \ - static void call(const PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz dst, \ - int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream) \ - { \ - typedef typename TypeVec::cn>::vec_type work_type; \ - dim3 block(16, 16); \ - dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \ - bindTexture(&tex_filter2D_ ## type , srcWhole); \ - tex_filter2D_ ## type ##_reader texSrc(xoff, yoff); \ - Brd brd(dst.rows, dst.cols, VecTraits::make(borderValue)); \ - BorderReader< tex_filter2D_ ## type 
##_reader, Brd > brdSrc(texSrc, brd); \ - filter2D<<>>(brdSrc, dst, kWidth, kHeight, anchorX, anchorY); \ - cudaSafeCall( cudaGetLastError() ); \ - if (stream == 0) \ - cudaSafeCall( cudaDeviceSynchronize() ); \ - } \ - }; - - IMPLEMENT_FILTER2D_TEX_READER(uchar); - IMPLEMENT_FILTER2D_TEX_READER(uchar4); - - IMPLEMENT_FILTER2D_TEX_READER(ushort); - IMPLEMENT_FILTER2D_TEX_READER(ushort4); - - IMPLEMENT_FILTER2D_TEX_READER(float); - IMPLEMENT_FILTER2D_TEX_READER(float4); - - #undef IMPLEMENT_FILTER2D_TEX_READER - - template - void filter2D_gpu(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, - int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, - int borderMode, const float* borderValue, cudaStream_t stream) - { - typedef void (*func_t)(const PtrStepSz srcWhole, int xoff, int yoff, PtrStepSz dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream); - static const func_t funcs[] = - { - Filter2DCaller::call, - Filter2DCaller::call, - Filter2DCaller::call, - Filter2DCaller::call, - Filter2DCaller::call - }; - - if (stream == 0) - cudaSafeCall( cudaMemcpyToSymbol(c_filter2DKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice) ); - else - cudaSafeCall( cudaMemcpyToSymbolAsync(c_filter2DKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) ); - - funcs[borderMode](static_cast< PtrStepSz >(srcWhole), ofsX, ofsY, static_cast< PtrStepSz >(dst), kWidth, kHeight, anchorX, anchorY, borderValue, stream); - } - - template void filter2D_gpu(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream); - template void filter2D_gpu(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream); - 
template void filter2D_gpu(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream); - template void filter2D_gpu(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream); - template void filter2D_gpu(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream); - template void filter2D_gpu(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream); + dst(y, x) = saturate_cast(res); } + + template class Brd> struct Filter2DCaller; + + #define IMPLEMENT_FILTER2D_TEX_READER(type) \ + texture< type , cudaTextureType2D, cudaReadModeElementType> tex_filter2D_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \ + struct tex_filter2D_ ## type ## _reader \ + { \ + typedef type elem_type; \ + typedef int index_type; \ + const int xoff; \ + const int yoff; \ + tex_filter2D_ ## type ## _reader (int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \ + __device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \ + { \ + return tex2D(tex_filter2D_ ## type , x + xoff, y + yoff); \ + } \ + }; \ + template class Brd> struct Filter2DCaller< type , D, Brd> \ + { \ + static void call(const PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz dst, const float* kernel, \ + int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream) \ + { \ + typedef typename TypeVec::cn>::vec_type work_type; \ + dim3 block(16, 16); \ + dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \ + 
bindTexture(&tex_filter2D_ ## type , srcWhole); \ + tex_filter2D_ ## type ##_reader texSrc(xoff, yoff); \ + Brd brd(dst.rows, dst.cols, VecTraits::make(borderValue)); \ + BorderReader< tex_filter2D_ ## type ##_reader, Brd > brdSrc(texSrc, brd); \ + filter2D<<>>(brdSrc, dst, kernel, kWidth, kHeight, anchorX, anchorY); \ + cudaSafeCall( cudaGetLastError() ); \ + if (stream == 0) \ + cudaSafeCall( cudaDeviceSynchronize() ); \ + } \ + }; + + IMPLEMENT_FILTER2D_TEX_READER(uchar); + IMPLEMENT_FILTER2D_TEX_READER(uchar4); + + IMPLEMENT_FILTER2D_TEX_READER(ushort); + IMPLEMENT_FILTER2D_TEX_READER(ushort4); + + IMPLEMENT_FILTER2D_TEX_READER(float); + IMPLEMENT_FILTER2D_TEX_READER(float4); + + #undef IMPLEMENT_FILTER2D_TEX_READER + + template + void filter2D(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, + int kWidth, int kHeight, int anchorX, int anchorY, + int borderMode, const float* borderValue, cudaStream_t stream) + { + typedef void (*func_t)(const PtrStepSz srcWhole, int xoff, int yoff, PtrStepSz dst, const float* kernel, + int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream); + static const func_t funcs[] = + { + Filter2DCaller::call, + Filter2DCaller::call, + Filter2DCaller::call, + Filter2DCaller::call, + Filter2DCaller::call + }; + + funcs[borderMode]((PtrStepSz) srcWhole, ofsX, ofsY, (PtrStepSz) dst, kernel, + kWidth, kHeight, anchorX, anchorY, borderValue, stream); + } + + template void filter2D(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, int kWidth, int kHeight, int anchorX, int anchorY, int borderMode, const float* borderValue, cudaStream_t stream); + template void filter2D(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, int kWidth, int kHeight, int anchorX, int anchorY, int borderMode, const float* borderValue, cudaStream_t stream); + template void filter2D(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const 
float* kernel, int kWidth, int kHeight, int anchorX, int anchorY, int borderMode, const float* borderValue, cudaStream_t stream); + template void filter2D(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, int kWidth, int kHeight, int anchorX, int anchorY, int borderMode, const float* borderValue, cudaStream_t stream); + template void filter2D(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, int kWidth, int kHeight, int anchorX, int anchorY, int borderMode, const float* borderValue, cudaStream_t stream); + template void filter2D(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, int kWidth, int kHeight, int anchorX, int anchorY, int borderMode, const float* borderValue, cudaStream_t stream); }}} #endif // CUDA_DISABLER diff --git a/modules/gpufilters/src/filtering.cpp b/modules/gpufilters/src/filtering.cpp index 35df05ec6..3135b599a 100644 --- a/modules/gpufilters/src/filtering.cpp +++ b/modules/gpufilters/src/filtering.cpp @@ -49,6 +49,8 @@ using namespace cv::gpu; Ptr cv::gpu::createBoxFilter(int, int, Size, Point, int, Scalar) { throw_no_cuda(); return Ptr(); } +Ptr cv::gpu::createLinearFilter(int, int, InputArray, Point, int, Scalar) { throw_no_cuda(); return Ptr(); } + Ptr cv::gpu::createFilter2D_GPU(const Ptr&, int, int) { throw_no_cuda(); return Ptr(0); } Ptr cv::gpu::createSeparableFilter_GPU(const Ptr&, const Ptr&, int, int, int) { throw_no_cuda(); return Ptr(0); } Ptr cv::gpu::createSeparableFilter_GPU(const Ptr&, const Ptr&, int, int, int, GpuMat&) { throw_no_cuda(); return Ptr(0); } @@ -57,8 +59,6 @@ Ptr cv::gpu::getColumnSumFilter_GPU(int, int, int, int) { Ptr cv::gpu::getMorphologyFilter_GPU(int, int, const Mat&, const Size&, Point) { throw_no_cuda(); return Ptr(0); } Ptr cv::gpu::createMorphologyFilter_GPU(int, int, const Mat&, const Point&, int) { throw_no_cuda(); return Ptr(0); } Ptr cv::gpu::createMorphologyFilter_GPU(int, int, const Mat&, GpuMat&, const Point&, 
int) { throw_no_cuda(); return Ptr(0); } -Ptr cv::gpu::getLinearFilter_GPU(int, int, const Mat&, Point, int) { throw_no_cuda(); return Ptr(0); } -Ptr cv::gpu::createLinearFilter_GPU(int, int, const Mat&, Point, int) { throw_no_cuda(); return Ptr(0); } Ptr cv::gpu::getLinearRowFilter_GPU(int, int, const Mat&, int, int) { throw_no_cuda(); return Ptr(0); } Ptr cv::gpu::getLinearColumnFilter_GPU(int, int, const Mat&, int, int) { throw_no_cuda(); return Ptr(0); } Ptr cv::gpu::createSeparableLinearFilter_GPU(int, int, const Mat&, const Mat&, const Point&, int, int) { throw_no_cuda(); return Ptr(0); } @@ -76,7 +76,6 @@ void cv::gpu::dilate(const GpuMat&, GpuMat&, const Mat&, Point, int) { throw_no_ void cv::gpu::dilate(const GpuMat&, GpuMat&, const Mat&, GpuMat&, Point, int, Stream&) { throw_no_cuda(); } void cv::gpu::morphologyEx(const GpuMat&, GpuMat&, int, const Mat&, Point, int) { throw_no_cuda(); } void cv::gpu::morphologyEx(const GpuMat&, GpuMat&, int, const Mat&, GpuMat&, GpuMat&, Point, int, Stream&) { throw_no_cuda(); } -void cv::gpu::filter2D(const GpuMat&, GpuMat&, int, const Mat&, Point, int, Stream&) { throw_no_cuda(); } void cv::gpu::sepFilter2D(const GpuMat&, GpuMat&, int, const Mat&, const Mat&, Point, int, int) { throw_no_cuda(); } void cv::gpu::sepFilter2D(const GpuMat&, GpuMat&, int, const Mat&, const Mat&, GpuMat&, Point, int, int, Stream&) { throw_no_cuda(); } void cv::gpu::Sobel(const GpuMat&, GpuMat&, int, int, int, int, double, int, int) { throw_no_cuda(); } @@ -188,6 +187,138 @@ Ptr cv::gpu::createBoxFilter(int srcType, int dstType, Size ksize, Point return new NPPBoxFilter(srcType, dstType, ksize, anchor, borderMode, borderVal); } +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Linear Filter + +namespace cv { namespace gpu { namespace cudev +{ + template + void filter2D(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, + int kWidth, int kHeight, int anchorX, int 
anchorY, + int borderMode, const float* borderValue, cudaStream_t stream); +}}} + +namespace +{ + class LinearFilter : public Filter + { + public: + LinearFilter(int srcType, int dstType, InputArray kernel, Point anchor, int borderMode, Scalar borderVal); + + void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); + + private: + typedef void (*filter2D_t)(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, + int kWidth, int kHeight, int anchorX, int anchorY, + int borderMode, const float* borderValue, cudaStream_t stream); + + GpuMat kernel_; + Point anchor_; + int type_; + filter2D_t func_; + int borderMode_; + Scalar_ borderVal_; + }; + + LinearFilter::LinearFilter(int srcType, int dstType, InputArray _kernel, Point anchor, int borderMode, Scalar borderVal) : + anchor_(anchor), type_(srcType), borderMode_(borderMode), borderVal_(borderVal) + { + const int sdepth = CV_MAT_DEPTH(srcType); + const int scn = CV_MAT_CN(srcType); + + Mat kernel = _kernel.getMat(); + + CV_Assert( sdepth == CV_8U || sdepth == CV_16U || sdepth == CV_32F ); + CV_Assert( scn == 1 || scn == 4 ); + CV_Assert( dstType == srcType ); + CV_Assert( kernel.channels() == 1 ); + CV_Assert( borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP ); + + Mat kernel32F; + kernel.convertTo(kernel32F, CV_32F); + + kernel_ = gpu::createContinuous(kernel.size(), CV_32FC1); + kernel_.upload(kernel32F); + + normalizeAnchor(anchor_, kernel.size()); + + switch (srcType) + { + case CV_8UC1: + func_ = cudev::filter2D; + break; + case CV_8UC4: + func_ = cudev::filter2D; + break; + case CV_16UC1: + func_ = cudev::filter2D; + break; + case CV_16UC4: + func_ = cudev::filter2D; + break; + case CV_32FC1: + func_ = cudev::filter2D; + break; + case CV_32FC4: + func_ = cudev::filter2D; + break; + } + } + + void LinearFilter::apply(InputArray _src, OutputArray _dst, Stream& 
_stream) + { + GpuMat src = _src.getGpuMat(); + CV_Assert( src.type() == type_ ); + + _dst.create(src.size(), src.type()); + GpuMat dst = _dst.getGpuMat(); + + Point ofs; + Size wholeSize; + src.locateROI(wholeSize, ofs); + + GpuMat srcWhole(wholeSize, src.type(), src.datastart); + + func_(srcWhole, ofs.x, ofs.y, dst, kernel_.ptr(), + kernel_.cols, kernel_.rows, anchor_.x, anchor_.y, + borderMode_, borderVal_.val, StreamAccessor::getStream(_stream)); + } +} + +Ptr cv::gpu::createLinearFilter(int srcType, int dstType, InputArray kernel, Point anchor, int borderMode, Scalar borderVal) +{ + if (dstType < 0) + dstType = srcType; + + return new LinearFilter(srcType, dstType, kernel, anchor, borderMode, borderVal); +} + + + + + + + + + + +void cv::gpu::Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize, double scale, int borderType, Stream& stream) +{ + CV_Assert(ksize == 1 || ksize == 3); + + static const int K[2][9] = + { + {0, 1, 0, 1, -4, 1, 0, 1, 0}, + {2, 0, 2, 0, -8, 0, 2, 0, 2} + }; + Mat kernel(3, 3, CV_32S, (void*)K[ksize == 3]); + if (scale != 1) + kernel *= scale; + + Ptr f = gpu::createLinearFilter(src.type(), ddepth, kernel, Point(-1,-1), borderType); + f->apply(src, dst, stream); +} + @@ -702,172 +833,6 @@ void cv::gpu::morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat& ke } } -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Linear Filter - -namespace cv { namespace gpu { namespace cudev -{ - namespace imgproc - { - template - void filter2D_gpu(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, - int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, - int borderMode, const float* borderValue, cudaStream_t stream); - } -}}} - -namespace -{ - typedef NppStatus (*nppFilter2D_t)(const Npp8u * pSrc, Npp32s nSrcStep, Npp8u * pDst, Npp32s nDstStep, NppiSize oSizeROI, - const Npp32s * pKernel, NppiSize oKernelSize, NppiPoint oAnchor, Npp32s nDivisor); - - 
struct NPPLinearFilter : public BaseFilter_GPU - { - NPPLinearFilter(const Size& ksize_, const Point& anchor_, const GpuMat& kernel_, Npp32s nDivisor_, nppFilter2D_t func_) : - BaseFilter_GPU(ksize_, anchor_), kernel(kernel_), nDivisor(nDivisor_), func(func_) {} - - virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null()) - { - NppiSize sz; - sz.width = src.cols; - sz.height = src.rows; - NppiSize oKernelSize; - oKernelSize.height = ksize.height; - oKernelSize.width = ksize.width; - NppiPoint oAnchor; - oAnchor.x = anchor.x; - oAnchor.y = anchor.y; - - cudaStream_t stream = StreamAccessor::getStream(s); - - NppStreamHandler h(stream); - - nppSafeCall( func(src.ptr(), static_cast(src.step), dst.ptr(), static_cast(dst.step), sz, - kernel.ptr(), oKernelSize, oAnchor, nDivisor) ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - - GpuMat kernel; - Npp32s nDivisor; - nppFilter2D_t func; - }; - - typedef void (*gpuFilter2D_t)(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, - int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, - int borderMode, const float* borderValue, cudaStream_t stream); - - struct GpuFilter2D : public BaseFilter_GPU - { - GpuFilter2D(Size ksize_, Point anchor_, gpuFilter2D_t func_, const GpuMat& kernel_, int brd_type_) : - BaseFilter_GPU(ksize_, anchor_), func(func_), kernel(kernel_), brd_type(brd_type_) - { - } - - virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()) - { - using namespace cv::gpu::cudev::imgproc; - - Point ofs; - Size wholeSize; - src.locateROI(wholeSize, ofs); - GpuMat srcWhole(wholeSize, src.type(), src.datastart); - - static const Scalar_ zero = Scalar_::all(0.0f); - func(srcWhole, ofs.x, ofs.y, dst, ksize.width, ksize.height, anchor.x, anchor.y, kernel.ptr(), brd_type, zero.val, StreamAccessor::getStream(stream)); - } - - gpuFilter2D_t func; - GpuMat kernel; - int brd_type; - }; -} - -Ptr 
cv::gpu::getLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, Point anchor, int brd_type) -{ - using namespace cv::gpu::cudev::imgproc; - - int sdepth = CV_MAT_DEPTH(srcType); - int scn = CV_MAT_CN(srcType); - - CV_Assert(sdepth == CV_8U || sdepth == CV_16U || sdepth == CV_32F); - CV_Assert(scn == 1 || scn == 4); - CV_Assert(dstType == srcType); - CV_Assert(brd_type == BORDER_REFLECT101 || brd_type == BORDER_REPLICATE || brd_type == BORDER_CONSTANT || brd_type == BORDER_REFLECT || brd_type == BORDER_WRAP); - - Size ksize = kernel.size(); - -#if 0 - if ((srcType == CV_8UC1 || srcType == CV_8UC4) && brd_type == BORDER_CONSTANT) - { - static const nppFilter2D_t cppFilter2D_callers[] = {0, nppiFilter_8u_C1R, 0, 0, nppiFilter_8u_C4R}; - - GpuMat gpu_krnl; - int nDivisor; - normalizeKernel(kernel, gpu_krnl, CV_32S, &nDivisor, true); - - normalizeAnchor(anchor, ksize); - - return Ptr(new NPPLinearFilter(ksize, anchor, gpu_krnl, nDivisor, cppFilter2D_callers[CV_MAT_CN(srcType)])); - } -#endif - - CV_Assert(ksize.width * ksize.height <= 16 * 16); - - GpuMat gpu_krnl; - normalizeKernel(kernel, gpu_krnl, CV_32F); - - normalizeAnchor(anchor, ksize); - - gpuFilter2D_t func = 0; - - switch (srcType) - { - case CV_8UC1: - func = filter2D_gpu; - break; - case CV_8UC4: - func = filter2D_gpu; - break; - case CV_16UC1: - func = filter2D_gpu; - break; - case CV_16UC4: - func = filter2D_gpu; - break; - case CV_32FC1: - func = filter2D_gpu; - break; - case CV_32FC4: - func = filter2D_gpu; - break; - } - - return Ptr(new GpuFilter2D(ksize, anchor, func, gpu_krnl, brd_type)); -} - -Ptr cv::gpu::createLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, Point anchor, int borderType) -{ - Ptr linearFilter = getLinearFilter_GPU(srcType, dstType, kernel, anchor, borderType); - - return createFilter2D_GPU(linearFilter, srcType, dstType); -} - -void cv::gpu::filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernel, Point anchor, int borderType, Stream& stream) 
-{ - if (ddepth < 0) - ddepth = src.depth(); - - int dst_type = CV_MAKE_TYPE(ddepth, src.channels()); - - Ptr f = createLinearFilter_GPU(src.type(), dst_type, kernel, anchor, borderType); - - dst.create(src.size(), dst_type); - - f->apply(src, dst, Rect(0, 0, src.cols, src.rows), stream); -} - //////////////////////////////////////////////////////////////////////////////////////////////////// // Separable Linear Filter @@ -1208,22 +1173,6 @@ void cv::gpu::Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, sepFilter2D(src, dst, ddepth, kx, ky, buf, Point(-1,-1), rowBorderType, columnBorderType, stream); } -void cv::gpu::Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize, double scale, int borderType, Stream& stream) -{ - CV_Assert(ksize == 1 || ksize == 3); - - static const int K[2][9] = - { - {0, 1, 0, 1, -4, 1, 0, 1, 0}, - {2, 0, 2, 0, -8, 0, 2, 0, 2} - }; - Mat kernel(3, 3, CV_32S, (void*)K[ksize == 3]); - if (scale != 1) - kernel *= scale; - - filter2D(src, dst, ddepth, kernel, Point(-1,-1), borderType, stream); -} - //////////////////////////////////////////////////////////////////////////////////////////////////// // Gaussian Filter diff --git a/modules/gpufilters/test/test_filters.cpp b/modules/gpufilters/test/test_filters.cpp index a63d92b3d..6d6da7e4b 100644 --- a/modules/gpufilters/test/test_filters.cpp +++ b/modules/gpufilters/test/test_filters.cpp @@ -118,6 +118,121 @@ INSTANTIATE_TEST_CASE_P(GPU_Filters, Blur, testing::Combine( testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT)), WHOLE_SUBMAT)); +///////////////////////////////////////////////////////////////////////////////////////////////// +// Filter2D + +PARAM_TEST_CASE(Filter2D, cv::gpu::DeviceInfo, cv::Size, MatType, KSize, Anchor, BorderType, UseRoi) +{ + cv::gpu::DeviceInfo devInfo; + cv::Size size; + int type; + cv::Size ksize; + cv::Point anchor; + int borderType; + 
bool useRoi; + + virtual void SetUp() + { + devInfo = GET_PARAM(0); + size = GET_PARAM(1); + type = GET_PARAM(2); + ksize = GET_PARAM(3); + anchor = GET_PARAM(4); + borderType = GET_PARAM(5); + useRoi = GET_PARAM(6); + + cv::gpu::setDevice(devInfo.deviceID()); + } +}; + +GPU_TEST_P(Filter2D, Accuracy) +{ + cv::Mat src = randomMat(size, type); + cv::Mat kernel = randomMat(cv::Size(ksize.width, ksize.height), CV_32FC1, 0.0, 1.0); + + cv::Ptr filter2D = cv::gpu::createLinearFilter(src.type(), -1, kernel, anchor, borderType); + + cv::gpu::GpuMat dst = createMat(size, type, useRoi); + filter2D->apply(loadMat(src, useRoi), dst); + + cv::Mat dst_gold; + cv::filter2D(src, dst_gold, -1, kernel, anchor, 0, borderType); + + EXPECT_MAT_NEAR(dst_gold, dst, CV_MAT_DEPTH(type) == CV_32F ? 1e-1 : 1.0); +} + +INSTANTIATE_TEST_CASE_P(GPU_Filters, Filter2D, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC4)), + testing::Values(KSize(cv::Size(3, 3)), KSize(cv::Size(5, 5)), KSize(cv::Size(7, 7)), KSize(cv::Size(11, 11)), KSize(cv::Size(13, 13)), KSize(cv::Size(15, 15))), + testing::Values(Anchor(cv::Point(-1, -1)), Anchor(cv::Point(0, 0)), Anchor(cv::Point(2, 2))), + testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT)), + WHOLE_SUBMAT)); + + + + + + + + + + + + + +///////////////////////////////////////////////////////////////////////////////////////////////// +// Laplacian + +PARAM_TEST_CASE(Laplacian, cv::gpu::DeviceInfo, cv::Size, MatType, KSize, UseRoi) +{ + cv::gpu::DeviceInfo devInfo; + cv::Size size; + int type; + cv::Size ksize; + bool useRoi; + + virtual void SetUp() + { + devInfo = GET_PARAM(0); + size = GET_PARAM(1); + type = GET_PARAM(2); + ksize = GET_PARAM(3); + useRoi = GET_PARAM(4); + + cv::gpu::setDevice(devInfo.deviceID()); + } +}; + 
+GPU_TEST_P(Laplacian, Accuracy) +{ + cv::Mat src = randomMat(size, type); + + cv::gpu::GpuMat dst = createMat(size, type, useRoi); + cv::gpu::Laplacian(loadMat(src, useRoi), dst, -1, ksize.width); + + cv::Mat dst_gold; + cv::Laplacian(src, dst_gold, -1, ksize.width); + + EXPECT_MAT_NEAR(dst_gold, dst, src.depth() < CV_32F ? 0.0 : 1e-3); +} + +INSTANTIATE_TEST_CASE_P(GPU_Filters, Laplacian, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1)), + testing::Values(KSize(cv::Size(1, 1)), KSize(cv::Size(3, 3))), + WHOLE_SUBMAT)); + + + + + + + + + ///////////////////////////////////////////////////////////////////////////////////////////////// // Sobel @@ -332,49 +447,6 @@ INSTANTIATE_TEST_CASE_P(GPU_Filters, GaussianBlur, testing::Combine( BorderType(cv::BORDER_REFLECT)), WHOLE_SUBMAT)); -///////////////////////////////////////////////////////////////////////////////////////////////// -// Laplacian - -PARAM_TEST_CASE(Laplacian, cv::gpu::DeviceInfo, cv::Size, MatType, KSize, UseRoi) -{ - cv::gpu::DeviceInfo devInfo; - cv::Size size; - int type; - cv::Size ksize; - bool useRoi; - - virtual void SetUp() - { - devInfo = GET_PARAM(0); - size = GET_PARAM(1); - type = GET_PARAM(2); - ksize = GET_PARAM(3); - useRoi = GET_PARAM(4); - - cv::gpu::setDevice(devInfo.deviceID()); - } -}; - -GPU_TEST_P(Laplacian, Accuracy) -{ - cv::Mat src = randomMat(size, type); - - cv::gpu::GpuMat dst = createMat(size, type, useRoi); - cv::gpu::Laplacian(loadMat(src, useRoi), dst, -1, ksize.width); - - cv::Mat dst_gold; - cv::Laplacian(src, dst_gold, -1, ksize.width); - - EXPECT_MAT_NEAR(dst_gold, dst, src.depth() < CV_32F ? 
0.0 : 1e-3); -} - -INSTANTIATE_TEST_CASE_P(GPU_Filters, Laplacian, testing::Combine( - ALL_DEVICES, - DIFFERENT_SIZES, - testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1)), - testing::Values(KSize(cv::Size(1, 1)), KSize(cv::Size(3, 3))), - WHOLE_SUBMAT)); - ///////////////////////////////////////////////////////////////////////////////////////////////// // Erode @@ -527,56 +599,4 @@ INSTANTIATE_TEST_CASE_P(GPU_Filters, MorphEx, testing::Combine( testing::Values(Iterations(1), Iterations(2), Iterations(3)), WHOLE_SUBMAT)); -///////////////////////////////////////////////////////////////////////////////////////////////// -// Filter2D - -PARAM_TEST_CASE(Filter2D, cv::gpu::DeviceInfo, cv::Size, MatType, KSize, Anchor, BorderType, UseRoi) -{ - cv::gpu::DeviceInfo devInfo; - cv::Size size; - int type; - cv::Size ksize; - cv::Point anchor; - int borderType; - bool useRoi; - - cv::Mat img; - - virtual void SetUp() - { - devInfo = GET_PARAM(0); - size = GET_PARAM(1); - type = GET_PARAM(2); - ksize = GET_PARAM(3); - anchor = GET_PARAM(4); - borderType = GET_PARAM(5); - useRoi = GET_PARAM(6); - - cv::gpu::setDevice(devInfo.deviceID()); - } -}; - -GPU_TEST_P(Filter2D, Accuracy) -{ - cv::Mat src = randomMat(size, type); - cv::Mat kernel = randomMat(cv::Size(ksize.width, ksize.height), CV_32FC1, 0.0, 1.0); - - cv::gpu::GpuMat dst = createMat(size, type, useRoi); - cv::gpu::filter2D(loadMat(src, useRoi), dst, -1, kernel, anchor, borderType); - - cv::Mat dst_gold; - cv::filter2D(src, dst_gold, -1, kernel, anchor, 0, borderType); - - EXPECT_MAT_NEAR(dst_gold, dst, CV_MAT_DEPTH(type) == CV_32F ? 
1e-1 : 1.0); -} - -INSTANTIATE_TEST_CASE_P(GPU_Filters, Filter2D, testing::Combine( - ALL_DEVICES, - DIFFERENT_SIZES, - testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC4)), - testing::Values(KSize(cv::Size(3, 3)), KSize(cv::Size(5, 5)), KSize(cv::Size(7, 7)), KSize(cv::Size(11, 11)), KSize(cv::Size(13, 13)), KSize(cv::Size(15, 15))), - testing::Values(Anchor(cv::Point(-1, -1)), Anchor(cv::Point(0, 0)), Anchor(cv::Point(2, 2))), - testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT)), - WHOLE_SUBMAT)); - #endif // HAVE_CUDA diff --git a/samples/gpu/performance/tests.cpp b/samples/gpu/performance/tests.cpp index 97eb7a82a..f6ace6d77 100644 --- a/samples/gpu/performance/tests.cpp +++ b/samples/gpu/performance/tests.cpp @@ -961,10 +961,11 @@ TEST(filter2D) gpu::GpuMat d_src(src); gpu::GpuMat d_dst; - gpu::filter2D(d_src, d_dst, -1, kernel); + Ptr filter2D = gpu::createLinearFilter(d_src.type(), -1, kernel); + filter2D->apply(d_src, d_dst); GPU_ON; - gpu::filter2D(d_src, d_dst, -1, kernel); + filter2D->apply(d_src, d_dst); GPU_OFF; } } From ee7eb1b807d1201bdd5472171c0fd4e74a99b185 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Mon, 29 Apr 2013 12:02:59 +0400 Subject: [PATCH 076/121] refactored Laplacian filter --- .../gpufilters/include/opencv2/gpufilters.hpp | 18 +++-- modules/gpufilters/perf/perf_filters.cpp | 68 ++++++++++--------- modules/gpufilters/src/filtering.cpp | 30 ++++---- modules/gpufilters/test/test_filters.cpp | 16 +---- 4 files changed, 63 insertions(+), 69 deletions(-) diff --git a/modules/gpufilters/include/opencv2/gpufilters.hpp b/modules/gpufilters/include/opencv2/gpufilters.hpp index 32d3403d5..1133ae01d 100644 --- a/modules/gpufilters/include/opencv2/gpufilters.hpp +++ b/modules/gpufilters/include/opencv2/gpufilters.hpp @@ -113,17 +113,23 @@ inline void 
filter2D(InputArray src, OutputArray dst, int ddepth, InputArray ker f->apply(src, dst, stream); } - - - - - +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Laplacian Filter //! applies Laplacian operator to the image //! supports only ksize = 1 and ksize = 3 -CV_EXPORTS void Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize = 1, double scale = 1, int borderType = BORDER_DEFAULT, Stream& stream = Stream::Null()); +CV_EXPORTS Ptr createLaplacianFilter(int srcType, int dstType, int ksize = 1, double scale = 1, + int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0)); +__OPENCV_GPUFILTERS_DEPR_BEFORE__ void Laplacian(InputArray src, OutputArray dst, int ddepth, + int ksize = 1, double scale = 1, int borderType = BORDER_DEFAULT, + Stream& stream = Stream::Null()) __OPENCV_GPUFILTERS_DEPR_AFTER__; +inline void Laplacian(InputArray src, OutputArray dst, int ddepth, int ksize, double scale, int borderType, Stream& stream) +{ + Ptr f = gpu::createLaplacianFilter(src.type(), ddepth, ksize, scale, borderType); + f->apply(src, dst, stream); +} diff --git a/modules/gpufilters/perf/perf_filters.cpp b/modules/gpufilters/perf/perf_filters.cpp index 3d3f58755..efa99696a 100644 --- a/modules/gpufilters/perf/perf_filters.cpp +++ b/modules/gpufilters/perf/perf_filters.cpp @@ -124,6 +124,41 @@ PERF_TEST_P(Sz_Type_KernelSz, Filter2D, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV } } +////////////////////////////////////////////////////////////////////// +// Laplacian + +PERF_TEST_P(Sz_Type_KernelSz, Laplacian, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(1, 3))) +{ + declare.time(20.0); + + const cv::Size size = GET_PARAM(0); + const int type = GET_PARAM(1); + const int ksize = GET_PARAM(2); + + cv::Mat src(size, type); + declare.in(src, WARMUP_RNG); + + if (PERF_RUN_GPU()) + { + const cv::gpu::GpuMat d_src(src); + cv::gpu::GpuMat dst; + + cv::Ptr laplacian = 
cv::gpu::createLaplacianFilter(d_src.type(), -1, ksize); + + TEST_CYCLE() laplacian->apply(d_src, dst); + + GPU_SANITY_CHECK(dst); + } + else + { + cv::Mat dst; + + TEST_CYCLE() cv::Laplacian(src, dst, -1, ksize); + + CPU_SANITY_CHECK(dst); + } +} + @@ -232,39 +267,6 @@ PERF_TEST_P(Sz_Type_KernelSz, GaussianBlur, Combine(GPU_TYPICAL_MAT_SIZES, Value } } -////////////////////////////////////////////////////////////////////// -// Laplacian - -PERF_TEST_P(Sz_Type_KernelSz, Laplacian, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(1, 3))) -{ - declare.time(20.0); - - const cv::Size size = GET_PARAM(0); - const int type = GET_PARAM(1); - const int ksize = GET_PARAM(2); - - cv::Mat src(size, type); - declare.in(src, WARMUP_RNG); - - if (PERF_RUN_GPU()) - { - const cv::gpu::GpuMat d_src(src); - cv::gpu::GpuMat dst; - - TEST_CYCLE() cv::gpu::Laplacian(d_src, dst, -1, ksize); - - GPU_SANITY_CHECK(dst); - } - else - { - cv::Mat dst; - - TEST_CYCLE() cv::Laplacian(src, dst, -1, ksize); - - CPU_SANITY_CHECK(dst); - } -} - ////////////////////////////////////////////////////////////////////// // Erode diff --git a/modules/gpufilters/src/filtering.cpp b/modules/gpufilters/src/filtering.cpp index 3135b599a..305ecec23 100644 --- a/modules/gpufilters/src/filtering.cpp +++ b/modules/gpufilters/src/filtering.cpp @@ -51,6 +51,8 @@ Ptr cv::gpu::createBoxFilter(int, int, Size, Point, int, Scalar) { throw Ptr cv::gpu::createLinearFilter(int, int, InputArray, Point, int, Scalar) { throw_no_cuda(); return Ptr(); } +Ptr cv::gpu::createLaplacianFilter(int, int, int, double, int, Scalar) { throw_no_cuda(); return Ptr(); } + Ptr cv::gpu::createFilter2D_GPU(const Ptr&, int, int) { throw_no_cuda(); return Ptr(0); } Ptr cv::gpu::createSeparableFilter_GPU(const Ptr&, const Ptr&, int, int, int) { throw_no_cuda(); return Ptr(0); } Ptr cv::gpu::createSeparableFilter_GPU(const Ptr&, const Ptr&, int, int, int, GpuMat&) { throw_no_cuda(); return Ptr(0); } @@ -84,7 
+86,6 @@ void cv::gpu::Scharr(const GpuMat&, GpuMat&, int, int, int, double, int, int) { void cv::gpu::Scharr(const GpuMat&, GpuMat&, int, int, int, GpuMat&, double, int, int, Stream&) { throw_no_cuda(); } void cv::gpu::GaussianBlur(const GpuMat&, GpuMat&, Size, double, double, int, int) { throw_no_cuda(); } void cv::gpu::GaussianBlur(const GpuMat&, GpuMat&, Size, GpuMat&, double, double, int, int, Stream&) { throw_no_cuda(); } -void cv::gpu::Laplacian(const GpuMat&, GpuMat&, int, int, double, int, Stream&) { throw_no_cuda(); } #else @@ -293,30 +294,24 @@ Ptr cv::gpu::createLinearFilter(int srcType, int dstType, InputArray ker return new LinearFilter(srcType, dstType, kernel, anchor, borderMode, borderVal); } +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Laplacian Filter - - - - - - - - -void cv::gpu::Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize, double scale, int borderType, Stream& stream) +Ptr cv::gpu::createLaplacianFilter(int srcType, int dstType, int ksize, double scale, int borderMode, Scalar borderVal) { - CV_Assert(ksize == 1 || ksize == 3); + CV_Assert( ksize == 1 || ksize == 3 ); - static const int K[2][9] = + static const float K[2][9] = { - {0, 1, 0, 1, -4, 1, 0, 1, 0}, - {2, 0, 2, 0, -8, 0, 2, 0, 2} + {0.0f, 1.0f, 0.0f, 1.0f, -4.0f, 1.0f, 0.0f, 1.0f, 0.0f}, + {2.0f, 0.0f, 2.0f, 0.0f, -8.0f, 0.0f, 2.0f, 0.0f, 2.0f} }; - Mat kernel(3, 3, CV_32S, (void*)K[ksize == 3]); + + Mat kernel(3, 3, CV_32FC1, (void*)K[ksize == 3]); if (scale != 1) kernel *= scale; - Ptr f = gpu::createLinearFilter(src.type(), ddepth, kernel, Point(-1,-1), borderType); - f->apply(src, dst, stream); + return gpu::createLinearFilter(srcType, dstType, kernel, Point(-1,-1), borderMode, borderVal); } @@ -347,6 +342,7 @@ void cv::gpu::Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize, d + namespace diff --git a/modules/gpufilters/test/test_filters.cpp b/modules/gpufilters/test/test_filters.cpp 
index 6d6da7e4b..42018424d 100644 --- a/modules/gpufilters/test/test_filters.cpp +++ b/modules/gpufilters/test/test_filters.cpp @@ -170,18 +170,6 @@ INSTANTIATE_TEST_CASE_P(GPU_Filters, Filter2D, testing::Combine( testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT)), WHOLE_SUBMAT)); - - - - - - - - - - - - ///////////////////////////////////////////////////////////////////////////////////////////////// // Laplacian @@ -209,8 +197,10 @@ GPU_TEST_P(Laplacian, Accuracy) { cv::Mat src = randomMat(size, type); + cv::Ptr laplacian = cv::gpu::createLaplacianFilter(src.type(), -1, ksize.width); + cv::gpu::GpuMat dst = createMat(size, type, useRoi); - cv::gpu::Laplacian(loadMat(src, useRoi), dst, -1, ksize.width); + laplacian->apply(loadMat(src, useRoi), dst); cv::Mat dst_gold; cv::Laplacian(src, dst_gold, -1, ksize.width); From 12ae11e2ff56ac9be36708b356601f1f62aba829 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Mon, 29 Apr 2013 14:47:42 +0400 Subject: [PATCH 077/121] refactored Separable Linear Filters --- .../include/opencv2/gpufeatures2d.hpp | 2 +- modules/gpufeatures2d/src/orb.cpp | 4 +- .../gpufilters/include/opencv2/gpufilters.hpp | 146 ++-- modules/gpufilters/perf/perf_filters.cpp | 29 +- modules/gpufilters/src/filtering.cpp | 777 +++++------------- modules/gpufilters/test/test_filters.cpp | 109 ++- .../gpuimgproc/include/opencv2/gpuimgproc.hpp | 2 +- modules/gpuimgproc/src/canny.cpp | 8 +- modules/gpuimgproc/src/corners.cpp | 15 +- modules/superres/src/btv_l1_gpu.cpp | 8 +- .../gpu-basics-similarity.cpp | 24 +- samples/gpu/performance/tests.cpp | 6 +- 12 files changed, 427 insertions(+), 703 deletions(-) diff --git a/modules/gpufeatures2d/include/opencv2/gpufeatures2d.hpp b/modules/gpufeatures2d/include/opencv2/gpufeatures2d.hpp index 0c821745f..cc73da9d9 100644 --- a/modules/gpufeatures2d/include/opencv2/gpufeatures2d.hpp +++ 
b/modules/gpufeatures2d/include/opencv2/gpufeatures2d.hpp @@ -351,7 +351,7 @@ private: FAST_GPU fastDetector_; - Ptr blurFilter; + Ptr blurFilter; GpuMat d_keypoints_; }; diff --git a/modules/gpufeatures2d/src/orb.cpp b/modules/gpufeatures2d/src/orb.cpp index 495ca3f6e..7cb1cbecc 100644 --- a/modules/gpufeatures2d/src/orb.cpp +++ b/modules/gpufeatures2d/src/orb.cpp @@ -468,7 +468,7 @@ cv::gpu::ORB_GPU::ORB_GPU(int nFeatures, float scaleFactor, int nLevels, int edg pattern_.upload(h_pattern); - blurFilter = createGaussianFilter_GPU(CV_8UC1, Size(7, 7), 2, 2, BORDER_REFLECT_101); + blurFilter = gpu::createGaussianFilter(CV_8UC1, -1, Size(7, 7), 2, 2, BORDER_REFLECT_101); blurForDescriptor = false; } @@ -632,7 +632,7 @@ void cv::gpu::ORB_GPU::computeDescriptors(GpuMat& descriptors) { // preprocess the resized image ensureSizeIsEnough(imagePyr_[level].size(), imagePyr_[level].type(), buf_); - blurFilter->apply(imagePyr_[level], buf_, Rect(0, 0, imagePyr_[level].cols, imagePyr_[level].rows)); + blurFilter->apply(imagePyr_[level], buf_); } computeOrbDescriptor_gpu(blurForDescriptor ? buf_ : imagePyr_[level], keyPointsPyr_[level].ptr(0), keyPointsPyr_[level].ptr(2), diff --git a/modules/gpufilters/include/opencv2/gpufilters.hpp b/modules/gpufilters/include/opencv2/gpufilters.hpp index 1133ae01d..bf7a8e928 100644 --- a/modules/gpufilters/include/opencv2/gpufilters.hpp +++ b/modules/gpufilters/include/opencv2/gpufilters.hpp @@ -131,6 +131,83 @@ inline void Laplacian(InputArray src, OutputArray dst, int ddepth, int ksize, do f->apply(src, dst, stream); } +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Separable Linear Filter + +//! 
separable linear 2D filter +CV_EXPORTS Ptr createSeparableLinearFilter(int srcType, int dstType, InputArray rowKernel, InputArray columnKernel, + Point anchor = Point(-1,-1), int rowBorderMode = BORDER_DEFAULT, int columnBorderMode = -1); + +__OPENCV_GPUFILTERS_DEPR_BEFORE__ void sepFilter2D(InputArray src, OutputArray dst, int ddepth, InputArray kernelX, InputArray kernelY, + Point anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, + Stream& stream = Stream::Null()) __OPENCV_GPUFILTERS_DEPR_AFTER__; + +inline void sepFilter2D(InputArray src, OutputArray dst, int ddepth, InputArray kernelX, InputArray kernelY, Point anchor, int rowBorderType, int columnBorderType, Stream& stream) +{ + Ptr f = gpu::createSeparableLinearFilter(src.type(), ddepth, kernelX, kernelY, anchor, rowBorderType, columnBorderType); + f->apply(src, dst, stream); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Deriv Filter + +//! the generalized Deriv operator +CV_EXPORTS Ptr createDerivFilter(int srcType, int dstType, int dx, int dy, + int ksize, bool normalize = false, double scale = 1, + int rowBorderMode = BORDER_DEFAULT, int columnBorderMode = -1); + +//! the Sobel operator +CV_EXPORTS Ptr createSobelFilter(int srcType, int dstType, int dx, int dy, int ksize = 3, + double scale = 1, int rowBorderMode = BORDER_DEFAULT, int columnBorderMode = -1); + +//! 
the vertical or horizontal Scharr operator +CV_EXPORTS Ptr createScharrFilter(int srcType, int dstType, int dx, int dy, + double scale = 1, int rowBorderMode = BORDER_DEFAULT, int columnBorderMode = -1); + +__OPENCV_GPUFILTERS_DEPR_BEFORE__ void Sobel(InputArray src, OutputArray dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1, + int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, + Stream& stream = Stream::Null()) __OPENCV_GPUFILTERS_DEPR_AFTER__; + +inline void Sobel(InputArray src, OutputArray dst, int ddepth, int dx, int dy, int ksize, double scale, int rowBorderType, int columnBorderType, Stream& stream) +{ + Ptr f = gpu::createSobelFilter(src.type(), ddepth, dx, dy, ksize, scale, rowBorderType, columnBorderType); + f->apply(src, dst, stream); +} + +__OPENCV_GPUFILTERS_DEPR_BEFORE__ void Scharr(InputArray src, OutputArray dst, int ddepth, int dx, int dy, double scale = 1, + int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, + Stream& stream = Stream::Null()) __OPENCV_GPUFILTERS_DEPR_AFTER__; + +inline void Scharr(InputArray src, OutputArray dst, int ddepth, int dx, int dy, double scale, int rowBorderType, int columnBorderType, Stream& stream) +{ + Ptr f = gpu::createScharrFilter(src.type(), ddepth, dx, dy, scale, rowBorderType, columnBorderType); + f->apply(src, dst, stream); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Gaussian Filter + +//! 
smooths the image using Gaussian filter +CV_EXPORTS Ptr createGaussianFilter(int srcType, int dstType, Size ksize, + double sigma1, double sigma2 = 0, + int rowBorderMode = BORDER_DEFAULT, int columnBorderMode = -1); + +__OPENCV_GPUFILTERS_DEPR_BEFORE__ void GaussianBlur(InputArray src, OutputArray dst, Size ksize, + double sigma1, double sigma2 = 0, + int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, + Stream& stream = Stream::Null()) __OPENCV_GPUFILTERS_DEPR_AFTER__; + +inline void GaussianBlur(InputArray src, OutputArray dst, Size ksize, double sigma1, double sigma2, int rowBorderType, int columnBorderType, Stream& stream) +{ + Ptr f = gpu::createGaussianFilter(src.type(), -1, ksize, sigma1, sigma2, rowBorderType, columnBorderType); + f->apply(src, dst, stream); +} + + + + + + @@ -196,14 +273,7 @@ public: virtual void apply(const GpuMat& src, GpuMat& dst, Rect roi = Rect(0,0,-1,-1), Stream& stream = Stream::Null()) = 0; }; -//! returns the non-separable filter engine with the specified filter -CV_EXPORTS Ptr createFilter2D_GPU(const Ptr& filter2D, int srcType, int dstType); -//! returns the separable filter engine with the specified filters -CV_EXPORTS Ptr createSeparableFilter_GPU(const Ptr& rowFilter, - const Ptr& columnFilter, int srcType, int bufType, int dstType); -CV_EXPORTS Ptr createSeparableFilter_GPU(const Ptr& rowFilter, - const Ptr& columnFilter, int srcType, int bufType, int dstType, GpuMat& buf); //! returns horizontal 1D box filter //! supports only CV_8UC1 source type and CV_32FC1 sum type @@ -230,47 +300,7 @@ CV_EXPORTS Ptr createMorphologyFilter_GPU(int op, int type, co -//! returns the primitive row filter with the specified kernel. -//! supports only CV_8UC1, CV_8UC4, CV_16SC1, CV_16SC2, CV_32SC1, CV_32FC1 source type. -//! there are two version of algorithm: NPP and OpenCV. -//! NPP calls when srcType == CV_8UC1 or srcType == CV_8UC4 and bufType == srcType, -//! otherwise calls OpenCV version. -//! 
NPP supports only BORDER_CONSTANT border type. -//! OpenCV version supports only CV_32F as buffer depth and -//! BORDER_REFLECT101, BORDER_REPLICATE and BORDER_CONSTANT border types. -CV_EXPORTS Ptr getLinearRowFilter_GPU(int srcType, int bufType, const Mat& rowKernel, - int anchor = -1, int borderType = BORDER_DEFAULT); -//! returns the primitive column filter with the specified kernel. -//! supports only CV_8UC1, CV_8UC4, CV_16SC1, CV_16SC2, CV_32SC1, CV_32FC1 dst type. -//! there are two version of algorithm: NPP and OpenCV. -//! NPP calls when dstType == CV_8UC1 or dstType == CV_8UC4 and bufType == dstType, -//! otherwise calls OpenCV version. -//! NPP supports only BORDER_CONSTANT border type. -//! OpenCV version supports only CV_32F as buffer depth and -//! BORDER_REFLECT101, BORDER_REPLICATE and BORDER_CONSTANT border types. -CV_EXPORTS Ptr getLinearColumnFilter_GPU(int bufType, int dstType, const Mat& columnKernel, - int anchor = -1, int borderType = BORDER_DEFAULT); - -//! returns the separable linear filter engine -CV_EXPORTS Ptr createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat& rowKernel, - const Mat& columnKernel, const Point& anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT, - int columnBorderType = -1); -CV_EXPORTS Ptr createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat& rowKernel, - const Mat& columnKernel, GpuMat& buf, const Point& anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT, - int columnBorderType = -1); - -//! returns filter engine for the generalized Sobel operator -CV_EXPORTS Ptr createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize, - int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1); -CV_EXPORTS Ptr createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize, GpuMat& buf, - int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1); - -//! 
returns the Gaussian filter engine -CV_EXPORTS Ptr createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0, - int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1); -CV_EXPORTS Ptr createGaussianFilter_GPU(int type, Size ksize, GpuMat& buf, double sigma1, double sigma2 = 0, - int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1); //! returns maximum filter CV_EXPORTS Ptr getMaxFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor = Point(-1,-1)); @@ -297,30 +327,8 @@ CV_EXPORTS void morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat& CV_EXPORTS void morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, GpuMat& buf1, GpuMat& buf2, Point anchor = Point(-1, -1), int iterations = 1, Stream& stream = Stream::Null()); -//! applies separable 2D linear filter to the image -CV_EXPORTS void sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY, - Point anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1); -CV_EXPORTS void sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY, GpuMat& buf, - Point anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, - Stream& stream = Stream::Null()); -//! applies generalized Sobel operator to the image -CV_EXPORTS void Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1, - int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1); -CV_EXPORTS void Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, GpuMat& buf, int ksize = 3, double scale = 1, - int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, Stream& stream = Stream::Null()); -//! 
applies the vertical or horizontal Scharr operator to the image -CV_EXPORTS void Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, double scale = 1, - int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1); -CV_EXPORTS void Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, GpuMat& buf, double scale = 1, - int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, Stream& stream = Stream::Null()); - -//! smooths the image using Gaussian filter. -CV_EXPORTS void GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, double sigma1, double sigma2 = 0, - int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1); -CV_EXPORTS void GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, GpuMat& buf, double sigma1, double sigma2 = 0, - int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, Stream& stream = Stream::Null()); }} // namespace cv { namespace gpu { diff --git a/modules/gpufilters/perf/perf_filters.cpp b/modules/gpufilters/perf/perf_filters.cpp index efa99696a..9c7dcb8b4 100644 --- a/modules/gpufilters/perf/perf_filters.cpp +++ b/modules/gpufilters/perf/perf_filters.cpp @@ -159,13 +159,6 @@ PERF_TEST_P(Sz_Type_KernelSz, Laplacian, Combine(GPU_TYPICAL_MAT_SIZES, Values(C } } - - - - - - - ////////////////////////////////////////////////////////////////////// // Sobel @@ -184,9 +177,10 @@ PERF_TEST_P(Sz_Type_KernelSz, Sobel, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U { const cv::gpu::GpuMat d_src(src); cv::gpu::GpuMat dst; - cv::gpu::GpuMat d_buf; - TEST_CYCLE() cv::gpu::Sobel(d_src, dst, -1, 1, 1, d_buf, ksize); + cv::Ptr sobel = cv::gpu::createSobelFilter(d_src.type(), -1, 1, 1, ksize); + + TEST_CYCLE() sobel->apply(d_src, dst); GPU_SANITY_CHECK(dst); } @@ -217,9 +211,10 @@ PERF_TEST_P(Sz_Type, Scharr, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8 { const cv::gpu::GpuMat d_src(src); cv::gpu::GpuMat dst; - cv::gpu::GpuMat d_buf; - TEST_CYCLE() cv::gpu::Scharr(d_src, dst, -1, 1, 0, d_buf); + 
cv::Ptr scharr = cv::gpu::createScharrFilter(d_src.type(), -1, 1, 0); + + TEST_CYCLE() scharr->apply(d_src, dst); GPU_SANITY_CHECK(dst); } @@ -251,9 +246,10 @@ PERF_TEST_P(Sz_Type_KernelSz, GaussianBlur, Combine(GPU_TYPICAL_MAT_SIZES, Value { const cv::gpu::GpuMat d_src(src); cv::gpu::GpuMat dst; - cv::gpu::GpuMat d_buf; - TEST_CYCLE() cv::gpu::GaussianBlur(d_src, dst, cv::Size(ksize, ksize), d_buf, 0.5); + cv::Ptr gauss = cv::gpu::createGaussianFilter(d_src.type(), -1, cv::Size(ksize, ksize), 0.5); + + TEST_CYCLE() gauss->apply(d_src, dst); GPU_SANITY_CHECK(dst); } @@ -267,6 +263,13 @@ PERF_TEST_P(Sz_Type_KernelSz, GaussianBlur, Combine(GPU_TYPICAL_MAT_SIZES, Value } } + + + + + + + ////////////////////////////////////////////////////////////////////// // Erode diff --git a/modules/gpufilters/src/filtering.cpp b/modules/gpufilters/src/filtering.cpp index 305ecec23..c4de57440 100644 --- a/modules/gpufilters/src/filtering.cpp +++ b/modules/gpufilters/src/filtering.cpp @@ -53,22 +53,24 @@ Ptr cv::gpu::createLinearFilter(int, int, InputArray, Point, int, Scalar Ptr cv::gpu::createLaplacianFilter(int, int, int, double, int, Scalar) { throw_no_cuda(); return Ptr(); } -Ptr cv::gpu::createFilter2D_GPU(const Ptr&, int, int) { throw_no_cuda(); return Ptr(0); } -Ptr cv::gpu::createSeparableFilter_GPU(const Ptr&, const Ptr&, int, int, int) { throw_no_cuda(); return Ptr(0); } -Ptr cv::gpu::createSeparableFilter_GPU(const Ptr&, const Ptr&, int, int, int, GpuMat&) { throw_no_cuda(); return Ptr(0); } +Ptr cv::gpu::createSeparableLinearFilter(int, int, InputArray, InputArray, Point, int, int) { throw_no_cuda(); return Ptr(); } + +Ptr cv::gpu::createDerivFilter(int, int, int, int, int, bool, double, int, int) { throw_no_cuda(); return Ptr(); } +Ptr cv::gpu::createSobelFilter(int, int, int, int, int, double, int, int) { throw_no_cuda(); return Ptr(); } +Ptr cv::gpu::createScharrFilter(int, int, int, int, double, int, int) { throw_no_cuda(); return Ptr(); } + +Ptr 
cv::gpu::createGaussianFilter(int, int, Size, double, double, int, int) { throw_no_cuda(); return Ptr(); } + + + + + + Ptr cv::gpu::getRowSumFilter_GPU(int, int, int, int) { throw_no_cuda(); return Ptr(0); } Ptr cv::gpu::getColumnSumFilter_GPU(int, int, int, int) { throw_no_cuda(); return Ptr(0); } Ptr cv::gpu::getMorphologyFilter_GPU(int, int, const Mat&, const Size&, Point) { throw_no_cuda(); return Ptr(0); } Ptr cv::gpu::createMorphologyFilter_GPU(int, int, const Mat&, const Point&, int) { throw_no_cuda(); return Ptr(0); } Ptr cv::gpu::createMorphologyFilter_GPU(int, int, const Mat&, GpuMat&, const Point&, int) { throw_no_cuda(); return Ptr(0); } -Ptr cv::gpu::getLinearRowFilter_GPU(int, int, const Mat&, int, int) { throw_no_cuda(); return Ptr(0); } -Ptr cv::gpu::getLinearColumnFilter_GPU(int, int, const Mat&, int, int) { throw_no_cuda(); return Ptr(0); } -Ptr cv::gpu::createSeparableLinearFilter_GPU(int, int, const Mat&, const Mat&, const Point&, int, int) { throw_no_cuda(); return Ptr(0); } -Ptr cv::gpu::createSeparableLinearFilter_GPU(int, int, const Mat&, const Mat&, GpuMat&, const Point&, int, int) { throw_no_cuda(); return Ptr(0); } -Ptr cv::gpu::createDerivFilter_GPU(int, int, int, int, int, int, int) { throw_no_cuda(); return Ptr(0); } -Ptr cv::gpu::createDerivFilter_GPU(int, int, int, int, int, GpuMat&, int, int) { throw_no_cuda(); return Ptr(0); } -Ptr cv::gpu::createGaussianFilter_GPU(int, Size, double, double, int, int) { throw_no_cuda(); return Ptr(0); } -Ptr cv::gpu::createGaussianFilter_GPU(int, Size, GpuMat&, double, double, int, int) { throw_no_cuda(); return Ptr(0); } Ptr cv::gpu::getMaxFilter_GPU(int, int, const Size&, Point) { throw_no_cuda(); return Ptr(0); } Ptr cv::gpu::getMinFilter_GPU(int, int, const Size&, Point) { throw_no_cuda(); return Ptr(0); } @@ -78,14 +80,7 @@ void cv::gpu::dilate(const GpuMat&, GpuMat&, const Mat&, Point, int) { throw_no_ void cv::gpu::dilate(const GpuMat&, GpuMat&, const Mat&, GpuMat&, Point, int, Stream&) { 
throw_no_cuda(); } void cv::gpu::morphologyEx(const GpuMat&, GpuMat&, int, const Mat&, Point, int) { throw_no_cuda(); } void cv::gpu::morphologyEx(const GpuMat&, GpuMat&, int, const Mat&, GpuMat&, GpuMat&, Point, int, Stream&) { throw_no_cuda(); } -void cv::gpu::sepFilter2D(const GpuMat&, GpuMat&, int, const Mat&, const Mat&, Point, int, int) { throw_no_cuda(); } -void cv::gpu::sepFilter2D(const GpuMat&, GpuMat&, int, const Mat&, const Mat&, GpuMat&, Point, int, int, Stream&) { throw_no_cuda(); } -void cv::gpu::Sobel(const GpuMat&, GpuMat&, int, int, int, int, double, int, int) { throw_no_cuda(); } -void cv::gpu::Sobel(const GpuMat&, GpuMat&, int, int, int, GpuMat&, int, double, int, int, Stream&) { throw_no_cuda(); } -void cv::gpu::Scharr(const GpuMat&, GpuMat&, int, int, int, double, int, int) { throw_no_cuda(); } -void cv::gpu::Scharr(const GpuMat&, GpuMat&, int, int, int, GpuMat&, double, int, int, Stream&) { throw_no_cuda(); } -void cv::gpu::GaussianBlur(const GpuMat&, GpuMat&, Size, double, double, int, int) { throw_no_cuda(); } -void cv::gpu::GaussianBlur(const GpuMat&, GpuMat&, Size, GpuMat&, double, double, int, int, Stream&) { throw_no_cuda(); } + #else @@ -185,6 +180,8 @@ Ptr cv::gpu::createBoxFilter(int srcType, int dstType, Size ksize, Point if (dstType < 0) dstType = srcType; + dstType = CV_MAKE_TYPE(CV_MAT_DEPTH(dstType), CV_MAT_CN(srcType)); + return new NPPBoxFilter(srcType, dstType, ksize, anchor, borderMode, borderVal); } @@ -291,6 +288,8 @@ Ptr cv::gpu::createLinearFilter(int srcType, int dstType, InputArray ker if (dstType < 0) dstType = srcType; + dstType = CV_MAKE_TYPE(CV_MAT_DEPTH(dstType), CV_MAT_CN(srcType)); + return new LinearFilter(srcType, dstType, kernel, anchor, borderMode, borderVal); } @@ -314,7 +313,198 @@ Ptr cv::gpu::createLaplacianFilter(int srcType, int dstType, int ksize, return gpu::createLinearFilter(srcType, dstType, kernel, Point(-1,-1), borderMode, borderVal); } 
+//////////////////////////////////////////////////////////////////////////////////////////////////// +// Separable Linear Filter +namespace filter +{ + template + void linearRow(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); + + template + void linearColumn(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); +} + +namespace +{ + class SeparableLinearFilter : public Filter + { + public: + SeparableLinearFilter(int srcType, int dstType, + InputArray rowKernel, InputArray columnKernel, + Point anchor, int rowBorderMode, int columnBorderMode); + + void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); + + private: + typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); + + int srcType_, bufType_, dstType_; + GpuMat rowKernel_, columnKernel_; + func_t rowFilter_, columnFilter_; + Point anchor_; + int rowBorderMode_, columnBorderMode_; + + GpuMat buf_; + }; + + SeparableLinearFilter::SeparableLinearFilter(int srcType, int dstType, + InputArray _rowKernel, InputArray _columnKernel, + Point anchor, int rowBorderMode, int columnBorderMode) : + srcType_(srcType), dstType_(dstType), anchor_(anchor), rowBorderMode_(rowBorderMode), columnBorderMode_(columnBorderMode) + { + static const func_t rowFilterFuncs[7][4] = + { + {filter::linearRow, 0, filter::linearRow, filter::linearRow}, + {0, 0, 0, 0}, + {filter::linearRow, 0, filter::linearRow, filter::linearRow}, + {filter::linearRow, 0, filter::linearRow, filter::linearRow}, + {filter::linearRow, 0, filter::linearRow, filter::linearRow}, + {filter::linearRow, 0, filter::linearRow, filter::linearRow}, + {0, 0, 0, 0} + }; + + static const func_t columnFilterFuncs[7][4] = + { + {filter::linearColumn, 0, filter::linearColumn, filter::linearColumn}, + {0, 0, 0, 0}, + 
{filter::linearColumn, 0, filter::linearColumn, filter::linearColumn}, + {filter::linearColumn, 0, filter::linearColumn, filter::linearColumn}, + {filter::linearColumn, 0, filter::linearColumn, filter::linearColumn}, + {filter::linearColumn, 0, filter::linearColumn, filter::linearColumn}, + {0, 0, 0, 0} + }; + + const int sdepth = CV_MAT_DEPTH(srcType); + const int cn = CV_MAT_CN(srcType); + const int ddepth = CV_MAT_DEPTH(dstType); + + Mat rowKernel = _rowKernel.getMat(); + Mat columnKernel = _columnKernel.getMat(); + + CV_Assert( sdepth <= CV_64F && cn <= 4 ); + CV_Assert( rowKernel.channels() == 1 ); + CV_Assert( columnKernel.channels() == 1 ); + CV_Assert( rowBorderMode == BORDER_REFLECT101 || rowBorderMode == BORDER_REPLICATE || rowBorderMode == BORDER_CONSTANT || rowBorderMode == BORDER_REFLECT || rowBorderMode == BORDER_WRAP ); + CV_Assert( columnBorderMode == BORDER_REFLECT101 || columnBorderMode == BORDER_REPLICATE || columnBorderMode == BORDER_CONSTANT || columnBorderMode == BORDER_REFLECT || columnBorderMode == BORDER_WRAP ); + + Mat kernel32F; + + rowKernel.convertTo(kernel32F, CV_32F); + rowKernel_.upload(kernel32F.reshape(1, 1)); + + columnKernel.convertTo(kernel32F, CV_32F); + columnKernel_.upload(kernel32F.reshape(1, 1)); + + CV_Assert( rowKernel_.cols > 0 && rowKernel_.cols <= 32 ); + CV_Assert( columnKernel_.cols > 0 && columnKernel_.cols <= 32 ); + + normalizeAnchor(anchor_.x, rowKernel_.cols); + normalizeAnchor(anchor_.y, columnKernel_.cols); + + bufType_ = CV_MAKE_TYPE(CV_32F, cn); + + rowFilter_ = rowFilterFuncs[sdepth][cn - 1]; + CV_Assert( rowFilter_ != 0 ); + + columnFilter_ = columnFilterFuncs[ddepth][cn - 1]; + CV_Assert( columnFilter_ != 0 ); + } + + void SeparableLinearFilter::apply(InputArray _src, OutputArray _dst, Stream& _stream) + { + GpuMat src = _src.getGpuMat(); + CV_Assert( src.type() == srcType_ ); + + _dst.create(src.size(), dstType_); + GpuMat dst = _dst.getGpuMat(); + + ensureSizeIsEnough(src.size(), bufType_, buf_); + + 
DeviceInfo devInfo; + const int cc = devInfo.major() * 10 + devInfo.minor(); + + cudaStream_t stream = StreamAccessor::getStream(_stream); + + rowFilter_(src, buf_, rowKernel_.ptr(), rowKernel_.cols, anchor_.x, rowBorderMode_, cc, stream); + columnFilter_(buf_, dst, columnKernel_.ptr(), columnKernel_.cols, anchor_.y, columnBorderMode_, cc, stream); + } +} + +Ptr cv::gpu::createSeparableLinearFilter(int srcType, int dstType, InputArray rowKernel, InputArray columnKernel, Point anchor, int rowBorderMode, int columnBorderMode) +{ + if (dstType < 0) + dstType = srcType; + + dstType = CV_MAKE_TYPE(CV_MAT_DEPTH(dstType), CV_MAT_CN(srcType)); + + if (columnBorderMode < 0) + columnBorderMode = rowBorderMode; + + return new SeparableLinearFilter(srcType, dstType, rowKernel, columnKernel, anchor, rowBorderMode, columnBorderMode); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Deriv Filter + +Ptr cv::gpu::createDerivFilter(int srcType, int dstType, int dx, int dy, int ksize, bool normalize, double scale, int rowBorderMode, int columnBorderMode) +{ + Mat kx, ky; + getDerivKernels(kx, ky, dx, dy, ksize, normalize, CV_32F); + + if (scale != 1) + { + // usually the smoothing part is the slowest to compute, + // so try to scale it instead of the faster differenciating part + if (dx == 0) + kx *= scale; + else + ky *= scale; + } + + return gpu::createSeparableLinearFilter(srcType, dstType, kx, ky, Point(-1, -1), rowBorderMode, columnBorderMode); +} + +Ptr cv::gpu::createSobelFilter(int srcType, int dstType, int dx, int dy, int ksize, double scale, int rowBorderMode, int columnBorderMode) +{ + return gpu::createDerivFilter(srcType, dstType, dx, dy, ksize, false, scale, rowBorderMode, columnBorderMode); +} + +Ptr cv::gpu::createScharrFilter(int srcType, int dstType, int dx, int dy, double scale, int rowBorderMode, int columnBorderMode) +{ + return gpu::createDerivFilter(srcType, dstType, dx, dy, -1, false, scale, 
rowBorderMode, columnBorderMode); +} + +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Gaussian Filter + +Ptr cv::gpu::createGaussianFilter(int srcType, int dstType, Size ksize, double sigma1, double sigma2, int rowBorderMode, int columnBorderMode) +{ + const int depth = CV_MAT_DEPTH(srcType); + + if (sigma2 <= 0) + sigma2 = sigma1; + + // automatic detection of kernel size from sigma + if (ksize.width <= 0 && sigma1 > 0) + ksize.width = cvRound(sigma1 * (depth == CV_8U ? 3 : 4)*2 + 1) | 1; + if (ksize.height <= 0 && sigma2 > 0) + ksize.height = cvRound(sigma2 * (depth == CV_8U ? 3 : 4)*2 + 1) | 1; + + CV_Assert( ksize.width > 0 && ksize.width % 2 == 1 && ksize.height > 0 && ksize.height % 2 == 1 ); + + sigma1 = std::max(sigma1, 0.0); + sigma2 = std::max(sigma2, 0.0); + + Mat kx = getGaussianKernel(ksize.width, sigma1, CV_32F); + Mat ky; + if (ksize.height == ksize.width && std::abs(sigma1 - sigma2) < DBL_EPSILON) + ky = kx; + else + ky = getGaussianKernel(ksize.height, sigma2, CV_32F); + + return createSeparableLinearFilter(srcType, dstType, kx, ky, Point(-1,-1), rowBorderMode, columnBorderMode); +} @@ -377,128 +567,6 @@ namespace } } -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Filter2D - -namespace -{ - struct Filter2DEngine_GPU : public FilterEngine_GPU - { - Filter2DEngine_GPU(const Ptr& filter2D_, int srcType_, int dstType_) : - filter2D(filter2D_), srcType(srcType_), dstType(dstType_) - {} - - virtual void apply(const GpuMat& src, GpuMat& dst, Rect roi = Rect(0,0,-1,-1), Stream& stream = Stream::Null()) - { - CV_Assert(src.type() == srcType); - - Size src_size = src.size(); - - dst.create(src_size, dstType); - - if (roi.size() != src_size) - { - dst.setTo(Scalar::all(0), stream); - } - - normalizeROI(roi, filter2D->ksize, filter2D->anchor, src_size); - - GpuMat srcROI = src(roi); - GpuMat dstROI = dst(roi); - - (*filter2D)(srcROI, dstROI, 
stream); - } - - Ptr filter2D; - int srcType, dstType; - }; -} - -Ptr cv::gpu::createFilter2D_GPU(const Ptr& filter2D, int srcType, int dstType) -{ - return Ptr(new Filter2DEngine_GPU(filter2D, srcType, dstType)); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// SeparableFilter - -namespace -{ - struct SeparableFilterEngine_GPU : public FilterEngine_GPU - { - SeparableFilterEngine_GPU(const Ptr& rowFilter_, const Ptr& columnFilter_, - int srcType_, int bufType_, int dstType_) : - rowFilter(rowFilter_), columnFilter(columnFilter_), - srcType(srcType_), bufType(bufType_), dstType(dstType_) - { - ksize = Size(rowFilter->ksize, columnFilter->ksize); - anchor = Point(rowFilter->anchor, columnFilter->anchor); - - pbuf = &buf; - } - - SeparableFilterEngine_GPU(const Ptr& rowFilter_, const Ptr& columnFilter_, - int srcType_, int bufType_, int dstType_, - GpuMat& buf_) : - rowFilter(rowFilter_), columnFilter(columnFilter_), - srcType(srcType_), bufType(bufType_), dstType(dstType_) - { - ksize = Size(rowFilter->ksize, columnFilter->ksize); - anchor = Point(rowFilter->anchor, columnFilter->anchor); - - pbuf = &buf_; - } - - virtual void apply(const GpuMat& src, GpuMat& dst, Rect roi = Rect(0,0,-1,-1), Stream& stream = Stream::Null()) - { - CV_Assert(src.type() == srcType); - - Size src_size = src.size(); - - dst.create(src_size, dstType); - - if (roi.size() != src_size) - { - dst.setTo(Scalar::all(0), stream); - } - - ensureSizeIsEnough(src_size, bufType, *pbuf); - - normalizeROI(roi, ksize, anchor, src_size); - - GpuMat srcROI = src(roi); - GpuMat dstROI = dst(roi); - GpuMat bufROI = (*pbuf)(roi); - - (*rowFilter)(srcROI, bufROI, stream); - (*columnFilter)(bufROI, dstROI, stream); - } - - Ptr rowFilter; - Ptr columnFilter; - - int srcType, bufType, dstType; - - Size ksize; - Point anchor; - - GpuMat buf; - GpuMat* pbuf; - }; -} - -Ptr cv::gpu::createSeparableFilter_GPU(const Ptr& rowFilter, - const Ptr& 
columnFilter, int srcType, int bufType, int dstType) -{ - return Ptr(new SeparableFilterEngine_GPU(rowFilter, columnFilter, srcType, bufType, dstType)); -} - -Ptr cv::gpu::createSeparableFilter_GPU(const Ptr& rowFilter, - const Ptr& columnFilter, int srcType, int bufType, int dstType, GpuMat& buf) -{ - return Ptr(new SeparableFilterEngine_GPU(rowFilter, columnFilter, srcType, bufType, dstType, buf)); -} - //////////////////////////////////////////////////////////////////////////////////////////////////// // 1D Sum Filter @@ -829,433 +897,6 @@ void cv::gpu::morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat& ke } } -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Separable Linear Filter - -namespace filter -{ - template - void linearRow(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); - - template - void linearColumn(PtrStepSzb src, PtrStepSzb dst, const float* kernel, int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); -} - -namespace -{ - typedef NppStatus (*nppFilter1D_t)(const Npp8u * pSrc, Npp32s nSrcStep, Npp8u * pDst, Npp32s nDstStep, NppiSize oROI, - const Npp32s * pKernel, Npp32s nMaskSize, Npp32s nAnchor, Npp32s nDivisor); - - typedef void (*gpuFilter1D_t)(PtrStepSzb src, PtrStepSzb dst, const float kernel[], int ksize, int anchor, int brd_type, int cc, cudaStream_t stream); - - struct NppLinearRowFilter : public BaseRowFilter_GPU - { - NppLinearRowFilter(int ksize_, int anchor_, const GpuMat& kernel_, Npp32s nDivisor_, nppFilter1D_t func_) : - BaseRowFilter_GPU(ksize_, anchor_), kernel(kernel_), nDivisor(nDivisor_), func(func_) {} - - virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null()) - { - NppiSize sz; - sz.width = src.cols; - sz.height = src.rows; - - cudaStream_t stream = StreamAccessor::getStream(s); - - NppStreamHandler h(stream); - - nppSafeCall( func(src.ptr(), 
static_cast(src.step), dst.ptr(), static_cast(dst.step), sz, - kernel.ptr(), ksize, anchor, nDivisor) ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - - GpuMat kernel; - Npp32s nDivisor; - nppFilter1D_t func; - }; - - struct GpuLinearRowFilter : public BaseRowFilter_GPU - { - GpuLinearRowFilter(int ksize_, int anchor_, const GpuMat& kernel_, gpuFilter1D_t func_, int brd_type_) : - BaseRowFilter_GPU(ksize_, anchor_), kernel(kernel_), func(func_), brd_type(brd_type_) {} - - virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null()) - { - DeviceInfo devInfo; - int cc = devInfo.major() * 10 + devInfo.minor(); - func(src, dst, kernel.ptr(), ksize, anchor, brd_type, cc, StreamAccessor::getStream(s)); - } - - GpuMat kernel; - gpuFilter1D_t func; - int brd_type; - }; -} - -Ptr cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType, const Mat& rowKernel, int anchor, int borderType) -{ - static const gpuFilter1D_t funcs[7][4] = - { - {filter::linearRow, 0, filter::linearRow, filter::linearRow}, - {0, 0, 0, 0}, - {filter::linearRow, 0, filter::linearRow, filter::linearRow}, - {filter::linearRow, 0, filter::linearRow, filter::linearRow}, - {filter::linearRow, 0, filter::linearRow, filter::linearRow}, - {filter::linearRow, 0, filter::linearRow, filter::linearRow}, - {0, 0, 0, 0} - }; - static const nppFilter1D_t npp_funcs[] = - { - 0, nppiFilterRow_8u_C1R, 0, 0, nppiFilterRow_8u_C4R - }; - - if ((bufType == srcType) && (srcType == CV_8UC1 || srcType == CV_8UC4)) - { - CV_Assert( borderType == BORDER_CONSTANT ); - - GpuMat gpu_row_krnl; - int nDivisor; - normalizeKernel(rowKernel, gpu_row_krnl, CV_32S, &nDivisor, true); - - const int ksize = gpu_row_krnl.cols; - normalizeAnchor(anchor, ksize); - - return Ptr(new NppLinearRowFilter(ksize, anchor, gpu_row_krnl, nDivisor, npp_funcs[CV_MAT_CN(srcType)])); - } - - CV_Assert( borderType == BORDER_REFLECT101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || 
borderType == BORDER_REFLECT || borderType == BORDER_WRAP ); - - const int sdepth = CV_MAT_DEPTH(srcType); - const int cn = CV_MAT_CN(srcType); - CV_Assert( sdepth <= CV_64F && cn <= 4 ); - CV_Assert( CV_MAT_DEPTH(bufType) == CV_32F && CV_MAT_CN(bufType) == cn ); - - const gpuFilter1D_t func = funcs[sdepth][cn - 1]; - CV_Assert( func != 0 ); - - GpuMat gpu_row_krnl; - normalizeKernel(rowKernel, gpu_row_krnl, CV_32F); - - const int ksize = gpu_row_krnl.cols; - CV_Assert( ksize > 0 && ksize <= 32 ); - - normalizeAnchor(anchor, ksize); - - return Ptr(new GpuLinearRowFilter(ksize, anchor, gpu_row_krnl, func, borderType)); -} - -namespace -{ - struct NppLinearColumnFilter : public BaseColumnFilter_GPU - { - NppLinearColumnFilter(int ksize_, int anchor_, const GpuMat& kernel_, Npp32s nDivisor_, nppFilter1D_t func_) : - BaseColumnFilter_GPU(ksize_, anchor_), kernel(kernel_), nDivisor(nDivisor_), func(func_) {} - - virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null()) - { - NppiSize sz; - sz.width = src.cols; - sz.height = src.rows; - - cudaStream_t stream = StreamAccessor::getStream(s); - - NppStreamHandler h(stream); - - nppSafeCall( func(src.ptr(), static_cast(src.step), dst.ptr(), static_cast(dst.step), sz, - kernel.ptr(), ksize, anchor, nDivisor) ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - - GpuMat kernel; - Npp32s nDivisor; - nppFilter1D_t func; - }; - - struct GpuLinearColumnFilter : public BaseColumnFilter_GPU - { - GpuLinearColumnFilter(int ksize_, int anchor_, const GpuMat& kernel_, gpuFilter1D_t func_, int brd_type_) : - BaseColumnFilter_GPU(ksize_, anchor_), kernel(kernel_), func(func_), brd_type(brd_type_) {} - - virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null()) - { - DeviceInfo devInfo; - int cc = devInfo.major() * 10 + devInfo.minor(); - if (ksize > 16 && cc < 20) - CV_Error(cv::Error::StsNotImplemented, "column linear filter doesn't implemented for kernel size > 16 
for device with compute capabilities less than 2.0"); - - func(src, dst, kernel.ptr(), ksize, anchor, brd_type, cc, StreamAccessor::getStream(s)); - } - - GpuMat kernel; - gpuFilter1D_t func; - int brd_type; - }; -} - -Ptr cv::gpu::getLinearColumnFilter_GPU(int bufType, int dstType, const Mat& columnKernel, int anchor, int borderType) -{ - static const gpuFilter1D_t funcs[7][4] = - { - {filter::linearColumn, 0, filter::linearColumn, filter::linearColumn}, - {0, 0, 0, 0}, - {filter::linearColumn, 0, filter::linearColumn, filter::linearColumn}, - {filter::linearColumn, 0, filter::linearColumn, filter::linearColumn}, - {filter::linearColumn, 0, filter::linearColumn, filter::linearColumn}, - {filter::linearColumn, 0, filter::linearColumn, filter::linearColumn}, - {0, 0, 0, 0} - }; - static const nppFilter1D_t npp_funcs[] = - { - 0, nppiFilterColumn_8u_C1R, 0, 0, nppiFilterColumn_8u_C4R - }; - - if ((bufType == dstType) && (bufType == CV_8UC1 || bufType == CV_8UC4)) - { - CV_Assert( borderType == BORDER_CONSTANT ); - - GpuMat gpu_col_krnl; - int nDivisor; - normalizeKernel(columnKernel, gpu_col_krnl, CV_32S, &nDivisor, true); - - const int ksize = gpu_col_krnl.cols; - normalizeAnchor(anchor, ksize); - - return Ptr(new NppLinearColumnFilter(ksize, anchor, gpu_col_krnl, nDivisor, npp_funcs[CV_MAT_CN(bufType)])); - } - - CV_Assert( borderType == BORDER_REFLECT101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || borderType == BORDER_REFLECT || borderType == BORDER_WRAP ); - - const int ddepth = CV_MAT_DEPTH(dstType); - const int cn = CV_MAT_CN(dstType); - CV_Assert( ddepth <= CV_64F && cn <= 4 ); - CV_Assert( CV_MAT_DEPTH(bufType) == CV_32F && CV_MAT_CN(bufType) == cn ); - - gpuFilter1D_t func = funcs[ddepth][cn - 1]; - CV_Assert( func != 0 ); - - GpuMat gpu_col_krnl; - normalizeKernel(columnKernel, gpu_col_krnl, CV_32F); - - const int ksize = gpu_col_krnl.cols; - CV_Assert(ksize > 0 && ksize <= 32); - - normalizeAnchor(anchor, ksize); - - return 
Ptr(new GpuLinearColumnFilter(ksize, anchor, gpu_col_krnl, func, borderType)); -} - -Ptr cv::gpu::createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat& rowKernel, const Mat& columnKernel, - const Point& anchor, int rowBorderType, int columnBorderType) -{ - if (columnBorderType < 0) - columnBorderType = rowBorderType; - - int cn = CV_MAT_CN(srcType); - int bdepth = CV_32F; - int bufType = CV_MAKETYPE(bdepth, cn); - - Ptr rowFilter = getLinearRowFilter_GPU(srcType, bufType, rowKernel, anchor.x, rowBorderType); - Ptr columnFilter = getLinearColumnFilter_GPU(bufType, dstType, columnKernel, anchor.y, columnBorderType); - - return createSeparableFilter_GPU(rowFilter, columnFilter, srcType, bufType, dstType); -} - -Ptr cv::gpu::createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat& rowKernel, const Mat& columnKernel, GpuMat& buf, - const Point& anchor, int rowBorderType, int columnBorderType) -{ - if (columnBorderType < 0) - columnBorderType = rowBorderType; - - int cn = CV_MAT_CN(srcType); - int bdepth = CV_32F; - int bufType = CV_MAKETYPE(bdepth, cn); - - Ptr rowFilter = getLinearRowFilter_GPU(srcType, bufType, rowKernel, anchor.x, rowBorderType); - Ptr columnFilter = getLinearColumnFilter_GPU(bufType, dstType, columnKernel, anchor.y, columnBorderType); - - return createSeparableFilter_GPU(rowFilter, columnFilter, srcType, bufType, dstType, buf); -} - -void cv::gpu::sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY, - Point anchor, int rowBorderType, int columnBorderType) -{ - if( ddepth < 0 ) - ddepth = src.depth(); - - dst.create(src.size(), CV_MAKETYPE(ddepth, src.channels())); - - Ptr f = createSeparableLinearFilter_GPU(src.type(), dst.type(), kernelX, kernelY, anchor, rowBorderType, columnBorderType); - f->apply(src, dst, Rect(0, 0, src.cols, src.rows)); -} - -void cv::gpu::sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY, GpuMat& buf, - Point 
anchor, int rowBorderType, int columnBorderType, - Stream& stream) -{ - if( ddepth < 0 ) - ddepth = src.depth(); - - dst.create(src.size(), CV_MAKETYPE(ddepth, src.channels())); - - Ptr f = createSeparableLinearFilter_GPU(src.type(), dst.type(), kernelX, kernelY, buf, anchor, rowBorderType, columnBorderType); - f->apply(src, dst, Rect(0, 0, src.cols, src.rows), stream); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Deriv Filter - -Ptr cv::gpu::createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize, int rowBorderType, int columnBorderType) -{ - Mat kx, ky; - getDerivKernels(kx, ky, dx, dy, ksize, false, CV_32F); - return createSeparableLinearFilter_GPU(srcType, dstType, kx, ky, Point(-1,-1), rowBorderType, columnBorderType); -} - -Ptr cv::gpu::createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize, GpuMat& buf, int rowBorderType, int columnBorderType) -{ - Mat kx, ky; - getDerivKernels(kx, ky, dx, dy, ksize, false, CV_32F); - return createSeparableLinearFilter_GPU(srcType, dstType, kx, ky, buf, Point(-1,-1), rowBorderType, columnBorderType); -} - -void cv::gpu::Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, int ksize, double scale, int rowBorderType, int columnBorderType) -{ - GpuMat buf; - Sobel(src, dst, ddepth, dx, dy, buf, ksize, scale, rowBorderType, columnBorderType); -} - -void cv::gpu::Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, GpuMat& buf, int ksize, double scale, int rowBorderType, int columnBorderType, Stream& stream) -{ - Mat kx, ky; - getDerivKernels(kx, ky, dx, dy, ksize, false, CV_32F); - - if (scale != 1) - { - // usually the smoothing part is the slowest to compute, - // so try to scale it instead of the faster differenciating part - if (dx == 0) - kx *= scale; - else - ky *= scale; - } - - sepFilter2D(src, dst, ddepth, kx, ky, buf, Point(-1,-1), rowBorderType, columnBorderType, stream); -} - -void 
cv::gpu::Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, double scale, int rowBorderType, int columnBorderType) -{ - GpuMat buf; - Scharr(src, dst, ddepth, dx, dy, buf, scale, rowBorderType, columnBorderType); -} - -void cv::gpu::Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, GpuMat& buf, double scale, int rowBorderType, int columnBorderType, Stream& stream) -{ - Mat kx, ky; - getDerivKernels(kx, ky, dx, dy, -1, false, CV_32F); - - if( scale != 1 ) - { - // usually the smoothing part is the slowest to compute, - // so try to scale it instead of the faster differenciating part - if( dx == 0 ) - kx *= scale; - else - ky *= scale; - } - - sepFilter2D(src, dst, ddepth, kx, ky, buf, Point(-1,-1), rowBorderType, columnBorderType, stream); -} - -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Gaussian Filter - -Ptr cv::gpu::createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2, int rowBorderType, int columnBorderType) -{ - int depth = CV_MAT_DEPTH(type); - - if (sigma2 <= 0) - sigma2 = sigma1; - - // automatic detection of kernel size from sigma - if (ksize.width <= 0 && sigma1 > 0) - ksize.width = cvRound(sigma1 * (depth == CV_8U ? 3 : 4)*2 + 1) | 1; - if (ksize.height <= 0 && sigma2 > 0) - ksize.height = cvRound(sigma2 * (depth == CV_8U ? 
3 : 4)*2 + 1) | 1; - - CV_Assert( ksize.width > 0 && ksize.width % 2 == 1 && ksize.height > 0 && ksize.height % 2 == 1 ); - - sigma1 = std::max(sigma1, 0.0); - sigma2 = std::max(sigma2, 0.0); - - Mat kx = getGaussianKernel( ksize.width, sigma1, std::max(depth, CV_32F) ); - Mat ky; - if( ksize.height == ksize.width && std::abs(sigma1 - sigma2) < DBL_EPSILON ) - ky = kx; - else - ky = getGaussianKernel( ksize.height, sigma2, std::max(depth, CV_32F) ); - - return createSeparableLinearFilter_GPU(type, type, kx, ky, Point(-1,-1), rowBorderType, columnBorderType); -} - -Ptr cv::gpu::createGaussianFilter_GPU(int type, Size ksize, GpuMat& buf, double sigma1, double sigma2, int rowBorderType, int columnBorderType) -{ - int depth = CV_MAT_DEPTH(type); - - if (sigma2 <= 0) - sigma2 = sigma1; - - // automatic detection of kernel size from sigma - if (ksize.width <= 0 && sigma1 > 0) - ksize.width = cvRound(sigma1 * (depth == CV_8U ? 3 : 4)*2 + 1) | 1; - if (ksize.height <= 0 && sigma2 > 0) - ksize.height = cvRound(sigma2 * (depth == CV_8U ? 
3 : 4)*2 + 1) | 1; - - CV_Assert( ksize.width > 0 && ksize.width % 2 == 1 && ksize.height > 0 && ksize.height % 2 == 1 ); - - sigma1 = std::max(sigma1, 0.0); - sigma2 = std::max(sigma2, 0.0); - - Mat kx = getGaussianKernel( ksize.width, sigma1, std::max(depth, CV_32F) ); - Mat ky; - if( ksize.height == ksize.width && std::abs(sigma1 - sigma2) < DBL_EPSILON ) - ky = kx; - else - ky = getGaussianKernel( ksize.height, sigma2, std::max(depth, CV_32F) ); - - return createSeparableLinearFilter_GPU(type, type, kx, ky, buf, Point(-1,-1), rowBorderType, columnBorderType); -} - -void cv::gpu::GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, double sigma1, double sigma2, int rowBorderType, int columnBorderType) -{ - if (ksize.width == 1 && ksize.height == 1) - { - src.copyTo(dst); - return; - } - - dst.create(src.size(), src.type()); - - Ptr f = createGaussianFilter_GPU(src.type(), ksize, sigma1, sigma2, rowBorderType, columnBorderType); - f->apply(src, dst, Rect(0, 0, src.cols, src.rows)); -} - -void cv::gpu::GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, GpuMat& buf, double sigma1, double sigma2, int rowBorderType, int columnBorderType, Stream& stream) -{ - if (ksize.width == 1 && ksize.height == 1) - { - src.copyTo(dst); - return; - } - - dst.create(src.size(), src.type()); - - Ptr f = createGaussianFilter_GPU(src.type(), ksize, buf, sigma1, sigma2, rowBorderType, columnBorderType); - f->apply(src, dst, Rect(0, 0, src.cols, src.rows), stream); -} - //////////////////////////////////////////////////////////////////////////////////////////////////// // Image Rank Filter diff --git a/modules/gpufilters/test/test_filters.cpp b/modules/gpufilters/test/test_filters.cpp index 42018424d..e052ede6c 100644 --- a/modules/gpufilters/test/test_filters.cpp +++ b/modules/gpufilters/test/test_filters.cpp @@ -215,13 +215,74 @@ INSTANTIATE_TEST_CASE_P(GPU_Filters, Laplacian, testing::Combine( testing::Values(KSize(cv::Size(1, 1)), KSize(cv::Size(3, 3))), WHOLE_SUBMAT)); 
+///////////////////////////////////////////////////////////////////////////////////////////////// +// SeparableLinearFilter +PARAM_TEST_CASE(SeparableLinearFilter, cv::gpu::DeviceInfo, cv::Size, MatDepth, Channels, KSize, Anchor, BorderType, UseRoi) +{ + cv::gpu::DeviceInfo devInfo; + cv::Size size; + int depth; + int cn; + cv::Size ksize; + cv::Point anchor; + int borderType; + bool useRoi; + int type; + virtual void SetUp() + { + devInfo = GET_PARAM(0); + size = GET_PARAM(1); + depth = GET_PARAM(2); + cn = GET_PARAM(3); + ksize = GET_PARAM(4); + anchor = GET_PARAM(5); + borderType = GET_PARAM(6); + useRoi = GET_PARAM(7); + cv::gpu::setDevice(devInfo.deviceID()); + type = CV_MAKE_TYPE(depth, cn); + } +}; +GPU_TEST_P(SeparableLinearFilter, Accuracy) +{ + cv::Mat src = randomMat(size, type); + cv::Mat rowKernel = randomMat(Size(ksize.width, 1), CV_32FC1, 0.0, 1.0); + cv::Mat columnKernel = randomMat(Size(ksize.height, 1), CV_32FC1, 0.0, 1.0); + cv::Ptr filter = cv::gpu::createSeparableLinearFilter(src.type(), -1, rowKernel, columnKernel, anchor, borderType); + + cv::gpu::GpuMat dst = createMat(size, type, useRoi); + filter->apply(loadMat(src, useRoi), dst); + + cv::Mat dst_gold; + cv::sepFilter2D(src, dst_gold, -1, rowKernel, columnKernel, anchor, 0, borderType); + + EXPECT_MAT_NEAR(dst_gold, dst, src.depth() < CV_32F ? 
1.0 : 1e-2); +} + +INSTANTIATE_TEST_CASE_P(GPU_Filters, SeparableLinearFilter, testing::Combine( + ALL_DEVICES, + DIFFERENT_SIZES, + testing::Values(MatDepth(CV_8U), MatDepth(CV_16U), MatDepth(CV_16S), MatDepth(CV_32F)), + IMAGE_CHANNELS, + testing::Values(KSize(cv::Size(3, 3)), + KSize(cv::Size(7, 7)), + KSize(cv::Size(13, 13)), + KSize(cv::Size(15, 15)), + KSize(cv::Size(17, 17)), + KSize(cv::Size(23, 15)), + KSize(cv::Size(31, 3))), + testing::Values(Anchor(cv::Point(-1, -1)), Anchor(cv::Point(0, 0)), Anchor(cv::Point(2, 2))), + testing::Values(BorderType(cv::BORDER_REFLECT101), + BorderType(cv::BORDER_REPLICATE), + BorderType(cv::BORDER_CONSTANT), + BorderType(cv::BORDER_REFLECT)), + WHOLE_SUBMAT)); ///////////////////////////////////////////////////////////////////////////////////////////////// // Sobel @@ -265,13 +326,15 @@ GPU_TEST_P(Sobel, Accuracy) cv::Mat src = randomMat(size, type); + cv::Ptr sobel = cv::gpu::createSobelFilter(src.type(), -1, dx, dy, ksize.width, 1.0, borderType); + cv::gpu::GpuMat dst = createMat(size, type, useRoi); - cv::gpu::Sobel(loadMat(src, useRoi), dst, -1, dx, dy, ksize.width, 1.0, borderType); + sobel->apply(loadMat(src, useRoi), dst); cv::Mat dst_gold; cv::Sobel(src, dst_gold, -1, dx, dy, ksize.width, 1.0, 0.0, borderType); - EXPECT_MAT_NEAR(getInnerROI(dst_gold, ksize), getInnerROI(dst, ksize), CV_MAT_DEPTH(type) < CV_32F ? 0.0 : 0.1); + EXPECT_MAT_NEAR(dst_gold, dst, src.depth() < CV_32F ? 
0.0 : 0.1); } INSTANTIATE_TEST_CASE_P(GPU_Filters, Sobel, testing::Combine( @@ -328,13 +391,15 @@ GPU_TEST_P(Scharr, Accuracy) cv::Mat src = randomMat(size, type); + cv::Ptr scharr = cv::gpu::createScharrFilter(src.type(), -1, dx, dy, 1.0, borderType); + cv::gpu::GpuMat dst = createMat(size, type, useRoi); - cv::gpu::Scharr(loadMat(src, useRoi), dst, -1, dx, dy, 1.0, borderType); + scharr->apply(loadMat(src, useRoi), dst); cv::Mat dst_gold; cv::Scharr(src, dst_gold, -1, dx, dy, 1.0, 0.0, borderType); - EXPECT_MAT_NEAR(getInnerROI(dst_gold, cv::Size(3, 3)), getInnerROI(dst, cv::Size(3, 3)), CV_MAT_DEPTH(type) < CV_32F ? 0.0 : 0.1); + EXPECT_MAT_NEAR(dst_gold, dst, src.depth() < CV_32F ? 0.0 : 0.1); } INSTANTIATE_TEST_CASE_P(GPU_Filters, Scharr, testing::Combine( @@ -387,28 +452,15 @@ GPU_TEST_P(GaussianBlur, Accuracy) double sigma1 = randomDouble(0.1, 1.0); double sigma2 = randomDouble(0.1, 1.0); - if (ksize.height > 16 && !supportFeature(devInfo, cv::gpu::FEATURE_SET_COMPUTE_20)) - { - try - { - cv::gpu::GpuMat dst; - cv::gpu::GaussianBlur(loadMat(src), dst, ksize, sigma1, sigma2, borderType); - } - catch (const cv::Exception& e) - { - ASSERT_EQ(cv::Error::StsNotImplemented, e.code); - } - } - else - { - cv::gpu::GpuMat dst = createMat(size, type, useRoi); - cv::gpu::GaussianBlur(loadMat(src, useRoi), dst, ksize, sigma1, sigma2, borderType); + cv::Ptr gauss = cv::gpu::createGaussianFilter(src.type(), -1, ksize, sigma1, sigma2, borderType); - cv::Mat dst_gold; - cv::GaussianBlur(src, dst_gold, ksize, sigma1, sigma2, borderType); + cv::gpu::GpuMat dst = createMat(size, type, useRoi); + gauss->apply(loadMat(src, useRoi), dst); - EXPECT_MAT_NEAR(dst_gold, dst, 4.0); - } + cv::Mat dst_gold; + cv::GaussianBlur(src, dst_gold, ksize, sigma1, sigma2, borderType); + + EXPECT_MAT_NEAR(dst_gold, dst, src.depth() < CV_32F ? 
4.0 : 1e-4); } INSTANTIATE_TEST_CASE_P(GPU_Filters, GaussianBlur, testing::Combine( @@ -437,6 +489,15 @@ INSTANTIATE_TEST_CASE_P(GPU_Filters, GaussianBlur, testing::Combine( BorderType(cv::BORDER_REFLECT)), WHOLE_SUBMAT)); + + + + + + + + + ///////////////////////////////////////////////////////////////////////////////////////////////// // Erode diff --git a/modules/gpuimgproc/include/opencv2/gpuimgproc.hpp b/modules/gpuimgproc/include/opencv2/gpuimgproc.hpp index cf1b8e670..3fe9f82f4 100644 --- a/modules/gpuimgproc/include/opencv2/gpuimgproc.hpp +++ b/modules/gpuimgproc/include/opencv2/gpuimgproc.hpp @@ -158,7 +158,7 @@ struct CV_EXPORTS CannyBuf GpuMat mag; GpuMat map; GpuMat st1, st2; - Ptr filterDX, filterDY; + Ptr filterDX, filterDY; }; CV_EXPORTS void Canny(const GpuMat& image, GpuMat& edges, double low_thresh, double high_thresh, int apperture_size = 3, bool L2gradient = false); diff --git a/modules/gpuimgproc/src/canny.cpp b/modules/gpuimgproc/src/canny.cpp index 8d361fe50..9a3357564 100644 --- a/modules/gpuimgproc/src/canny.cpp +++ b/modules/gpuimgproc/src/canny.cpp @@ -65,8 +65,8 @@ void cv::gpu::CannyBuf::create(const Size& image_size, int apperture_size) if (apperture_size != 3) { - filterDX = createDerivFilter_GPU(CV_8UC1, CV_32S, 1, 0, apperture_size, BORDER_REPLICATE); - filterDY = createDerivFilter_GPU(CV_8UC1, CV_32S, 0, 1, apperture_size, BORDER_REPLICATE); + filterDX = createDerivFilter(CV_8UC1, CV_32S, 1, 0, apperture_size, false, 1, BORDER_REPLICATE); + filterDY = createDerivFilter(CV_8UC1, CV_32S, 0, 1, apperture_size, false, 1, BORDER_REPLICATE); } } @@ -150,8 +150,8 @@ void cv::gpu::Canny(const GpuMat& src, CannyBuf& buf, GpuMat& dst, double low_th } else { - buf.filterDX->apply(src, buf.dx, Rect(0, 0, src.cols, src.rows)); - buf.filterDY->apply(src, buf.dy, Rect(0, 0, src.cols, src.rows)); + buf.filterDX->apply(src, buf.dx); + buf.filterDY->apply(src, buf.dy); calcMagnitude(buf.dx, buf.dy, buf.mag, L2gradient); } diff --git 
a/modules/gpuimgproc/src/corners.cpp b/modules/gpuimgproc/src/corners.cpp index 44dc1505d..824a3308e 100644 --- a/modules/gpuimgproc/src/corners.cpp +++ b/modules/gpuimgproc/src/corners.cpp @@ -70,6 +70,8 @@ namespace { void extractCovData(const GpuMat& src, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize, int borderType, Stream& stream) { + (void) buf; + double scale = static_cast(1 << ((ksize > 0 ? ksize : 3) - 1)) * blockSize; if (ksize < 0) @@ -83,16 +85,21 @@ namespace Dx.create(src.size(), CV_32F); Dy.create(src.size(), CV_32F); + Ptr filterDx, filterDy; + if (ksize > 0) { - Sobel(src, Dx, CV_32F, 1, 0, buf, ksize, scale, borderType, -1, stream); - Sobel(src, Dy, CV_32F, 0, 1, buf, ksize, scale, borderType, -1, stream); + filterDx = gpu::createSobelFilter(src.type(), CV_32F, 1, 0, ksize, scale, borderType); + filterDy = gpu::createSobelFilter(src.type(), CV_32F, 0, 1, ksize, scale, borderType); } else { - Scharr(src, Dx, CV_32F, 1, 0, buf, scale, borderType, -1, stream); - Scharr(src, Dy, CV_32F, 0, 1, buf, scale, borderType, -1, stream); + filterDx = gpu::createScharrFilter(src.type(), CV_32F, 1, 0, scale, borderType); + filterDy = gpu::createScharrFilter(src.type(), CV_32F, 0, 1, scale, borderType); } + + filterDx->apply(src, Dx); + filterDy->apply(src, Dy); } } diff --git a/modules/superres/src/btv_l1_gpu.cpp b/modules/superres/src/btv_l1_gpu.cpp index 6813187c4..7b2ad7370 100644 --- a/modules/superres/src/btv_l1_gpu.cpp +++ b/modules/superres/src/btv_l1_gpu.cpp @@ -230,7 +230,7 @@ namespace Ptr opticalFlow_; private: - std::vector > filters_; + std::vector > filters_; int curBlurKernelSize_; double curBlurSigma_; int curSrcType_; @@ -299,7 +299,7 @@ namespace { filters_.resize(src.size()); for (size_t i = 0; i < src.size(); ++i) - filters_[i] = createGaussianFilter_GPU(src[0].type(), Size(blurKernelSize_, blurKernelSize_), blurSigma_); + filters_[i] = gpu::createGaussianFilter(src[0].type(), -1, Size(blurKernelSize_, blurKernelSize_), 
blurSigma_); curBlurKernelSize_ = blurKernelSize_; curBlurSigma_ = blurSigma_; curSrcType_ = src[0].type(); @@ -346,7 +346,7 @@ namespace // a = M * Ih gpu::remap(highRes_, a_[k], backwardMaps_[k].first, backwardMaps_[k].second, INTER_NEAREST, BORDER_REPLICATE, Scalar(), streams_[k]); // b = HM * Ih - filters_[k]->apply(a_[k], b_[k], Rect(0,0,-1,-1), streams_[k]); + filters_[k]->apply(a_[k], b_[k], streams_[k]); // c = DHF * Ih gpu::resize(b_[k], c_[k], lowResSize, 0, 0, INTER_NEAREST, streams_[k]); @@ -355,7 +355,7 @@ namespace // a = Dt * diff upscale(c_[k], a_[k], scale_, streams_[k]); // b = HtDt * diff - filters_[k]->apply(a_[k], b_[k], Rect(0,0,-1,-1), streams_[k]); + filters_[k]->apply(a_[k], b_[k], streams_[k]); // diffTerm = MtHtDt * diff gpu::remap(b_[k], diffTerms_[k], forwardMaps_[k].first, forwardMaps_[k].second, INTER_NEAREST, BORDER_REPLICATE, Scalar(), streams_[k]); } diff --git a/samples/cpp/tutorial_code/gpu/gpu-basics-similarity/gpu-basics-similarity.cpp b/samples/cpp/tutorial_code/gpu/gpu-basics-similarity/gpu-basics-similarity.cpp index 87b525599..1815cc6de 100644 --- a/samples/cpp/tutorial_code/gpu/gpu-basics-similarity/gpu-basics-similarity.cpp +++ b/samples/cpp/tutorial_code/gpu/gpu-basics-similarity/gpu-basics-similarity.cpp @@ -308,6 +308,8 @@ Scalar getMSSIM_GPU( const Mat& i1, const Mat& i2) gpu::split(tmp2, vI2); Scalar mssim; + Ptr gauss = gpu::createGaussianFilter(vI2[0].type(), -1, Size(11, 11), 1.5); + for( int i = 0; i < gI1.channels(); ++i ) { gpu::GpuMat I2_2, I1_2, I1_I2; @@ -318,8 +320,8 @@ Scalar getMSSIM_GPU( const Mat& i1, const Mat& i2) /*************************** END INITS **********************************/ gpu::GpuMat mu1, mu2; // PRELIMINARY COMPUTING - gpu::GaussianBlur(vI1[i], mu1, Size(11, 11), 1.5); - gpu::GaussianBlur(vI2[i], mu2, Size(11, 11), 1.5); + gauss->apply(vI1[i], mu1); + gauss->apply(vI2[i], mu2); gpu::GpuMat mu1_2, mu2_2, mu1_mu2; gpu::multiply(mu1, mu1, mu1_2); @@ -328,13 +330,13 @@ Scalar 
getMSSIM_GPU( const Mat& i1, const Mat& i2) gpu::GpuMat sigma1_2, sigma2_2, sigma12; - gpu::GaussianBlur(I1_2, sigma1_2, Size(11, 11), 1.5); + gauss->apply(I1_2, sigma1_2); gpu::subtract(sigma1_2, mu1_2, sigma1_2); // sigma1_2 -= mu1_2; - gpu::GaussianBlur(I2_2, sigma2_2, Size(11, 11), 1.5); + gauss->apply(I2_2, sigma2_2); gpu::subtract(sigma2_2, mu2_2, sigma2_2); // sigma2_2 -= mu2_2; - gpu::GaussianBlur(I1_I2, sigma12, Size(11, 11), 1.5); + gauss->apply(I1_I2, sigma12); gpu::subtract(sigma12, mu1_mu2, sigma12); // sigma12 -= mu1_mu2; ///////////////////////////////// FORMULA //////////////////////////////// @@ -375,7 +377,7 @@ Scalar getMSSIM_GPU_optimized( const Mat& i1, const Mat& i2, BufferMSSIM& b) gpu::split(b.t2, b.vI2, stream); Scalar mssim; - gpu::GpuMat buf; + Ptr gauss = gpu::createGaussianFilter(b.vI1[0].type(), -1, Size(11, 11), 1.5); for( int i = 0; i < b.gI1.channels(); ++i ) { @@ -383,22 +385,22 @@ Scalar getMSSIM_GPU_optimized( const Mat& i1, const Mat& i2, BufferMSSIM& b) gpu::multiply(b.vI1[i], b.vI1[i], b.I1_2, 1, -1, stream); // I1^2 gpu::multiply(b.vI1[i], b.vI2[i], b.I1_I2, 1, -1, stream); // I1 * I2 - gpu::GaussianBlur(b.vI1[i], b.mu1, Size(11, 11), buf, 1.5, 0, BORDER_DEFAULT, -1, stream); - gpu::GaussianBlur(b.vI2[i], b.mu2, Size(11, 11), buf, 1.5, 0, BORDER_DEFAULT, -1, stream); + gauss->apply(b.vI1[i], b.mu1, stream); + gauss->apply(b.vI2[i], b.mu2, stream); gpu::multiply(b.mu1, b.mu1, b.mu1_2, 1, -1, stream); gpu::multiply(b.mu2, b.mu2, b.mu2_2, 1, -1, stream); gpu::multiply(b.mu1, b.mu2, b.mu1_mu2, 1, -1, stream); - gpu::GaussianBlur(b.I1_2, b.sigma1_2, Size(11, 11), buf, 1.5, 0, BORDER_DEFAULT, -1, stream); + gauss->apply(b.I1_2, b.sigma1_2, stream); gpu::subtract(b.sigma1_2, b.mu1_2, b.sigma1_2, gpu::GpuMat(), -1, stream); //b.sigma1_2 -= b.mu1_2; - This would result in an extra data transfer operation - gpu::GaussianBlur(b.I2_2, b.sigma2_2, Size(11, 11), buf, 1.5, 0, BORDER_DEFAULT, -1, stream); + gauss->apply(b.I2_2, b.sigma2_2, 
stream); gpu::subtract(b.sigma2_2, b.mu2_2, b.sigma2_2, gpu::GpuMat(), -1, stream); //b.sigma2_2 -= b.mu2_2; - gpu::GaussianBlur(b.I1_I2, b.sigma12, Size(11, 11), buf, 1.5, 0, BORDER_DEFAULT, -1, stream); + gauss->apply(b.I1_I2, b.sigma12, stream); gpu::subtract(b.sigma12, b.mu1_mu2, b.sigma12, gpu::GpuMat(), -1, stream); //b.sigma12 -= b.mu1_mu2; diff --git a/samples/gpu/performance/tests.cpp b/samples/gpu/performance/tests.cpp index f6ace6d77..cbe50a40f 100644 --- a/samples/gpu/performance/tests.cpp +++ b/samples/gpu/performance/tests.cpp @@ -929,10 +929,12 @@ TEST(GaussianBlur) gpu::GpuMat d_dst(src.size(), src.type()); gpu::GpuMat d_buf; - gpu::GaussianBlur(d_src, d_dst, Size(3, 3), d_buf, 1); + cv::Ptr gauss = cv::gpu::createGaussianFilter(d_src.type(), -1, cv::Size(3, 3), 1); + + gauss->apply(d_src, d_dst); GPU_ON; - gpu::GaussianBlur(d_src, d_dst, Size(3, 3), d_buf, 1); + gauss->apply(d_src, d_dst); GPU_OFF; } } From 5720eaf35472b6b40985792c7a9c338b74ed1652 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Mon, 29 Apr 2013 16:03:16 +0400 Subject: [PATCH 078/121] refactored Morphology Filters --- modules/gpubgsegm/src/fgd.cpp | 16 +- .../gpufilters/include/opencv2/gpufilters.hpp | 61 +- modules/gpufilters/perf/perf_filters.cpp | 23 +- modules/gpufilters/src/filtering.cpp | 559 +++++++++--------- modules/gpufilters/test/test_filters.cpp | 21 +- samples/gpu/morphology.cpp | 226 ++++--- samples/gpu/performance/tests.cpp | 6 +- 7 files changed, 503 insertions(+), 409 deletions(-) diff --git a/modules/gpubgsegm/src/fgd.cpp b/modules/gpubgsegm/src/fgd.cpp index 1b4038304..fb14ff172 100644 --- a/modules/gpubgsegm/src/fgd.cpp +++ b/modules/gpubgsegm/src/fgd.cpp @@ -228,11 +228,10 @@ private: cv::gpu::GpuMat countBuf_; cv::gpu::GpuMat buf_; - cv::gpu::GpuMat filterBuf_; cv::gpu::GpuMat filterBrd_; - cv::Ptr dilateFilter_; - cv::Ptr erodeFilter_; + cv::Ptr dilateFilter_; + cv::Ptr erodeFilter_; CvMemStorage* storage_; }; @@ -305,8 +304,8 @@ void 
cv::gpu::FGDStatModel::Impl::create(const cv::gpu::GpuMat& firstFrame, cons cv::Mat kernel = cv::getStructuringElement(cv::MORPH_RECT, cv::Size(1 + params_.perform_morphing * 2, 1 + params_.perform_morphing * 2)); cv::Point anchor(params_.perform_morphing, params_.perform_morphing); - dilateFilter_ = cv::gpu::createMorphologyFilter_GPU(cv::MORPH_DILATE, CV_8UC1, kernel, filterBuf_, anchor); - erodeFilter_ = cv::gpu::createMorphologyFilter_GPU(cv::MORPH_ERODE, CV_8UC1, kernel, filterBuf_, anchor); + dilateFilter_ = cv::gpu::createMorphologyFilter(cv::MORPH_DILATE, CV_8UC1, kernel, anchor); + erodeFilter_ = cv::gpu::createMorphologyFilter(cv::MORPH_ERODE, CV_8UC1, kernel, anchor); } } @@ -326,7 +325,6 @@ void cv::gpu::FGDStatModel::Impl::release() countBuf_.release(); buf_.release(); - filterBuf_.release(); filterBrd_.release(); } @@ -488,14 +486,14 @@ namespace namespace { - void morphology(const cv::gpu::GpuMat& src, cv::gpu::GpuMat& dst, cv::gpu::GpuMat& filterBrd, int brd, cv::Ptr& filter, cv::Scalar brdVal) + void morphology(const cv::gpu::GpuMat& src, cv::gpu::GpuMat& dst, cv::gpu::GpuMat& filterBrd, int brd, cv::Ptr& filter, cv::Scalar brdVal) { cv::gpu::copyMakeBorder(src, filterBrd, brd, brd, brd, brd, cv::BORDER_CONSTANT, brdVal); - filter->apply(filterBrd(cv::Rect(brd, brd, src.cols, src.rows)), dst, cv::Rect(0, 0, src.cols, src.rows)); + filter->apply(filterBrd(cv::Rect(brd, brd, src.cols, src.rows)), dst); } void smoothForeground(cv::gpu::GpuMat& foreground, cv::gpu::GpuMat& filterBrd, cv::gpu::GpuMat& buf, - cv::Ptr& erodeFilter, cv::Ptr& dilateFilter, + cv::Ptr& erodeFilter, cv::Ptr& dilateFilter, const cv::gpu::FGDStatModel::Params& params) { const int brd = params.perform_morphing; diff --git a/modules/gpufilters/include/opencv2/gpufilters.hpp b/modules/gpufilters/include/opencv2/gpufilters.hpp index bf7a8e928..7595bd543 100644 --- a/modules/gpufilters/include/opencv2/gpufilters.hpp +++ b/modules/gpufilters/include/opencv2/gpufilters.hpp @@ -48,6 
+48,7 @@ #endif #include "opencv2/core/gpu.hpp" +#include "opencv2/imgproc.hpp" #if defined __GNUC__ #define __OPENCV_GPUFILTERS_DEPR_BEFORE__ @@ -203,8 +204,42 @@ inline void GaussianBlur(InputArray src, OutputArray dst, Size ksize, double sig f->apply(src, dst, stream); } +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Morphology Filter +//! returns 2D morphological filter +//! supports CV_8UC1 and CV_8UC4 types +CV_EXPORTS Ptr createMorphologyFilter(int op, int srcType, InputArray kernel, Point anchor = Point(-1, -1), int iterations = 1); +__OPENCV_GPUFILTERS_DEPR_BEFORE__ void erode(InputArray src, OutputArray dst, InputArray kernel, + Point anchor = Point(-1, -1), int iterations = 1, + Stream& stream = Stream::Null()) __OPENCV_GPUFILTERS_DEPR_AFTER__; + +inline void erode(InputArray src, OutputArray dst, InputArray kernel, Point anchor, int iterations, Stream& stream) +{ + Ptr f = gpu::createMorphologyFilter(MORPH_ERODE, src.type(), kernel, anchor, iterations); + f->apply(src, dst, stream); +} + +__OPENCV_GPUFILTERS_DEPR_BEFORE__ void dilate(InputArray src, OutputArray dst, InputArray kernel, + Point anchor = Point(-1, -1), int iterations = 1, + Stream& stream = Stream::Null()) __OPENCV_GPUFILTERS_DEPR_AFTER__; + +inline void dilate(InputArray src, OutputArray dst, InputArray kernel, Point anchor, int iterations, Stream& stream) +{ + Ptr f = gpu::createMorphologyFilter(MORPH_DILATE, src.type(), kernel, anchor, iterations); + f->apply(src, dst, stream); +} + +__OPENCV_GPUFILTERS_DEPR_BEFORE__ void morphologyEx(InputArray src, OutputArray dst, int op, + InputArray kernel, Point anchor = Point(-1, -1), int iterations = 1, + Stream& stream = Stream::Null()) __OPENCV_GPUFILTERS_DEPR_AFTER__; + +inline void morphologyEx(InputArray src, OutputArray dst, int op, InputArray kernel, Point anchor, int iterations, Stream& stream) +{ + Ptr f = gpu::createMorphologyFilter(op, src.type(), kernel, anchor, iterations); 
+ f->apply(src, dst, stream); +} @@ -285,18 +320,7 @@ CV_EXPORTS Ptr getColumnSumFilter_GPU(int sumType, int dst -//! returns 2D morphological filter -//! only MORPH_ERODE and MORPH_DILATE are supported -//! supports CV_8UC1 and CV_8UC4 types -//! kernel must have CV_8UC1 type, one rows and cols == ksize.width * ksize.height -CV_EXPORTS Ptr getMorphologyFilter_GPU(int op, int type, const Mat& kernel, const Size& ksize, - Point anchor=Point(-1,-1)); -//! returns morphological filter engine. Only MORPH_ERODE and MORPH_DILATE are supported. -CV_EXPORTS Ptr createMorphologyFilter_GPU(int op, int type, const Mat& kernel, - const Point& anchor = Point(-1,-1), int iterations = 1); -CV_EXPORTS Ptr createMorphologyFilter_GPU(int op, int type, const Mat& kernel, GpuMat& buf, - const Point& anchor = Point(-1,-1), int iterations = 1); @@ -310,22 +334,7 @@ CV_EXPORTS Ptr getMinFilter_GPU(int srcType, int dstType, const -//! erodes the image (applies the local minimum operator) -CV_EXPORTS void erode(const GpuMat& src, GpuMat& dst, const Mat& kernel, Point anchor = Point(-1, -1), int iterations = 1); -CV_EXPORTS void erode(const GpuMat& src, GpuMat& dst, const Mat& kernel, GpuMat& buf, - Point anchor = Point(-1, -1), int iterations = 1, - Stream& stream = Stream::Null()); -//! dilates the image (applies the local maximum operator) -CV_EXPORTS void dilate(const GpuMat& src, GpuMat& dst, const Mat& kernel, Point anchor = Point(-1, -1), int iterations = 1); -CV_EXPORTS void dilate(const GpuMat& src, GpuMat& dst, const Mat& kernel, GpuMat& buf, - Point anchor = Point(-1, -1), int iterations = 1, - Stream& stream = Stream::Null()); - -//! 
applies an advanced morphological operation to the image -CV_EXPORTS void morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, Point anchor = Point(-1, -1), int iterations = 1); -CV_EXPORTS void morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, GpuMat& buf1, GpuMat& buf2, - Point anchor = Point(-1, -1), int iterations = 1, Stream& stream = Stream::Null()); diff --git a/modules/gpufilters/perf/perf_filters.cpp b/modules/gpufilters/perf/perf_filters.cpp index 9c7dcb8b4..6ad0998a5 100644 --- a/modules/gpufilters/perf/perf_filters.cpp +++ b/modules/gpufilters/perf/perf_filters.cpp @@ -263,13 +263,6 @@ PERF_TEST_P(Sz_Type_KernelSz, GaussianBlur, Combine(GPU_TYPICAL_MAT_SIZES, Value } } - - - - - - - ////////////////////////////////////////////////////////////////////// // Erode @@ -289,9 +282,10 @@ PERF_TEST_P(Sz_Type, Erode, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8U { const cv::gpu::GpuMat d_src(src); cv::gpu::GpuMat dst; - cv::gpu::GpuMat d_buf; - TEST_CYCLE() cv::gpu::erode(d_src, dst, ker, d_buf); + cv::Ptr erode = cv::gpu::createMorphologyFilter(cv::MORPH_ERODE, src.type(), ker); + + TEST_CYCLE() erode->apply(d_src, dst); GPU_SANITY_CHECK(dst); } @@ -324,9 +318,10 @@ PERF_TEST_P(Sz_Type, Dilate, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8 { const cv::gpu::GpuMat d_src(src); cv::gpu::GpuMat dst; - cv::gpu::GpuMat d_buf; - TEST_CYCLE() cv::gpu::dilate(d_src, dst, ker, d_buf); + cv::Ptr dilate = cv::gpu::createMorphologyFilter(cv::MORPH_DILATE, src.type(), ker); + + TEST_CYCLE() dilate->apply(d_src, dst); GPU_SANITY_CHECK(dst); } @@ -364,10 +359,10 @@ PERF_TEST_P(Sz_Type_Op, MorphologyEx, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8 { const cv::gpu::GpuMat d_src(src); cv::gpu::GpuMat dst; - cv::gpu::GpuMat d_buf1; - cv::gpu::GpuMat d_buf2; - TEST_CYCLE() cv::gpu::morphologyEx(d_src, dst, morphOp, ker, d_buf1, d_buf2); + cv::Ptr morph = cv::gpu::createMorphologyFilter(morphOp, src.type(), ker); + + TEST_CYCLE() 
morph->apply(d_src, dst); GPU_SANITY_CHECK(dst); } diff --git a/modules/gpufilters/src/filtering.cpp b/modules/gpufilters/src/filtering.cpp index c4de57440..145e04984 100644 --- a/modules/gpufilters/src/filtering.cpp +++ b/modules/gpufilters/src/filtering.cpp @@ -61,6 +61,8 @@ Ptr cv::gpu::createScharrFilter(int, int, int, int, double, int, int) { Ptr cv::gpu::createGaussianFilter(int, int, Size, double, double, int, int) { throw_no_cuda(); return Ptr(); } +Ptr cv::gpu::createMorphologyFilter(int, int, InputArray, Point, int) { throw_no_cuda(); return Ptr(); } + @@ -68,18 +70,9 @@ Ptr cv::gpu::createGaussianFilter(int, int, Size, double, double, int, i Ptr cv::gpu::getRowSumFilter_GPU(int, int, int, int) { throw_no_cuda(); return Ptr(0); } Ptr cv::gpu::getColumnSumFilter_GPU(int, int, int, int) { throw_no_cuda(); return Ptr(0); } -Ptr cv::gpu::getMorphologyFilter_GPU(int, int, const Mat&, const Size&, Point) { throw_no_cuda(); return Ptr(0); } -Ptr cv::gpu::createMorphologyFilter_GPU(int, int, const Mat&, const Point&, int) { throw_no_cuda(); return Ptr(0); } -Ptr cv::gpu::createMorphologyFilter_GPU(int, int, const Mat&, GpuMat&, const Point&, int) { throw_no_cuda(); return Ptr(0); } Ptr cv::gpu::getMaxFilter_GPU(int, int, const Size&, Point) { throw_no_cuda(); return Ptr(0); } Ptr cv::gpu::getMinFilter_GPU(int, int, const Size&, Point) { throw_no_cuda(); return Ptr(0); } -void cv::gpu::erode(const GpuMat&, GpuMat&, const Mat&, Point, int) { throw_no_cuda(); } -void cv::gpu::erode(const GpuMat&, GpuMat&, const Mat&, GpuMat&, Point, int, Stream&) { throw_no_cuda(); } -void cv::gpu::dilate(const GpuMat&, GpuMat&, const Mat&, Point, int) { throw_no_cuda(); } -void cv::gpu::dilate(const GpuMat&, GpuMat&, const Mat&, GpuMat&, Point, int, Stream&) { throw_no_cuda(); } -void cv::gpu::morphologyEx(const GpuMat&, GpuMat&, int, const Mat&, Point, int) { throw_no_cuda(); } -void cv::gpu::morphologyEx(const GpuMat&, GpuMat&, int, const Mat&, GpuMat&, GpuMat&, Point, int, 
Stream&) { throw_no_cuda(); } #else @@ -506,6 +499,297 @@ Ptr cv::gpu::createGaussianFilter(int srcType, int dstType, Size ksize, return createSeparableLinearFilter(srcType, dstType, kx, ky, Point(-1,-1), rowBorderMode, columnBorderMode); } +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Morphology Filter + +namespace +{ + class MorphologyFilter : public Filter + { + public: + MorphologyFilter(int op, int srcType, InputArray kernel, Point anchor, int iterations); + + void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); + + private: + typedef NppStatus (*nppMorfFilter_t)(const Npp8u* pSrc, Npp32s nSrcStep, Npp8u* pDst, Npp32s nDstStep, NppiSize oSizeROI, + const Npp8u* pMask, NppiSize oMaskSize, NppiPoint oAnchor); + + int type_; + GpuMat kernel_; + Point anchor_; + int iters_; + nppMorfFilter_t func_; + + GpuMat srcBorder_; + GpuMat buf_; + }; + + MorphologyFilter::MorphologyFilter(int op, int srcType, InputArray _kernel, Point anchor, int iterations) : + type_(srcType), anchor_(anchor), iters_(iterations) + { + static const nppMorfFilter_t funcs[2][5] = + { + {0, nppiErode_8u_C1R, 0, 0, nppiErode_8u_C4R }, + {0, nppiDilate_8u_C1R, 0, 0, nppiDilate_8u_C4R } + }; + + CV_Assert( op == MORPH_ERODE || op == MORPH_DILATE ); + CV_Assert( srcType == CV_8UC1 || srcType == CV_8UC4 ); + + Mat kernel = _kernel.getMat(); + Size ksize = !kernel.empty() ? 
_kernel.size() : Size(3, 3); + + normalizeAnchor(anchor_, ksize); + + if (kernel.empty()) + { + kernel = getStructuringElement(MORPH_RECT, Size(1 + iters_ * 2, 1 + iters_ * 2)); + anchor_ = Point(iters_, iters_); + iters_ = 1; + } + else if (iters_ > 1 && countNonZero(kernel) == (int) kernel.total()) + { + anchor_ = Point(anchor_.x * iters_, anchor_.y * iters_); + kernel = getStructuringElement(MORPH_RECT, + Size(ksize.width + (iters_ - 1) * (ksize.width - 1), + ksize.height + (iters_ - 1) * (ksize.height - 1)), + anchor_); + iters_ = 1; + } + + CV_Assert( kernel.channels() == 1 ); + + Mat kernel8U; + kernel.convertTo(kernel8U, CV_8U); + + kernel_ = gpu::createContinuous(kernel.size(), CV_8UC1); + kernel_.upload(kernel8U); + + func_ = funcs[op][CV_MAT_CN(srcType)]; + } + + void MorphologyFilter::apply(InputArray _src, OutputArray _dst, Stream& _stream) + { + GpuMat src = _src.getGpuMat(); + CV_Assert( src.type() == type_ ); + + Size ksize = kernel_.size(); + gpu::copyMakeBorder(src, srcBorder_, ksize.height, ksize.height, ksize.width, ksize.width, BORDER_DEFAULT, Scalar(), _stream); + + GpuMat srcRoi = srcBorder_(Rect(ksize.width, ksize.height, src.cols, src.rows)); + + GpuMat bufRoi; + if (iters_ > 1) + { + ensureSizeIsEnough(srcBorder_.size(), type_, buf_); + buf_.setTo(Scalar::all(0), _stream); + bufRoi = buf_(Rect(ksize.width, ksize.height, src.cols, src.rows)); + } + + _dst.create(src.size(), src.type()); + GpuMat dst = _dst.getGpuMat(); + + cudaStream_t stream = StreamAccessor::getStream(_stream); + NppStreamHandler h(stream); + + NppiSize oSizeROI; + oSizeROI.width = src.cols; + oSizeROI.height = src.rows; + + NppiSize oMaskSize; + oMaskSize.height = ksize.height; + oMaskSize.width = ksize.width; + + NppiPoint oAnchor; + oAnchor.x = anchor_.x; + oAnchor.y = anchor_.y; + + nppSafeCall( func_(srcRoi.ptr(), static_cast(srcRoi.step), dst.ptr(), static_cast(dst.step), + oSizeROI, kernel_.ptr(), oMaskSize, oAnchor) ); + + for(int i = 1; i < iters_; ++i) + { + 
dst.copyTo(bufRoi, _stream); + + nppSafeCall( func_(bufRoi.ptr(), static_cast(bufRoi.step), dst.ptr(), static_cast(dst.step), + oSizeROI, kernel_.ptr(), oMaskSize, oAnchor) ); + } + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + } +} + +namespace +{ + class MorphologyExFilter : public Filter + { + public: + MorphologyExFilter(int srcType, InputArray kernel, Point anchor, int iterations); + + protected: + Ptr erodeFilter_, dilateFilter_; + GpuMat buf_; + }; + + MorphologyExFilter::MorphologyExFilter(int srcType, InputArray kernel, Point anchor, int iterations) + { + erodeFilter_ = gpu::createMorphologyFilter(MORPH_ERODE, srcType, kernel, anchor, iterations); + dilateFilter_ = gpu::createMorphologyFilter(MORPH_DILATE, srcType, kernel, anchor, iterations); + } + + // MORPH_OPEN + + class MorphologyOpenFilter : public MorphologyExFilter + { + public: + MorphologyOpenFilter(int srcType, InputArray kernel, Point anchor, int iterations); + + void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); + }; + + MorphologyOpenFilter::MorphologyOpenFilter(int srcType, InputArray kernel, Point anchor, int iterations) : + MorphologyExFilter(srcType, kernel, anchor, iterations) + { + } + + void MorphologyOpenFilter::apply(InputArray src, OutputArray dst, Stream& stream) + { + erodeFilter_->apply(src, buf_, stream); + dilateFilter_->apply(buf_, dst, stream); + } + + // MORPH_CLOSE + + class MorphologyCloseFilter : public MorphologyExFilter + { + public: + MorphologyCloseFilter(int srcType, InputArray kernel, Point anchor, int iterations); + + void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); + }; + + MorphologyCloseFilter::MorphologyCloseFilter(int srcType, InputArray kernel, Point anchor, int iterations) : + MorphologyExFilter(srcType, kernel, anchor, iterations) + { + } + + void MorphologyCloseFilter::apply(InputArray src, OutputArray dst, Stream& stream) + { + dilateFilter_->apply(src, buf_, stream); + 
erodeFilter_->apply(buf_, dst, stream); + } + + // MORPH_GRADIENT + + class MorphologyGradientFilter : public MorphologyExFilter + { + public: + MorphologyGradientFilter(int srcType, InputArray kernel, Point anchor, int iterations); + + void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); + }; + + MorphologyGradientFilter::MorphologyGradientFilter(int srcType, InputArray kernel, Point anchor, int iterations) : + MorphologyExFilter(srcType, kernel, anchor, iterations) + { + } + + void MorphologyGradientFilter::apply(InputArray src, OutputArray dst, Stream& stream) + { + erodeFilter_->apply(src, buf_, stream); + dilateFilter_->apply(src, dst, stream); + gpu::subtract(dst, buf_, dst, noArray(), -1, stream); + } + + // MORPH_TOPHAT + + class MorphologyTophatFilter : public MorphologyExFilter + { + public: + MorphologyTophatFilter(int srcType, InputArray kernel, Point anchor, int iterations); + + void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); + }; + + MorphologyTophatFilter::MorphologyTophatFilter(int srcType, InputArray kernel, Point anchor, int iterations) : + MorphologyExFilter(srcType, kernel, anchor, iterations) + { + } + + void MorphologyTophatFilter::apply(InputArray src, OutputArray dst, Stream& stream) + { + erodeFilter_->apply(src, dst, stream); + dilateFilter_->apply(dst, buf_, stream); + gpu::subtract(src, buf_, dst, noArray(), -1, stream); + } + + // MORPH_BLACKHAT + + class MorphologyBlackhatFilter : public MorphologyExFilter + { + public: + MorphologyBlackhatFilter(int srcType, InputArray kernel, Point anchor, int iterations); + + void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); + }; + + MorphologyBlackhatFilter::MorphologyBlackhatFilter(int srcType, InputArray kernel, Point anchor, int iterations) : + MorphologyExFilter(srcType, kernel, anchor, iterations) + { + } + + void MorphologyBlackhatFilter::apply(InputArray src, OutputArray dst, Stream& stream) + { + 
dilateFilter_->apply(src, dst, stream); + erodeFilter_->apply(dst, buf_, stream); + gpu::subtract(buf_, src, dst, noArray(), -1, stream); + } +} + +Ptr cv::gpu::createMorphologyFilter(int op, int srcType, InputArray kernel, Point anchor, int iterations) +{ + switch( op ) + { + case MORPH_ERODE: + case MORPH_DILATE: + return new MorphologyFilter(op, srcType, kernel, anchor, iterations); + break; + + case MORPH_OPEN: + return new MorphologyOpenFilter(srcType, kernel, anchor, iterations); + break; + + case MORPH_CLOSE: + return new MorphologyCloseFilter(srcType, kernel, anchor, iterations); + break; + + case MORPH_GRADIENT: + return new MorphologyGradientFilter(srcType, kernel, anchor, iterations); + break; + + case MORPH_TOPHAT: + return new MorphologyTophatFilter(srcType, kernel, anchor, iterations); + break; + + case MORPH_BLACKHAT: + return new MorphologyBlackhatFilter(srcType, kernel, anchor, iterations); + break; + + default: + CV_Error(Error::StsBadArg, "Unknown morphological operation"); + return Ptr(); + } +} + + + + + + + + @@ -638,264 +922,7 @@ Ptr cv::gpu::getColumnSumFilter_GPU(int sumType, int dstTy return Ptr(new NppColumnSumFilter(ksize, anchor)); } -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Morphology Filter -namespace -{ - typedef NppStatus (*nppMorfFilter_t)(const Npp8u*, Npp32s, Npp8u*, Npp32s, NppiSize, const Npp8u*, NppiSize, NppiPoint); - - struct NPPMorphFilter : public BaseFilter_GPU - { - NPPMorphFilter(const Size& ksize_, const Point& anchor_, const GpuMat& kernel_, nppMorfFilter_t func_) : - BaseFilter_GPU(ksize_, anchor_), kernel(kernel_), func(func_) {} - - virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null()) - { - NppiSize sz; - sz.width = src.cols; - sz.height = src.rows; - NppiSize oKernelSize; - oKernelSize.height = ksize.height; - oKernelSize.width = ksize.width; - NppiPoint oAnchor; - oAnchor.x = anchor.x; - oAnchor.y = anchor.y; - - 
cudaStream_t stream = StreamAccessor::getStream(s); - - NppStreamHandler h(stream); - - nppSafeCall( func(src.ptr(), static_cast(src.step), - dst.ptr(), static_cast(dst.step), sz, kernel.ptr(), oKernelSize, oAnchor) ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - - GpuMat kernel; - nppMorfFilter_t func; - }; -} - -Ptr cv::gpu::getMorphologyFilter_GPU(int op, int type, const Mat& kernel, const Size& ksize, Point anchor) -{ - static const nppMorfFilter_t nppMorfFilter_callers[2][5] = - { - {0, nppiErode_8u_C1R, 0, 0, nppiErode_8u_C4R }, - {0, nppiDilate_8u_C1R, 0, 0, nppiDilate_8u_C4R } - }; - - CV_Assert(op == MORPH_ERODE || op == MORPH_DILATE); - CV_Assert(type == CV_8UC1 || type == CV_8UC4); - - GpuMat gpu_krnl; - normalizeKernel(kernel, gpu_krnl); - normalizeAnchor(anchor, ksize); - - return Ptr(new NPPMorphFilter(ksize, anchor, gpu_krnl, nppMorfFilter_callers[op][CV_MAT_CN(type)])); -} - -namespace -{ - struct MorphologyFilterEngine_GPU : public FilterEngine_GPU - { - MorphologyFilterEngine_GPU(const Ptr& filter2D_, int type_, int iters_) : - filter2D(filter2D_), type(type_), iters(iters_) - { - pbuf = &buf; - } - - MorphologyFilterEngine_GPU(const Ptr& filter2D_, int type_, int iters_, GpuMat& buf_) : - filter2D(filter2D_), type(type_), iters(iters_) - { - pbuf = &buf_; - } - - virtual void apply(const GpuMat& src, GpuMat& dst, Rect roi = Rect(0,0,-1,-1), Stream& stream = Stream::Null()) - { - CV_Assert(src.type() == type); - - Size src_size = src.size(); - - dst.create(src_size, type); - - if (roi.size() != src_size) - { - dst.setTo(Scalar::all(0), stream); - } - - normalizeROI(roi, filter2D->ksize, filter2D->anchor, src_size); - - if (iters > 1) - pbuf->create(src_size, type); - - GpuMat srcROI = src(roi); - GpuMat dstROI = dst(roi); - - (*filter2D)(srcROI, dstROI, stream); - - for(int i = 1; i < iters; ++i) - { - dst.swap((*pbuf)); - - dstROI = dst(roi); - GpuMat bufROI = (*pbuf)(roi); - - (*filter2D)(bufROI, dstROI, stream); - } - } 
- - Ptr filter2D; - - int type; - int iters; - - GpuMat buf; - GpuMat* pbuf; - }; -} - -Ptr cv::gpu::createMorphologyFilter_GPU(int op, int type, const Mat& kernel, const Point& anchor, int iterations) -{ - CV_Assert(iterations > 0); - - Size ksize = kernel.size(); - - Ptr filter2D = getMorphologyFilter_GPU(op, type, kernel, ksize, anchor); - - return Ptr(new MorphologyFilterEngine_GPU(filter2D, type, iterations)); -} - -Ptr cv::gpu::createMorphologyFilter_GPU(int op, int type, const Mat& kernel, GpuMat& buf, const Point& anchor, int iterations) -{ - CV_Assert(iterations > 0); - - Size ksize = kernel.size(); - - Ptr filter2D = getMorphologyFilter_GPU(op, type, kernel, ksize, anchor); - - return Ptr(new MorphologyFilterEngine_GPU(filter2D, type, iterations, buf)); -} - -namespace -{ - void morphOp(int op, const GpuMat& src, GpuMat& dst, const Mat& _kernel, GpuMat& buf, Point anchor, int iterations, Stream& stream = Stream::Null()) - { - Mat kernel; - Size ksize = _kernel.data ? _kernel.size() : Size(3, 3); - - normalizeAnchor(anchor, ksize); - - if (iterations == 0 || _kernel.rows * _kernel.cols == 1) - { - src.copyTo(dst, stream); - return; - } - - dst.create(src.size(), src.type()); - - if (!_kernel.data) - { - kernel = getStructuringElement(MORPH_RECT, Size(1 + iterations * 2, 1 + iterations * 2)); - anchor = Point(iterations, iterations); - iterations = 1; - } - else if (iterations > 1 && countNonZero(_kernel) == _kernel.rows * _kernel.cols) - { - anchor = Point(anchor.x * iterations, anchor.y * iterations); - kernel = getStructuringElement(MORPH_RECT, - Size(ksize.width + (iterations - 1) * (ksize.width - 1), - ksize.height + (iterations - 1) * (ksize.height - 1)), - anchor); - iterations = 1; - } - else - kernel = _kernel; - - Ptr f = createMorphologyFilter_GPU(op, src.type(), kernel, buf, anchor, iterations); - - f->apply(src, dst, Rect(0,0,-1,-1), stream); - } - - void morphOp(int op, const GpuMat& src, GpuMat& dst, const Mat& _kernel, Point anchor, int 
iterations) - { - GpuMat buf; - morphOp(op, src, dst, _kernel, buf, anchor, iterations); - } -} - -void cv::gpu::erode( const GpuMat& src, GpuMat& dst, const Mat& kernel, Point anchor, int iterations) -{ - morphOp(MORPH_ERODE, src, dst, kernel, anchor, iterations); -} - -void cv::gpu::erode( const GpuMat& src, GpuMat& dst, const Mat& kernel, GpuMat& buf, Point anchor, int iterations, Stream& stream) -{ - morphOp(MORPH_ERODE, src, dst, kernel, buf, anchor, iterations, stream); -} - -void cv::gpu::dilate( const GpuMat& src, GpuMat& dst, const Mat& kernel, Point anchor, int iterations) -{ - morphOp(MORPH_DILATE, src, dst, kernel, anchor, iterations); -} - -void cv::gpu::dilate( const GpuMat& src, GpuMat& dst, const Mat& kernel, GpuMat& buf, Point anchor, int iterations, Stream& stream) -{ - morphOp(MORPH_DILATE, src, dst, kernel, buf, anchor, iterations, stream); -} - -void cv::gpu::morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, Point anchor, int iterations) -{ - GpuMat buf1; - GpuMat buf2; - morphologyEx(src, dst, op, kernel, buf1, buf2, anchor, iterations); -} - -void cv::gpu::morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, GpuMat& buf1, GpuMat& buf2, Point anchor, int iterations, Stream& stream) -{ - switch( op ) - { - case MORPH_ERODE: - erode(src, dst, kernel, buf1, anchor, iterations, stream); - break; - - case MORPH_DILATE: - dilate(src, dst, kernel, buf1, anchor, iterations, stream); - break; - - case MORPH_OPEN: - erode(src, buf2, kernel, buf1, anchor, iterations, stream); - dilate(buf2, dst, kernel, buf1, anchor, iterations, stream); - break; - - case MORPH_CLOSE: - dilate(src, buf2, kernel, buf1, anchor, iterations, stream); - erode(buf2, dst, kernel, buf1, anchor, iterations, stream); - break; - - case MORPH_GRADIENT: - erode(src, buf2, kernel, buf1, anchor, iterations, stream); - dilate(src, dst, kernel, buf1, anchor, iterations, stream); - gpu::subtract(dst, buf2, dst, GpuMat(), -1, stream); - break; - - 
case MORPH_TOPHAT: - erode(src, dst, kernel, buf1, anchor, iterations, stream); - dilate(dst, buf2, kernel, buf1, anchor, iterations, stream); - gpu::subtract(src, buf2, dst, GpuMat(), -1, stream); - break; - - case MORPH_BLACKHAT: - dilate(src, dst, kernel, buf1, anchor, iterations, stream); - erode(dst, buf2, kernel, buf1, anchor, iterations, stream); - gpu::subtract(buf2, src, dst, GpuMat(), -1, stream); - break; - - default: - CV_Error(cv::Error::StsBadArg, "unknown morphological operation"); - } -} //////////////////////////////////////////////////////////////////////////////////////////////////// // Image Rank Filter diff --git a/modules/gpufilters/test/test_filters.cpp b/modules/gpufilters/test/test_filters.cpp index e052ede6c..03bea05e6 100644 --- a/modules/gpufilters/test/test_filters.cpp +++ b/modules/gpufilters/test/test_filters.cpp @@ -489,15 +489,6 @@ INSTANTIATE_TEST_CASE_P(GPU_Filters, GaussianBlur, testing::Combine( BorderType(cv::BORDER_REFLECT)), WHOLE_SUBMAT)); - - - - - - - - - ///////////////////////////////////////////////////////////////////////////////////////////////// // Erode @@ -528,8 +519,10 @@ GPU_TEST_P(Erode, Accuracy) cv::Mat src = randomMat(size, type); cv::Mat kernel = cv::Mat::ones(3, 3, CV_8U); + cv::Ptr erode = cv::gpu::createMorphologyFilter(cv::MORPH_ERODE, src.type(), kernel, anchor, iterations); + cv::gpu::GpuMat dst = createMat(size, type, useRoi); - cv::gpu::erode(loadMat(src, useRoi), dst, kernel, anchor, iterations); + erode->apply(loadMat(src, useRoi), dst); cv::Mat dst_gold; cv::erode(src, dst_gold, kernel, anchor, iterations); @@ -577,8 +570,10 @@ GPU_TEST_P(Dilate, Accuracy) cv::Mat src = randomMat(size, type); cv::Mat kernel = cv::Mat::ones(3, 3, CV_8U); + cv::Ptr dilate = cv::gpu::createMorphologyFilter(cv::MORPH_DILATE, src.type(), kernel, anchor, iterations); + cv::gpu::GpuMat dst = createMat(size, type, useRoi); - cv::gpu::dilate(loadMat(src, useRoi), dst, kernel, anchor, iterations); + 
dilate->apply(loadMat(src, useRoi), dst); cv::Mat dst_gold; cv::dilate(src, dst_gold, kernel, anchor, iterations); @@ -630,8 +625,10 @@ GPU_TEST_P(MorphEx, Accuracy) cv::Mat src = randomMat(size, type); cv::Mat kernel = cv::Mat::ones(3, 3, CV_8U); + cv::Ptr morph = cv::gpu::createMorphologyFilter(morphOp, src.type(), kernel, anchor, iterations); + cv::gpu::GpuMat dst = createMat(size, type, useRoi); - cv::gpu::morphologyEx(loadMat(src, useRoi), dst, morphOp, kernel, anchor, iterations); + morph->apply(loadMat(src, useRoi), dst); cv::Mat dst_gold; cv::morphologyEx(src, dst_gold, morphOp, kernel, anchor, iterations); diff --git a/samples/gpu/morphology.cpp b/samples/gpu/morphology.cpp index 1ed8f96dc..abc6c980b 100644 --- a/samples/gpu/morphology.cpp +++ b/samples/gpu/morphology.cpp @@ -1,120 +1,186 @@ +#include -#include "opencv2/imgproc/imgproc.hpp" -#include "opencv2/highgui/highgui.hpp" -#include "opencv2/gpu/gpu.hpp" -#include -#include +#include "opencv2/imgproc.hpp" +#include "opencv2/highgui.hpp" +#include "opencv2/gpufilters.hpp" +#include "opencv2/gpuimgproc.hpp" using namespace std; using namespace cv; -using namespace cv::gpu; -static void help() +class App { +public: + App(int argc, const char* argv[]); -printf("\nShow off image morphology: erosion, dialation, open and close\n" - "Call:\n morphology2 [image]\n" - "This program also shows use of rect, elipse and cross kernels\n\n"); -printf( "Hot keys: \n" - "\tESC - quit the program\n" - "\tr - use rectangle structuring element\n" - "\te - use elliptic structuring element\n" - "\tc - use cross-shaped structuring element\n" - "\tSPACE - loop through all the options\n" ); -} + int run(); -GpuMat src, dst; +private: + void help(); -int element_shape = MORPH_RECT; + void OpenClose(); + void ErodeDilate(); -//the address of variable which receives trackbar position update -int max_iters = 10; -int open_close_pos = 0; -int erode_dilate_pos = 0; + static void OpenCloseCallback(int, void*); + static void 
ErodeDilateCallback(int, void*); -// callback function for open/close trackbar -static void OpenClose(int, void*) + gpu::GpuMat src, dst; + + int element_shape; + + int max_iters; + int open_close_pos; + int erode_dilate_pos; +}; + +App::App(int argc, const char* argv[]) { - int n = open_close_pos - max_iters; - int an = n > 0 ? n : -n; - Mat element = getStructuringElement(element_shape, Size(an*2+1, an*2+1), Point(an, an) ); - if( n < 0 ) - cv::gpu::morphologyEx(src, dst, MORPH_OPEN, element); - else - cv::gpu::morphologyEx(src, dst, MORPH_CLOSE, element); - imshow("Open/Close",(Mat)dst); -} + element_shape = MORPH_RECT; + open_close_pos = erode_dilate_pos = max_iters = 10; -// callback function for erode/dilate trackbar -static void ErodeDilate(int, void*) -{ - int n = erode_dilate_pos - max_iters; - int an = n > 0 ? n : -n; - Mat element = getStructuringElement(element_shape, Size(an*2+1, an*2+1), Point(an, an) ); - if( n < 0 ) - cv::gpu::erode(src, dst, element); - else - cv::gpu::dilate(src, dst, element); - imshow("Erode/Dilate",(Mat)dst); -} - - -int main( int argc, char** argv ) -{ - char* filename = argc == 2 ? argv[1] : (char*)"baboon.jpg"; - if (string(argv[1]) == "--help") + if (argc == 2 && String(argv[1]) == "--help") { help(); - return -1; + exit(0); } - src.upload(imread(filename, 1)); - if (src.empty()) + String filename = argc == 2 ? 
argv[1] : "baboon.jpg"; + + Mat img = imread(filename); + if (img.empty()) { - help(); - return -1; + cerr << "Can't open image " << filename.c_str() << endl; + exit(-1); } - cv::gpu::printShortCudaDeviceInfo(cv::gpu::getDevice()); - - help(); - - + src.upload(img); if (src.channels() == 3) { // gpu support only 4th channel images - GpuMat src4ch; - cv::gpu::cvtColor(src, src4ch, COLOR_BGR2BGRA); + gpu::GpuMat src4ch; + gpu::cvtColor(src, src4ch, COLOR_BGR2BGRA); src = src4ch; } - //create windows for output images - namedWindow("Open/Close",1); - namedWindow("Erode/Dilate",1); + help(); - open_close_pos = erode_dilate_pos = max_iters; - createTrackbar("iterations", "Open/Close",&open_close_pos,max_iters*2+1,OpenClose); - createTrackbar("iterations", "Erode/Dilate",&erode_dilate_pos,max_iters*2+1,ErodeDilate); + gpu::printShortCudaDeviceInfo(gpu::getDevice()); +} + +int App::run() +{ + // create windows for output images + namedWindow("Open/Close"); + namedWindow("Erode/Dilate"); + + createTrackbar("iterations", "Open/Close", &open_close_pos, max_iters * 2 + 1, OpenCloseCallback, this); + createTrackbar("iterations", "Erode/Dilate", &erode_dilate_pos, max_iters * 2 + 1, ErodeDilateCallback, this); for(;;) { - int c; + OpenClose(); + ErodeDilate(); - OpenClose(open_close_pos, 0); - ErodeDilate(erode_dilate_pos, 0); - c = waitKey(); + char c = (char) waitKey(); - if( (char)c == 27 ) + switch (c) + { + case 27: + return 0; break; - if( (char)c == 'e' ) + + case 'e': element_shape = MORPH_ELLIPSE; - else if( (char)c == 'r' ) + break; + + case 'r': element_shape = MORPH_RECT; - else if( (char)c == 'c' ) + break; + + case 'c': element_shape = MORPH_CROSS; - else if( (char)c == ' ' ) + break; + + case ' ': element_shape = (element_shape + 1) % 3; + break; + } + } +} + +void App::help() +{ + cout << "Show off image morphology: erosion, dialation, open and close \n"; + cout << "Call: \n"; + cout << " gpu-example-morphology [image] \n"; + cout << "This program also shows use 
of rect, elipse and cross kernels \n" << endl; + + cout << "Hot keys: \n"; + cout << "\tESC - quit the program \n"; + cout << "\tr - use rectangle structuring element \n"; + cout << "\te - use elliptic structuring element \n"; + cout << "\tc - use cross-shaped structuring element \n"; + cout << "\tSPACE - loop through all the options \n" << endl; +} + +void App::OpenClose() +{ + int n = open_close_pos - max_iters; + int an = n > 0 ? n : -n; + + Mat element = getStructuringElement(element_shape, Size(an*2+1, an*2+1), Point(an, an)); + + if (n < 0) + { + Ptr openFilter = gpu::createMorphologyFilter(MORPH_OPEN, src.type(), element); + openFilter->apply(src, dst); + } + else + { + Ptr closeFilter = gpu::createMorphologyFilter(MORPH_CLOSE, src.type(), element); + closeFilter->apply(src, dst); } - return 0; + Mat h_dst(dst); + imshow("Open/Close", h_dst); +} + +void App::ErodeDilate() +{ + int n = erode_dilate_pos - max_iters; + int an = n > 0 ? n : -n; + + Mat element = getStructuringElement(element_shape, Size(an*2+1, an*2+1), Point(an, an)); + + if (n < 0) + { + Ptr erodeFilter = gpu::createMorphologyFilter(MORPH_ERODE, src.type(), element); + erodeFilter->apply(src, dst); + } + else + { + Ptr dilateFilter = gpu::createMorphologyFilter(MORPH_DILATE, src.type(), element); + dilateFilter->apply(src, dst); + } + + Mat h_dst(dst); + imshow("Erode/Dilate", h_dst); +} + +void App::OpenCloseCallback(int, void* data) +{ + App* thiz = (App*) data; + thiz->OpenClose(); +} + +void App::ErodeDilateCallback(int, void* data) +{ + App* thiz = (App*) data; + thiz->ErodeDilate(); +} + +int main(int argc, const char* argv[]) +{ + App app(argc, argv); + return app.run(); } diff --git a/samples/gpu/performance/tests.cpp b/samples/gpu/performance/tests.cpp index cbe50a40f..4333b7625 100644 --- a/samples/gpu/performance/tests.cpp +++ b/samples/gpu/performance/tests.cpp @@ -746,10 +746,12 @@ TEST(erode) d_src.upload(src); - gpu::erode(d_src, d_dst, ker, d_buf); + Ptr erode = 
gpu::createMorphologyFilter(MORPH_ERODE, d_src.type(), ker); + + erode->apply(d_src, d_dst); GPU_ON; - gpu::erode(d_src, d_dst, ker, d_buf); + erode->apply(d_src, d_dst); GPU_OFF; } } From 4bb297afc2ff5c1555c5f1f3dd1a0f760823a446 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Mon, 29 Apr 2013 16:28:15 +0400 Subject: [PATCH 079/121] refactored Image Rank Filters --- .../gpufilters/include/opencv2/gpufilters.hpp | 17 +- modules/gpufilters/src/filtering.cpp | 157 +++++++++++------- 2 files changed, 106 insertions(+), 68 deletions(-) diff --git a/modules/gpufilters/include/opencv2/gpufilters.hpp b/modules/gpufilters/include/opencv2/gpufilters.hpp index 7595bd543..c5a61a06e 100644 --- a/modules/gpufilters/include/opencv2/gpufilters.hpp +++ b/modules/gpufilters/include/opencv2/gpufilters.hpp @@ -241,7 +241,18 @@ inline void morphologyEx(InputArray src, OutputArray dst, int op, InputArray ker f->apply(src, dst, stream); } +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Image Rank Filter +//! Result pixel value is the maximum of pixel values under the rectangular mask region +CV_EXPORTS Ptr createBoxMaxFilter(int srcType, Size ksize, + Point anchor = Point(-1, -1), + int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0)); + +//! Result pixel value is the maximum of pixel values under the rectangular mask region +CV_EXPORTS Ptr createBoxMinFilter(int srcType, Size ksize, + Point anchor = Point(-1, -1), + int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0)); @@ -326,12 +337,6 @@ CV_EXPORTS Ptr getColumnSumFilter_GPU(int sumType, int dst -//! returns maximum filter -CV_EXPORTS Ptr getMaxFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor = Point(-1,-1)); - -//! 
returns minimum filter -CV_EXPORTS Ptr getMinFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor = Point(-1,-1)); - diff --git a/modules/gpufilters/src/filtering.cpp b/modules/gpufilters/src/filtering.cpp index 145e04984..a8ec83a74 100644 --- a/modules/gpufilters/src/filtering.cpp +++ b/modules/gpufilters/src/filtering.cpp @@ -63,6 +63,8 @@ Ptr cv::gpu::createGaussianFilter(int, int, Size, double, double, int, i Ptr cv::gpu::createMorphologyFilter(int, int, InputArray, Point, int) { throw_no_cuda(); return Ptr(); } +Ptr cv::gpu::createBoxMaxFilter(int, Size, Point, int, Scalar) { throw_no_cuda(); return Ptr(); } +Ptr cv::gpu::createBoxMinFilter(int, Size, Point, int, Scalar) { throw_no_cuda(); return Ptr(); } @@ -70,8 +72,6 @@ Ptr cv::gpu::createMorphologyFilter(int, int, InputArray, Point, int) { Ptr cv::gpu::getRowSumFilter_GPU(int, int, int, int) { throw_no_cuda(); return Ptr(0); } Ptr cv::gpu::getColumnSumFilter_GPU(int, int, int, int) { throw_no_cuda(); return Ptr(0); } -Ptr cv::gpu::getMaxFilter_GPU(int, int, const Size&, Point) { throw_no_cuda(); return Ptr(0); } -Ptr cv::gpu::getMinFilter_GPU(int, int, const Size&, Point) { throw_no_cuda(); return Ptr(0); } @@ -783,6 +783,99 @@ Ptr cv::gpu::createMorphologyFilter(int op, int srcType, InputArray kern } } +//////////////////////////////////////////////////////////////////////////////////////////////////// +// Image Rank Filter + +namespace +{ + enum + { + RANK_MAX, + RANK_MIN + }; + + class NPPRankFilter : public Filter + { + public: + NPPRankFilter(int op, int srcType, Size ksize, Point anchor, int borderMode, Scalar borderVal); + + void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); + + private: + typedef NppStatus (*nppFilterRank_t)(const Npp8u* pSrc, Npp32s nSrcStep, Npp8u* pDst, Npp32s nDstStep, NppiSize oSizeROI, + NppiSize oMaskSize, NppiPoint oAnchor); + + int type_; + Size ksize_; + Point anchor_; + int borderMode_; + Scalar borderVal_; + nppFilterRank_t 
func_; + + GpuMat srcBorder_; + }; + + NPPRankFilter::NPPRankFilter(int op, int srcType, Size ksize, Point anchor, int borderMode, Scalar borderVal) : + type_(srcType), ksize_(ksize), anchor_(anchor), borderMode_(borderMode), borderVal_(borderVal) + { + static const nppFilterRank_t maxFuncs[] = {0, nppiFilterMax_8u_C1R, 0, 0, nppiFilterMax_8u_C4R}; + static const nppFilterRank_t minFuncs[] = {0, nppiFilterMin_8u_C1R, 0, 0, nppiFilterMin_8u_C4R}; + + CV_Assert( srcType == CV_8UC1 || srcType == CV_8UC4 ); + + normalizeAnchor(anchor_, ksize_); + + if (op == RANK_MAX) + func_ = maxFuncs[CV_MAT_CN(srcType)]; + else + func_ = minFuncs[CV_MAT_CN(srcType)]; + } + + void NPPRankFilter::apply(InputArray _src, OutputArray _dst, Stream& _stream) + { + GpuMat src = _src.getGpuMat(); + CV_Assert( src.type() == type_ ); + + gpu::copyMakeBorder(src, srcBorder_, ksize_.height, ksize_.height, ksize_.width, ksize_.width, borderMode_, borderVal_, _stream); + + _dst.create(src.size(), src.type()); + GpuMat dst = _dst.getGpuMat(); + + GpuMat srcRoi = srcBorder_(Rect(ksize_.width, ksize_.height, src.cols, src.rows)); + + cudaStream_t stream = StreamAccessor::getStream(_stream); + NppStreamHandler h(stream); + + NppiSize oSizeROI; + oSizeROI.width = src.cols; + oSizeROI.height = src.rows; + + NppiSize oMaskSize; + oMaskSize.height = ksize_.height; + oMaskSize.width = ksize_.width; + + NppiPoint oAnchor; + oAnchor.x = anchor_.x; + oAnchor.y = anchor_.y; + + nppSafeCall( func_(srcRoi.ptr(), static_cast(srcRoi.step), dst.ptr(), static_cast(dst.step), + oSizeROI, oMaskSize, oAnchor) ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + } +} + +Ptr cv::gpu::createBoxMaxFilter(int srcType, Size ksize, Point anchor, int borderMode, Scalar borderVal) +{ + return new NPPRankFilter(RANK_MAX, srcType, ksize, anchor, borderMode, borderVal); +} + +Ptr cv::gpu::createBoxMinFilter(int srcType, Size ksize, Point anchor, int borderMode, Scalar borderVal) +{ + return new 
NPPRankFilter(RANK_MIN, srcType, ksize, anchor, borderMode, borderVal); +} + @@ -924,64 +1017,4 @@ Ptr cv::gpu::getColumnSumFilter_GPU(int sumType, int dstTy -//////////////////////////////////////////////////////////////////////////////////////////////////// -// Image Rank Filter - -namespace -{ - typedef NppStatus (*nppFilterRank_t)(const Npp8u * pSrc, Npp32s nSrcStep, Npp8u * pDst, Npp32s nDstStep, NppiSize oSizeROI, - NppiSize oMaskSize, NppiPoint oAnchor); - - struct NPPRankFilter : public BaseFilter_GPU - { - NPPRankFilter(const Size& ksize_, const Point& anchor_, nppFilterRank_t func_) : BaseFilter_GPU(ksize_, anchor_), func(func_) {} - - virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null()) - { - NppiSize sz; - sz.width = src.cols; - sz.height = src.rows; - NppiSize oKernelSize; - oKernelSize.height = ksize.height; - oKernelSize.width = ksize.width; - NppiPoint oAnchor; - oAnchor.x = anchor.x; - oAnchor.y = anchor.y; - - cudaStream_t stream = StreamAccessor::getStream(s); - - NppStreamHandler h(stream); - - nppSafeCall( func(src.ptr(), static_cast(src.step), dst.ptr(), static_cast(dst.step), sz, oKernelSize, oAnchor) ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } - - nppFilterRank_t func; - }; -} - -Ptr cv::gpu::getMaxFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor) -{ - static const nppFilterRank_t nppFilterRank_callers[] = {0, nppiFilterMax_8u_C1R, 0, 0, nppiFilterMax_8u_C4R}; - - CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC4) && dstType == srcType); - - normalizeAnchor(anchor, ksize); - - return Ptr(new NPPRankFilter(ksize, anchor, nppFilterRank_callers[CV_MAT_CN(srcType)])); -} - -Ptr cv::gpu::getMinFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor) -{ - static const nppFilterRank_t nppFilterRank_callers[] = {0, nppiFilterMin_8u_C1R, 0, 0, nppiFilterMin_8u_C4R}; - - CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC4) && dstType == srcType); - - 
normalizeAnchor(anchor, ksize); - - return Ptr(new NPPRankFilter(ksize, anchor, nppFilterRank_callers[CV_MAT_CN(srcType)])); -} - #endif From 4317cd1ffa4295b49759070c6089d58e1f099e05 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Mon, 29 Apr 2013 17:09:17 +0400 Subject: [PATCH 080/121] refactored 1D Sum Filters --- .../gpufilters/include/opencv2/gpufilters.hpp | 91 +------ modules/gpufilters/src/filtering.cpp | 222 ++++++++---------- 2 files changed, 106 insertions(+), 207 deletions(-) diff --git a/modules/gpufilters/include/opencv2/gpufilters.hpp b/modules/gpufilters/include/opencv2/gpufilters.hpp index c5a61a06e..76b5b731d 100644 --- a/modules/gpufilters/include/opencv2/gpufilters.hpp +++ b/modules/gpufilters/include/opencv2/gpufilters.hpp @@ -254,95 +254,16 @@ CV_EXPORTS Ptr createBoxMinFilter(int srcType, Size ksize, Point anchor = Point(-1, -1), int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0)); +//////////////////////////////////////////////////////////////////////////////////////////////////// +// 1D Sum Filter - - - - - -/*! -The Base Class for 1D or Row-wise Filters - -This is the base class for linear or non-linear filters that process 1D data. -In particular, such filters are used for the "horizontal" filtering parts in separable filters. -*/ -class CV_EXPORTS BaseRowFilter_GPU -{ -public: - BaseRowFilter_GPU(int ksize_, int anchor_) : ksize(ksize_), anchor(anchor_) {} - virtual ~BaseRowFilter_GPU() {} - virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()) = 0; - int ksize, anchor; -}; - -/*! -The Base Class for Column-wise Filters - -This is the base class for linear or non-linear filters that process columns of 2D arrays. -Such filters are used for the "vertical" filtering parts in separable filters. 
-*/ -class CV_EXPORTS BaseColumnFilter_GPU -{ -public: - BaseColumnFilter_GPU(int ksize_, int anchor_) : ksize(ksize_), anchor(anchor_) {} - virtual ~BaseColumnFilter_GPU() {} - virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()) = 0; - int ksize, anchor; -}; - -/*! -The Base Class for Non-Separable 2D Filters. - -This is the base class for linear or non-linear 2D filters. -*/ -class CV_EXPORTS BaseFilter_GPU -{ -public: - BaseFilter_GPU(const Size& ksize_, const Point& anchor_) : ksize(ksize_), anchor(anchor_) {} - virtual ~BaseFilter_GPU() {} - virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()) = 0; - Size ksize; - Point anchor; -}; - -/*! -The Base Class for Filter Engine. - -The class can be used to apply an arbitrary filtering operation to an image. -It contains all the necessary intermediate buffers. -*/ -class CV_EXPORTS FilterEngine_GPU -{ -public: - virtual ~FilterEngine_GPU() {} - - virtual void apply(const GpuMat& src, GpuMat& dst, Rect roi = Rect(0,0,-1,-1), Stream& stream = Stream::Null()) = 0; -}; - - - -//! returns horizontal 1D box filter +//! creates a horizontal 1D box filter //! supports only CV_8UC1 source type and CV_32FC1 sum type -CV_EXPORTS Ptr getRowSumFilter_GPU(int srcType, int sumType, int ksize, int anchor = -1); +CV_EXPORTS Ptr createRowSumFilter(int srcType, int dstType, int ksize, int anchor = -1, int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0)); -//! returns vertical 1D box filter +//! creates a vertical 1D box filter //! 
supports only CV_8UC1 sum type and CV_32FC1 dst type -CV_EXPORTS Ptr getColumnSumFilter_GPU(int sumType, int dstType, int ksize, int anchor = -1); - - - - - - - - - - - - - - - +CV_EXPORTS Ptr createColumnSumFilter(int srcType, int dstType, int ksize, int anchor = -1, int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0)); }} // namespace cv { namespace gpu { diff --git a/modules/gpufilters/src/filtering.cpp b/modules/gpufilters/src/filtering.cpp index a8ec83a74..7f02bdac5 100644 --- a/modules/gpufilters/src/filtering.cpp +++ b/modules/gpufilters/src/filtering.cpp @@ -66,14 +66,8 @@ Ptr cv::gpu::createMorphologyFilter(int, int, InputArray, Point, int) { Ptr cv::gpu::createBoxMaxFilter(int, Size, Point, int, Scalar) { throw_no_cuda(); return Ptr(); } Ptr cv::gpu::createBoxMinFilter(int, Size, Point, int, Scalar) { throw_no_cuda(); return Ptr(); } - - - - -Ptr cv::gpu::getRowSumFilter_GPU(int, int, int, int) { throw_no_cuda(); return Ptr(0); } -Ptr cv::gpu::getColumnSumFilter_GPU(int, int, int, int) { throw_no_cuda(); return Ptr(0); } - - +Ptr cv::gpu::createRowSumFilter(int, int, int, int, int, Scalar) { throw_no_cuda(); return Ptr(); } +Ptr cv::gpu::createColumnSumFilter(int, int, int, int, int, Scalar) { throw_no_cuda(); return Ptr(); } #else @@ -876,145 +870,129 @@ Ptr cv::gpu::createBoxMinFilter(int srcType, Size ksize, Point anchor, i return new NPPRankFilter(RANK_MIN, srcType, ksize, anchor, borderMode, borderVal); } - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -namespace -{ - inline void normalizeROI(Rect& roi, const Size& ksize, const Point& anchor, const Size& src_size) - { - if (roi == Rect(0,0,-1,-1)) - roi = Rect(anchor.x, anchor.y, src_size.width - ksize.width, src_size.height - ksize.height); - - CV_Assert(roi.x >= 0 && roi.y >= 0 && roi.width <= src_size.width && roi.height <= src_size.height); - } - - inline void normalizeKernel(const Mat& kernel, GpuMat& gpu_krnl, int type = CV_8U, int* nDivisor = 0, bool 
reverse = false) - { - int scale = nDivisor && (kernel.depth() == CV_32F || kernel.depth() == CV_64F) ? 256 : 1; - if (nDivisor) *nDivisor = scale; - - Mat temp(kernel.size(), type); - kernel.convertTo(temp, type, scale); - Mat cont_krnl = temp.reshape(1, 1); - - if (reverse) - { - int count = cont_krnl.cols >> 1; - for (int i = 0; i < count; ++i) - { - std::swap(cont_krnl.at(0, i), cont_krnl.at(0, cont_krnl.cols - 1 - i)); - } - } - - gpu_krnl.upload(cont_krnl); - } -} - //////////////////////////////////////////////////////////////////////////////////////////////////// // 1D Sum Filter namespace { - struct NppRowSumFilter : public BaseRowFilter_GPU + class NppRowSumFilter : public Filter { - NppRowSumFilter(int ksize_, int anchor_) : BaseRowFilter_GPU(ksize_, anchor_) {} + public: + NppRowSumFilter(int srcType, int dstType, int ksize, int anchor, int borderMode, Scalar borderVal); - virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null()) - { - NppiSize sz; - sz.width = src.cols; - sz.height = src.rows; + void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); - cudaStream_t stream = StreamAccessor::getStream(s); + private: + int srcType_, dstType_; + int ksize_; + int anchor_; + int borderMode_; + Scalar borderVal_; - NppStreamHandler h(stream); - - nppSafeCall( nppiSumWindowRow_8u32f_C1R(src.ptr(), static_cast(src.step), - dst.ptr(), static_cast(dst.step), sz, ksize, anchor) ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } + GpuMat srcBorder_; }; + + NppRowSumFilter::NppRowSumFilter(int srcType, int dstType, int ksize, int anchor, int borderMode, Scalar borderVal) : + srcType_(srcType), dstType_(dstType), ksize_(ksize), anchor_(anchor), borderMode_(borderMode), borderVal_(borderVal) + { + CV_Assert( srcType_ == CV_8UC1 ); + CV_Assert( dstType_ == CV_32FC1 ); + + normalizeAnchor(anchor_, ksize_); + } + + void NppRowSumFilter::apply(InputArray _src, OutputArray _dst, Stream& _stream) + { + 
GpuMat src = _src.getGpuMat(); + CV_Assert( src.type() == srcType_ ); + + gpu::copyMakeBorder(src, srcBorder_, 0, 0, ksize_, ksize_, borderMode_, borderVal_, _stream); + + _dst.create(src.size(), dstType_); + GpuMat dst = _dst.getGpuMat(); + + GpuMat srcRoi = srcBorder_(Rect(ksize_, 0, src.cols, src.rows)); + + cudaStream_t stream = StreamAccessor::getStream(_stream); + NppStreamHandler h(stream); + + NppiSize oSizeROI; + oSizeROI.width = src.cols; + oSizeROI.height = src.rows; + + nppSafeCall( nppiSumWindowRow_8u32f_C1R(srcRoi.ptr(), static_cast(srcRoi.step), + dst.ptr(), static_cast(dst.step), + oSizeROI, ksize_, anchor_) ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + } } -Ptr cv::gpu::getRowSumFilter_GPU(int srcType, int sumType, int ksize, int anchor) +Ptr cv::gpu::createRowSumFilter(int srcType, int dstType, int ksize, int anchor, int borderMode, Scalar borderVal) { - CV_Assert(srcType == CV_8UC1 && sumType == CV_32FC1); - - normalizeAnchor(anchor, ksize); - - return Ptr(new NppRowSumFilter(ksize, anchor)); + return new NppRowSumFilter(srcType, dstType, ksize, anchor, borderMode, borderVal); } namespace { - struct NppColumnSumFilter : public BaseColumnFilter_GPU + class NppColumnSumFilter : public Filter { - NppColumnSumFilter(int ksize_, int anchor_) : BaseColumnFilter_GPU(ksize_, anchor_) {} + public: + NppColumnSumFilter(int srcType, int dstType, int ksize, int anchor, int borderMode, Scalar borderVal); - virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null()) - { - NppiSize sz; - sz.width = src.cols; - sz.height = src.rows; + void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null()); - cudaStream_t stream = StreamAccessor::getStream(s); + private: + int srcType_, dstType_; + int ksize_; + int anchor_; + int borderMode_; + Scalar borderVal_; - NppStreamHandler h(stream); - - nppSafeCall( nppiSumWindowColumn_8u32f_C1R(src.ptr(), static_cast(src.step), - dst.ptr(), static_cast(dst.step), sz, 
ksize, anchor) ); - - if (stream == 0) - cudaSafeCall( cudaDeviceSynchronize() ); - } + GpuMat srcBorder_; }; + + NppColumnSumFilter::NppColumnSumFilter(int srcType, int dstType, int ksize, int anchor, int borderMode, Scalar borderVal) : + srcType_(srcType), dstType_(dstType), ksize_(ksize), anchor_(anchor), borderMode_(borderMode), borderVal_(borderVal) + { + CV_Assert( srcType_ == CV_8UC1 ); + CV_Assert( dstType_ == CV_32FC1 ); + + normalizeAnchor(anchor_, ksize_); + } + + void NppColumnSumFilter::apply(InputArray _src, OutputArray _dst, Stream& _stream) + { + GpuMat src = _src.getGpuMat(); + CV_Assert( src.type() == srcType_ ); + + gpu::copyMakeBorder(src, srcBorder_, ksize_, ksize_, 0, 0, borderMode_, borderVal_, _stream); + + _dst.create(src.size(), dstType_); + GpuMat dst = _dst.getGpuMat(); + + GpuMat srcRoi = srcBorder_(Rect(0, ksize_, src.cols, src.rows)); + + cudaStream_t stream = StreamAccessor::getStream(_stream); + NppStreamHandler h(stream); + + NppiSize oSizeROI; + oSizeROI.width = src.cols; + oSizeROI.height = src.rows; + + nppSafeCall( nppiSumWindowColumn_8u32f_C1R(srcRoi.ptr(), static_cast(srcRoi.step), + dst.ptr(), static_cast(dst.step), + oSizeROI, ksize_, anchor_) ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + } } -Ptr cv::gpu::getColumnSumFilter_GPU(int sumType, int dstType, int ksize, int anchor) +Ptr cv::gpu::createColumnSumFilter(int srcType, int dstType, int ksize, int anchor, int borderMode, Scalar borderVal) { - CV_Assert(sumType == CV_8UC1 && dstType == CV_32FC1); - - normalizeAnchor(anchor, ksize); - - return Ptr(new NppColumnSumFilter(ksize, anchor)); + return new NppColumnSumFilter(srcType, dstType, ksize, anchor, borderMode, borderVal); } - - #endif From dd6d58f8739a5a1a87517584a01e6cbee12ffb9e Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 8 May 2013 14:27:56 +0400 Subject: [PATCH 081/121] refactored StereoBM --- .../gpustereo/include/opencv2/gpustereo.hpp | 41 ++--- 
modules/gpustereo/perf/perf_stereo.cpp | 5 +- modules/gpustereo/src/stereobm.cpp | 145 ++++++++++++------ modules/gpustereo/test/test_stereo.cpp | 4 +- samples/gpu/driver_api_stereo_multi.cpp | 13 +- samples/gpu/stereo_match.cpp | 34 ++-- samples/gpu/stereo_multi.cpp | 13 +- 7 files changed, 139 insertions(+), 116 deletions(-) diff --git a/modules/gpustereo/include/opencv2/gpustereo.hpp b/modules/gpustereo/include/opencv2/gpustereo.hpp index ecda51206..06843e71b 100644 --- a/modules/gpustereo/include/opencv2/gpustereo.hpp +++ b/modules/gpustereo/include/opencv2/gpustereo.hpp @@ -48,44 +48,25 @@ #endif #include "opencv2/core/gpu.hpp" +#include "opencv2/calib3d.hpp" namespace cv { namespace gpu { -class CV_EXPORTS StereoBM_GPU +///////////////////////////////////////// +// StereoBM + +class CV_EXPORTS StereoBM : public cv::StereoBM { public: - enum { BASIC_PRESET = 0, PREFILTER_XSOBEL = 1 }; + using cv::StereoBM::compute; - enum { DEFAULT_NDISP = 64, DEFAULT_WINSZ = 19 }; - - //! the default constructor - StereoBM_GPU(); - //! the full constructor taking the camera-specific preset, number of disparities and the SAD window size. ndisparities must be multiple of 8. - StereoBM_GPU(int preset, int ndisparities = DEFAULT_NDISP, int winSize = DEFAULT_WINSZ); - - //! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair - //! Output disparity has CV_8U type. - void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, Stream& stream = Stream::Null()); - - //! Some heuristics that tries to estmate - // if current GPU will be faster than CPU in this algorithm. - // It queries current active device. 
- static bool checkIfGpuCallReasonable(); - - int preset; - int ndisp; - int winSize; - - // If avergeTexThreshold == 0 => post procesing is disabled - // If avergeTexThreshold != 0 then disparity is set 0 in each point (x,y) where for left image - // SumOfHorizontalGradiensInWindow(x, y, winSize) < (winSize * winSize) * avergeTexThreshold - // i.e. input left image is low textured. - float avergeTexThreshold; - -private: - GpuMat minSSD, leBuf, riBuf; + virtual void compute(InputArray left, InputArray right, OutputArray disparity, Stream& stream) = 0; }; +CV_EXPORTS Ptr createStereoBM(int numDisparities = 64, int blockSize = 19); + + + // "Efficient Belief Propagation for Early Vision" // P.Felzenszwalb class CV_EXPORTS StereoBeliefPropagation diff --git a/modules/gpustereo/perf/perf_stereo.cpp b/modules/gpustereo/perf/perf_stereo.cpp index e0438c0ae..fde0bbaa1 100644 --- a/modules/gpustereo/perf/perf_stereo.cpp +++ b/modules/gpustereo/perf/perf_stereo.cpp @@ -63,18 +63,17 @@ PERF_TEST_P(ImagePair, StereoBM, const cv::Mat imgRight = readImage(GET_PARAM(1), cv::IMREAD_GRAYSCALE); ASSERT_FALSE(imgRight.empty()); - const int preset = 0; const int ndisp = 256; if (PERF_RUN_GPU()) { - cv::gpu::StereoBM_GPU d_bm(preset, ndisp); + cv::Ptr d_bm = cv::gpu::createStereoBM(ndisp); const cv::gpu::GpuMat d_imgLeft(imgLeft); const cv::gpu::GpuMat d_imgRight(imgRight); cv::gpu::GpuMat dst; - TEST_CYCLE() d_bm(d_imgLeft, d_imgRight, dst); + TEST_CYCLE() d_bm->compute(d_imgLeft, d_imgRight, dst); GPU_SANITY_CHECK(dst); } diff --git a/modules/gpustereo/src/stereobm.cpp b/modules/gpustereo/src/stereobm.cpp index f8e6c20fb..9b32cf7e9 100644 --- a/modules/gpustereo/src/stereobm.cpp +++ b/modules/gpustereo/src/stereobm.cpp @@ -47,11 +47,7 @@ using namespace cv::gpu; #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) -cv::gpu::StereoBM_GPU::StereoBM_GPU() { throw_no_cuda(); } -cv::gpu::StereoBM_GPU::StereoBM_GPU(int, int, int) { throw_no_cuda(); } - -bool 
cv::gpu::StereoBM_GPU::checkIfGpuCallReasonable() { throw_no_cuda(); return false; } -void cv::gpu::StereoBM_GPU::operator() ( const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } +Ptr cv::gpu::createStereoBM(int, int) { throw_no_cuda(); return Ptr(); } #else /* !defined (HAVE_CUDA) */ @@ -67,74 +63,123 @@ namespace cv { namespace gpu { namespace cudev namespace { - const float defaultAvgTexThreshold = 3; -} + class StereoBMImpl : public gpu::StereoBM + { + public: + StereoBMImpl(int numDisparities, int blockSize); -cv::gpu::StereoBM_GPU::StereoBM_GPU() - : preset(BASIC_PRESET), ndisp(DEFAULT_NDISP), winSize(DEFAULT_WINSZ), avergeTexThreshold(defaultAvgTexThreshold) -{ -} + void compute(InputArray left, InputArray right, OutputArray disparity); + void compute(InputArray left, InputArray right, OutputArray disparity, Stream& stream); -cv::gpu::StereoBM_GPU::StereoBM_GPU(int preset_, int ndisparities_, int winSize_) - : preset(preset_), ndisp(ndisparities_), winSize(winSize_), avergeTexThreshold(defaultAvgTexThreshold) -{ - const int max_supported_ndisp = 1 << (sizeof(unsigned char) * 8); - CV_Assert(0 < ndisp && ndisp <= max_supported_ndisp); - CV_Assert(ndisp % 8 == 0); - CV_Assert(winSize % 2 == 1); -} + int getMinDisparity() const { return 0; } + void setMinDisparity(int /*minDisparity*/) {} -bool cv::gpu::StereoBM_GPU::checkIfGpuCallReasonable() -{ - if (0 == getCudaEnabledDeviceCount()) - return false; + int getNumDisparities() const { return ndisp_; } + void setNumDisparities(int numDisparities) { ndisp_ = numDisparities; } - DeviceInfo device_info; + int getBlockSize() const { return winSize_; } + void setBlockSize(int blockSize) { winSize_ = blockSize; } - if (device_info.major() > 1 || device_info.multiProcessorCount() > 16) - return true; + int getSpeckleWindowSize() const { return 0; } + void setSpeckleWindowSize(int /*speckleWindowSize*/) {} - return false; -} + int getSpeckleRange() const { return 0; } + void setSpeckleRange(int 
/*speckleRange*/) {} -namespace -{ - void stereo_bm_gpu_operator( GpuMat& minSSD, GpuMat& leBuf, GpuMat& riBuf, int preset, int ndisp, int winSize, float avergeTexThreshold, const GpuMat& left, const GpuMat& right, GpuMat& disparity, cudaStream_t stream) + int getDisp12MaxDiff() const { return 0; } + void setDisp12MaxDiff(int /*disp12MaxDiff*/) {} + + int getPreFilterType() const { return preset_; } + void setPreFilterType(int preFilterType) { preset_ = preFilterType; } + + int getPreFilterSize() const { return 0; } + void setPreFilterSize(int /*preFilterSize*/) {} + + int getPreFilterCap() const { return preFilterCap_; } + void setPreFilterCap(int preFilterCap) { preFilterCap_ = preFilterCap; } + + int getTextureThreshold() const { return avergeTexThreshold_; } + void setTextureThreshold(int textureThreshold) { avergeTexThreshold_ = textureThreshold; } + + int getUniquenessRatio() const { return 0; } + void setUniquenessRatio(int /*uniquenessRatio*/) {} + + int getSmallerBlockSize() const { return 0; } + void setSmallerBlockSize(int /*blockSize*/){} + + Rect getROI1() const { return Rect(); } + void setROI1(Rect /*roi1*/) {} + + Rect getROI2() const { return Rect(); } + void setROI2(Rect /*roi2*/) {} + + private: + int preset_; + int ndisp_; + int winSize_; + int preFilterCap_; + float avergeTexThreshold_; + + GpuMat minSSD_, leBuf_, riBuf_; + }; + + StereoBMImpl::StereoBMImpl(int numDisparities, int blockSize) + : preset_(0), ndisp_(numDisparities), winSize_(blockSize), preFilterCap_(31), avergeTexThreshold_(3) + { + } + + void StereoBMImpl::compute(InputArray left, InputArray right, OutputArray disparity) + { + compute(left, right, disparity, Stream::Null()); + } + + void StereoBMImpl::compute(InputArray _left, InputArray _right, OutputArray _disparity, Stream& _stream) { using namespace ::cv::gpu::cudev::stereobm; - CV_Assert(left.rows == right.rows && left.cols == right.cols); - CV_Assert(left.type() == CV_8UC1); - CV_Assert(right.type() == CV_8UC1); + const 
int max_supported_ndisp = 1 << (sizeof(unsigned char) * 8); + CV_Assert( 0 < ndisp_ && ndisp_ <= max_supported_ndisp ); + CV_Assert( ndisp_ % 8 == 0 ); + CV_Assert( winSize_ % 2 == 1 ); - disparity.create(left.size(), CV_8U); - minSSD.create(left.size(), CV_32S); + GpuMat left = _left.getGpuMat(); + GpuMat right = _right.getGpuMat(); - GpuMat le_for_bm = left; - GpuMat ri_for_bm = right; + CV_Assert( left.type() == CV_8UC1 ); + CV_Assert( left.size() == right.size() && left.type() == right.type() ); - if (preset == StereoBM_GPU::PREFILTER_XSOBEL) + _disparity.create(left.size(), CV_8UC1); + GpuMat disparity = _disparity.getGpuMat(); + + cudaStream_t stream = StreamAccessor::getStream(_stream); + + gpu::ensureSizeIsEnough(left.size(), CV_32SC1, minSSD_); + + PtrStepSzb le_for_bm = left; + PtrStepSzb ri_for_bm = right; + + if (preset_ == cv::StereoBM::PREFILTER_XSOBEL) { - leBuf.create( left.size(), left.type()); - riBuf.create(right.size(), right.type()); + gpu::ensureSizeIsEnough(left.size(), left.type(), leBuf_); + gpu::ensureSizeIsEnough(right.size(), right.type(), riBuf_); - prefilter_xsobel( left, leBuf, 31, stream); - prefilter_xsobel(right, riBuf, 31, stream); + prefilter_xsobel( left, leBuf_, preFilterCap_, stream); + prefilter_xsobel(right, riBuf_, preFilterCap_, stream); - le_for_bm = leBuf; - ri_for_bm = riBuf; + le_for_bm = leBuf_; + ri_for_bm = riBuf_; } - stereoBM_GPU(le_for_bm, ri_for_bm, disparity, ndisp, winSize, minSSD, stream); + stereoBM_GPU(le_for_bm, ri_for_bm, disparity, ndisp_, winSize_, minSSD_, stream); - if (avergeTexThreshold) - postfilter_textureness(le_for_bm, winSize, avergeTexThreshold, disparity, stream); + if (avergeTexThreshold_ > 0) + postfilter_textureness(le_for_bm, winSize_, avergeTexThreshold_, disparity, stream); } } -void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, Stream& stream) +Ptr cv::gpu::createStereoBM(int numDisparities, int blockSize) { - 
stereo_bm_gpu_operator(minSSD, leBuf, riBuf, preset, ndisp, winSize, avergeTexThreshold, left, right, disparity, StreamAccessor::getStream(stream)); + return new StereoBMImpl(numDisparities, blockSize); } #endif /* !defined (HAVE_CUDA) */ diff --git a/modules/gpustereo/test/test_stereo.cpp b/modules/gpustereo/test/test_stereo.cpp index 0ead03dc5..2046890d6 100644 --- a/modules/gpustereo/test/test_stereo.cpp +++ b/modules/gpustereo/test/test_stereo.cpp @@ -71,10 +71,10 @@ GPU_TEST_P(StereoBM, Regression) ASSERT_FALSE(right_image.empty()); ASSERT_FALSE(disp_gold.empty()); - cv::gpu::StereoBM_GPU bm(0, 128, 19); + cv::Ptr bm = cv::gpu::createStereoBM(128, 19); cv::gpu::GpuMat disp; - bm(loadMat(left_image), loadMat(right_image), disp); + bm->compute(loadMat(left_image), loadMat(right_image), disp); EXPECT_MAT_NEAR(disp_gold, disp, 0.0); } diff --git a/samples/gpu/driver_api_stereo_multi.cpp b/samples/gpu/driver_api_stereo_multi.cpp index fac9e3694..c49fc8564 100644 --- a/samples/gpu/driver_api_stereo_multi.cpp +++ b/samples/gpu/driver_api_stereo_multi.cpp @@ -85,7 +85,7 @@ void inline contextOff() // GPUs data GpuMat d_left[2]; GpuMat d_right[2]; -StereoBM_GPU* bm[2]; +Ptr bm[2]; GpuMat d_result[2]; static void printHelp() @@ -162,14 +162,14 @@ int main(int argc, char** argv) contextOn(0); d_left[0].upload(left.rowRange(0, left.rows / 2)); d_right[0].upload(right.rowRange(0, right.rows / 2)); - bm[0] = new StereoBM_GPU(); + bm[0] = gpu::createStereoBM(); contextOff(); // Split source images for processing on the GPU #1 contextOn(1); d_left[1].upload(left.rowRange(left.rows / 2, left.rows)); d_right[1].upload(right.rowRange(right.rows / 2, right.rows)); - bm[1] = new StereoBM_GPU(); + bm[1] = gpu::createStereoBM(); contextOff(); // Execute calculation in two threads using two GPUs @@ -182,7 +182,7 @@ int main(int argc, char** argv) d_left[0].release(); d_right[0].release(); d_result[0].release(); - delete bm[0]; + bm[0].release(); contextOff(); // Release the second 
GPU resources @@ -191,7 +191,7 @@ int main(int argc, char** argv) d_left[1].release(); d_right[1].release(); d_result[1].release(); - delete bm[1]; + bm[1].release(); contextOff(); waitKey(); @@ -204,8 +204,7 @@ void Worker::operator()(int device_id) const { contextOn(device_id); - bm[device_id]->operator()(d_left[device_id], d_right[device_id], - d_result[device_id]); + bm[device_id]->compute(d_left[device_id], d_right[device_id], d_result[device_id]); std::cout << "GPU #" << device_id << " (" << DeviceInfo().name() << "): finished\n"; diff --git a/samples/gpu/stereo_match.cpp b/samples/gpu/stereo_match.cpp index edf8886ff..071ddf584 100644 --- a/samples/gpu/stereo_match.cpp +++ b/samples/gpu/stereo_match.cpp @@ -65,7 +65,7 @@ private: Mat left, right; gpu::GpuMat d_left, d_right; - gpu::StereoBM_GPU bm; + Ptr bm; gpu::StereoBeliefPropagation bp; gpu::StereoConstantSpaceBP csbp; @@ -172,7 +172,7 @@ void App::run() imshow("right", right); // Set common parameters - bm.ndisp = p.ndisp; + bm = gpu::createStereoBM(p.ndisp); bp.ndisp = p.ndisp; csbp.ndisp = p.ndisp; @@ -201,7 +201,7 @@ void App::run() imshow("left", left); imshow("right", right); } - bm(d_left, d_right, d_disp); + bm->compute(d_left, d_right, d_disp); break; case Params::BP: bp(d_left, d_right, d_disp); break; case Params::CSBP: csbp(d_left, d_right, d_disp); break; @@ -228,8 +228,8 @@ void App::printParams() const switch (p.method) { case Params::BM: - cout << "win_size: " << bm.winSize << endl; - cout << "prefilter_sobel: " << bm.preset << endl; + cout << "win_size: " << bm->getBlockSize() << endl; + cout << "prefilter_sobel: " << bm->getPreFilterType() << endl; break; case Params::BP: cout << "iter_count: " << bp.iters << endl; @@ -289,44 +289,44 @@ void App::handleKey(char key) case 's': case 'S': if (p.method == Params::BM) { - switch (bm.preset) + switch (bm->getPreFilterType()) { - case gpu::StereoBM_GPU::BASIC_PRESET: - bm.preset = gpu::StereoBM_GPU::PREFILTER_XSOBEL; + case 0: + 
bm->setPreFilterType(cv::StereoBM::PREFILTER_XSOBEL); break; - case gpu::StereoBM_GPU::PREFILTER_XSOBEL: - bm.preset = gpu::StereoBM_GPU::BASIC_PRESET; + case cv::StereoBM::PREFILTER_XSOBEL: + bm->setPreFilterType(0); break; } - cout << "prefilter_sobel: " << bm.preset << endl; + cout << "prefilter_sobel: " << bm->getPreFilterType() << endl; } break; case '1': p.ndisp = p.ndisp == 1 ? 8 : p.ndisp + 8; cout << "ndisp: " << p.ndisp << endl; - bm.ndisp = p.ndisp; + bm->setNumDisparities(p.ndisp); bp.ndisp = p.ndisp; csbp.ndisp = p.ndisp; break; case 'q': case 'Q': p.ndisp = max(p.ndisp - 8, 1); cout << "ndisp: " << p.ndisp << endl; - bm.ndisp = p.ndisp; + bm->setNumDisparities(p.ndisp); bp.ndisp = p.ndisp; csbp.ndisp = p.ndisp; break; case '2': if (p.method == Params::BM) { - bm.winSize = min(bm.winSize + 1, 51); - cout << "win_size: " << bm.winSize << endl; + bm->setBlockSize(min(bm->getBlockSize() + 1, 51)); + cout << "win_size: " << bm->getBlockSize() << endl; } break; case 'w': case 'W': if (p.method == Params::BM) { - bm.winSize = max(bm.winSize - 1, 2); - cout << "win_size: " << bm.winSize << endl; + bm->setBlockSize(max(bm->getBlockSize() - 1, 2)); + cout << "win_size: " << bm->getBlockSize() << endl; } break; case '3': diff --git a/samples/gpu/stereo_multi.cpp b/samples/gpu/stereo_multi.cpp index f85efe109..1bb09b22b 100644 --- a/samples/gpu/stereo_multi.cpp +++ b/samples/gpu/stereo_multi.cpp @@ -51,7 +51,7 @@ struct Worker { void operator()(int device_id) const; }; // GPUs data GpuMat d_left[2]; GpuMat d_right[2]; -StereoBM_GPU* bm[2]; +Ptr bm[2]; GpuMat d_result[2]; static void printHelp() @@ -112,13 +112,13 @@ int main(int argc, char** argv) setDevice(0); d_left[0].upload(left.rowRange(0, left.rows / 2)); d_right[0].upload(right.rowRange(0, right.rows / 2)); - bm[0] = new StereoBM_GPU(); + bm[0] = gpu::createStereoBM(); // Split source images for processing on the GPU #1 setDevice(1); d_left[1].upload(left.rowRange(left.rows / 2, left.rows)); 
d_right[1].upload(right.rowRange(right.rows / 2, right.rows)); - bm[1] = new StereoBM_GPU(); + bm[1] = gpu::createStereoBM(); // Execute calculation in two threads using two GPUs int devices[] = {0, 1}; @@ -130,7 +130,7 @@ int main(int argc, char** argv) d_left[0].release(); d_right[0].release(); d_result[0].release(); - delete bm[0]; + bm[0].release(); // Release the second GPU resources setDevice(1); @@ -138,7 +138,7 @@ int main(int argc, char** argv) d_left[1].release(); d_right[1].release(); d_result[1].release(); - delete bm[1]; + bm[1].release(); waitKey(); return 0; @@ -149,8 +149,7 @@ void Worker::operator()(int device_id) const { setDevice(device_id); - bm[device_id]->operator()(d_left[device_id], d_right[device_id], - d_result[device_id]); + bm[device_id]->compute(d_left[device_id], d_right[device_id], d_result[device_id]); std::cout << "GPU #" << device_id << " (" << DeviceInfo().name() << "): finished\n"; From d0e89337da2e7fe01dbaa249ad262b36c1463f45 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 8 May 2013 15:40:38 +0400 Subject: [PATCH 082/121] refactored StereoBeliefPropagation --- .../gpustereo/include/opencv2/gpustereo.hpp | 80 ++- modules/gpustereo/perf/perf_stereo.cpp | 4 +- modules/gpustereo/src/stereobp.cpp | 528 ++++++++++-------- modules/gpustereo/test/test_stereo.cpp | 9 +- samples/gpu/stereo_match.cpp | 30 +- 5 files changed, 345 insertions(+), 306 deletions(-) diff --git a/modules/gpustereo/include/opencv2/gpustereo.hpp b/modules/gpustereo/include/opencv2/gpustereo.hpp index 06843e71b..2a0869b68 100644 --- a/modules/gpustereo/include/opencv2/gpustereo.hpp +++ b/modules/gpustereo/include/opencv2/gpustereo.hpp @@ -65,61 +65,55 @@ public: CV_EXPORTS Ptr createStereoBM(int numDisparities = 64, int blockSize = 19); +///////////////////////////////////////// +// StereoBeliefPropagation - -// "Efficient Belief Propagation for Early Vision" -// P.Felzenszwalb -class CV_EXPORTS StereoBeliefPropagation +//! 
"Efficient Belief Propagation for Early Vision" P.Felzenszwalb +class CV_EXPORTS StereoBeliefPropagation : public cv::StereoMatcher { public: - enum { DEFAULT_NDISP = 64 }; - enum { DEFAULT_ITERS = 5 }; - enum { DEFAULT_LEVELS = 5 }; - - static void estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels); - - //! the default constructor - explicit StereoBeliefPropagation(int ndisp = DEFAULT_NDISP, - int iters = DEFAULT_ITERS, - int levels = DEFAULT_LEVELS, - int msg_type = CV_32F); - - //! the full constructor taking the number of disparities, number of BP iterations on each level, - //! number of levels, truncation of data cost, data weight, - //! truncation of discontinuity cost and discontinuity single jump - //! DataTerm = data_weight * min(fabs(I2-I1), max_data_term) - //! DiscTerm = min(disc_single_jump * fabs(f1-f2), max_disc_term) - //! please see paper for more details - StereoBeliefPropagation(int ndisp, int iters, int levels, - float max_data_term, float data_weight, - float max_disc_term, float disc_single_jump, - int msg_type = CV_32F); - - //! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair, - //! if disparity is empty output type will be CV_16S else output type will be disparity.type(). - void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, Stream& stream = Stream::Null()); + using cv::StereoMatcher::compute; + virtual void compute(InputArray left, InputArray right, OutputArray disparity, Stream& stream) = 0; //! version for user specified data term - void operator()(const GpuMat& data, GpuMat& disparity, Stream& stream = Stream::Null()); + virtual void compute(InputArray data, OutputArray disparity, Stream& stream = Stream::Null()) = 0; - int ndisp; + //! number of BP iterations on each level + virtual int getNumIters() const = 0; + virtual void setNumIters(int iters) = 0; - int iters; - int levels; + //! 
number of levels + virtual int getNumLevels() const = 0; + virtual void setNumLevels(int levels) = 0; - float max_data_term; - float data_weight; - float max_disc_term; - float disc_single_jump; + //! truncation of data cost + virtual double getMaxDataTerm() const = 0; + virtual void setMaxDataTerm(double max_data_term) = 0; - int msg_type; -private: - GpuMat u, d, l, r, u2, d2, l2, r2; - std::vector datas; - GpuMat out; + //! data weight + virtual double getDataWeight() const = 0; + virtual void setDataWeight(double data_weight) = 0; + + //! truncation of discontinuity cost + virtual double getMaxDiscTerm() const = 0; + virtual void setMaxDiscTerm(double max_disc_term) = 0; + + //! discontinuity single jump + virtual double getDiscSingleJump() const = 0; + virtual void setDiscSingleJump(double disc_single_jump) = 0; + + virtual int getMsgType() const = 0; + virtual void setMsgType(int msg_type) = 0; + + static void estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels); }; +CV_EXPORTS Ptr + createStereoBeliefPropagation(int ndisp = 64, int iters = 5, int levels = 5, int msg_type = CV_32F); + + + // "A Constant-Space Belief Propagation Algorithm for Stereo Matching" // Qingxiong Yang, Liang Wang, Narendra Ahuja // http://vision.ai.uiuc.edu/~qyang6/ diff --git a/modules/gpustereo/perf/perf_stereo.cpp b/modules/gpustereo/perf/perf_stereo.cpp index fde0bbaa1..c31788ea8 100644 --- a/modules/gpustereo/perf/perf_stereo.cpp +++ b/modules/gpustereo/perf/perf_stereo.cpp @@ -107,13 +107,13 @@ PERF_TEST_P(ImagePair, StereoBeliefPropagation, if (PERF_RUN_GPU()) { - cv::gpu::StereoBeliefPropagation d_bp(ndisp); + cv::Ptr d_bp = cv::gpu::createStereoBeliefPropagation(ndisp); const cv::gpu::GpuMat d_imgLeft(imgLeft); const cv::gpu::GpuMat d_imgRight(imgRight); cv::gpu::GpuMat dst; - TEST_CYCLE() d_bp(d_imgLeft, d_imgRight, dst); + TEST_CYCLE() d_bp->compute(d_imgLeft, d_imgRight, dst); GPU_SANITY_CHECK(dst); } diff --git 
a/modules/gpustereo/src/stereobp.cpp b/modules/gpustereo/src/stereobp.cpp index 5ce56c1d7..ac3bcfe33 100644 --- a/modules/gpustereo/src/stereobp.cpp +++ b/modules/gpustereo/src/stereobp.cpp @@ -49,12 +49,7 @@ using namespace cv::gpu; void cv::gpu::StereoBeliefPropagation::estimateRecommendedParams(int, int, int&, int&, int&) { throw_no_cuda(); } -cv::gpu::StereoBeliefPropagation::StereoBeliefPropagation(int, int, int, int) { throw_no_cuda(); } -cv::gpu::StereoBeliefPropagation::StereoBeliefPropagation(int, int, int, float, float, float, float, int) { throw_no_cuda(); } - -void cv::gpu::StereoBeliefPropagation::operator()(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } - -void cv::gpu::StereoBeliefPropagation::operator()(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } +Ptr cv::gpu::createStereoBeliefPropagation(int, int, int, int) { throw_no_cuda(); return Ptr(); } #else /* !defined (HAVE_CUDA) */ @@ -78,14 +73,295 @@ namespace cv { namespace gpu { namespace cudev } }}} -using namespace ::cv::gpu::cudev::stereobp; - namespace { + class StereoBPImpl : public gpu::StereoBeliefPropagation + { + public: + StereoBPImpl(int ndisp, int iters, int levels, int msg_type); + + void compute(InputArray left, InputArray right, OutputArray disparity); + void compute(InputArray left, InputArray right, OutputArray disparity, Stream& stream); + void compute(InputArray data, OutputArray disparity, Stream& stream); + + int getMinDisparity() const { return 0; } + void setMinDisparity(int /*minDisparity*/) {} + + int getNumDisparities() const { return ndisp_; } + void setNumDisparities(int numDisparities) { ndisp_ = numDisparities; } + + int getBlockSize() const { return 0; } + void setBlockSize(int /*blockSize*/) {} + + int getSpeckleWindowSize() const { return 0; } + void setSpeckleWindowSize(int /*speckleWindowSize*/) {} + + int getSpeckleRange() const { return 0; } + void setSpeckleRange(int /*speckleRange*/) {} + + int getDisp12MaxDiff() const { return 0; 
} + void setDisp12MaxDiff(int /*disp12MaxDiff*/) {} + + int getNumIters() const { return iters_; } + void setNumIters(int iters) { iters_ = iters; } + + int getNumLevels() const { return levels_; } + void setNumLevels(int levels) { levels_ = levels; } + + double getMaxDataTerm() const { return max_data_term_; } + void setMaxDataTerm(double max_data_term) { max_data_term_ = (float) max_data_term; } + + double getDataWeight() const { return data_weight_; } + void setDataWeight(double data_weight) { data_weight_ = (float) data_weight; } + + double getMaxDiscTerm() const { return max_disc_term_; } + void setMaxDiscTerm(double max_disc_term) { max_disc_term_ = (float) max_disc_term; } + + double getDiscSingleJump() const { return disc_single_jump_; } + void setDiscSingleJump(double disc_single_jump) { disc_single_jump_ = (float) disc_single_jump; } + + int getMsgType() const { return msg_type_; } + void setMsgType(int msg_type) { msg_type_ = msg_type; } + + private: + void init(Stream& stream); + void calcBP(OutputArray disp, Stream& stream); + + int ndisp_; + int iters_; + int levels_; + float max_data_term_; + float data_weight_; + float max_disc_term_; + float disc_single_jump_; + int msg_type_; + + float scale_; + int rows_, cols_; + std::vector cols_all_, rows_all_; + GpuMat u_, d_, l_, r_, u2_, d2_, l2_, r2_; + std::vector datas_; + GpuMat outBuf_; + }; + const float DEFAULT_MAX_DATA_TERM = 10.0f; const float DEFAULT_DATA_WEIGHT = 0.07f; const float DEFAULT_MAX_DISC_TERM = 1.7f; const float DEFAULT_DISC_SINGLE_JUMP = 1.0f; + + StereoBPImpl::StereoBPImpl(int ndisp, int iters, int levels, int msg_type) : + ndisp_(ndisp), iters_(iters), levels_(levels), + max_data_term_(DEFAULT_MAX_DATA_TERM), data_weight_(DEFAULT_DATA_WEIGHT), + max_disc_term_(DEFAULT_MAX_DISC_TERM), disc_single_jump_(DEFAULT_DISC_SINGLE_JUMP), + msg_type_(msg_type) + { + } + + void StereoBPImpl::compute(InputArray left, InputArray right, OutputArray disparity) + { + compute(left, right, disparity, 
Stream::Null()); + } + + void StereoBPImpl::compute(InputArray _left, InputArray _right, OutputArray disparity, Stream& stream) + { + using namespace cv::gpu::cudev::stereobp; + + typedef void (*comp_data_t)(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream); + static const comp_data_t comp_data_callers[2][5] = + { + {0, comp_data_gpu, 0, comp_data_gpu, comp_data_gpu}, + {0, comp_data_gpu, 0, comp_data_gpu, comp_data_gpu} + }; + + scale_ = msg_type_ == CV_32F ? 1.0f : 10.0f; + + CV_Assert( 0 < ndisp_ && 0 < iters_ && 0 < levels_ ); + CV_Assert( msg_type_ == CV_32F || msg_type_ == CV_16S ); + CV_Assert( msg_type_ == CV_32F || (1 << (levels_ - 1)) * scale_ * max_data_term_ < std::numeric_limits::max() ); + + GpuMat left = _left.getGpuMat(); + GpuMat right = _right.getGpuMat(); + + CV_Assert( left.type() == CV_8UC1 || left.type() == CV_8UC3 || left.type() == CV_8UC4 ); + CV_Assert( left.size() == right.size() && left.type() == right.type() ); + + rows_ = left.rows; + cols_ = left.cols; + + const int divisor = (int) pow(2.f, levels_ - 1.0f); + const int lowest_cols = cols_ / divisor; + const int lowest_rows = rows_ / divisor; + const int min_image_dim_size = 2; + CV_Assert( std::min(lowest_cols, lowest_rows) > min_image_dim_size ); + + init(stream); + + datas_[0].create(rows_ * ndisp_, cols_, msg_type_); + + comp_data_callers[msg_type_ == CV_32F][left.channels()](left, right, datas_[0], StreamAccessor::getStream(stream)); + + calcBP(disparity, stream); + } + + void StereoBPImpl::compute(InputArray _data, OutputArray disparity, Stream& stream) + { + scale_ = msg_type_ == CV_32F ? 
1.0f : 10.0f; + + CV_Assert( 0 < ndisp_ && 0 < iters_ && 0 < levels_ ); + CV_Assert( msg_type_ == CV_32F || msg_type_ == CV_16S ); + CV_Assert( msg_type_ == CV_32F || (1 << (levels_ - 1)) * scale_ * max_data_term_ < std::numeric_limits::max() ); + + GpuMat data = _data.getGpuMat(); + + CV_Assert( (data.type() == msg_type_) && (data.rows % ndisp_ == 0) ); + + rows_ = data.rows / ndisp_; + cols_ = data.cols; + + const int divisor = (int) pow(2.f, levels_ - 1.0f); + const int lowest_cols = cols_ / divisor; + const int lowest_rows = rows_ / divisor; + const int min_image_dim_size = 2; + CV_Assert( std::min(lowest_cols, lowest_rows) > min_image_dim_size ); + + init(stream); + + data.copyTo(datas_[0], stream); + + calcBP(disparity, stream); + } + + void StereoBPImpl::init(Stream& stream) + { + using namespace cv::gpu::cudev::stereobp; + + u_.create(rows_ * ndisp_, cols_, msg_type_); + d_.create(rows_ * ndisp_, cols_, msg_type_); + l_.create(rows_ * ndisp_, cols_, msg_type_); + r_.create(rows_ * ndisp_, cols_, msg_type_); + + if (levels_ & 1) + { + //can clear less area + u_.setTo(0, stream); + d_.setTo(0, stream); + l_.setTo(0, stream); + r_.setTo(0, stream); + } + + if (levels_ > 1) + { + int less_rows = (rows_ + 1) / 2; + int less_cols = (cols_ + 1) / 2; + + u2_.create(less_rows * ndisp_, less_cols, msg_type_); + d2_.create(less_rows * ndisp_, less_cols, msg_type_); + l2_.create(less_rows * ndisp_, less_cols, msg_type_); + r2_.create(less_rows * ndisp_, less_cols, msg_type_); + + if ((levels_ & 1) == 0) + { + u2_.setTo(0, stream); + d2_.setTo(0, stream); + l2_.setTo(0, stream); + r2_.setTo(0, stream); + } + } + + load_constants(ndisp_, max_data_term_, scale_ * data_weight_, scale_ * max_disc_term_, scale_ * disc_single_jump_); + + datas_.resize(levels_); + + cols_all_.resize(levels_); + rows_all_.resize(levels_); + + cols_all_[0] = cols_; + rows_all_[0] = rows_; + } + + void StereoBPImpl::calcBP(OutputArray disp, Stream& _stream) + { + using namespace 
cv::gpu::cudev::stereobp; + + typedef void (*data_step_down_t)(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream); + static const data_step_down_t data_step_down_callers[2] = + { + data_step_down_gpu, data_step_down_gpu + }; + + typedef void (*level_up_messages_t)(int dst_idx, int dst_cols, int dst_rows, int src_rows, PtrStepSzb* mus, PtrStepSzb* mds, PtrStepSzb* mls, PtrStepSzb* mrs, cudaStream_t stream); + static const level_up_messages_t level_up_messages_callers[2] = + { + level_up_messages_gpu, level_up_messages_gpu + }; + + typedef void (*calc_all_iterations_t)(int cols, int rows, int iters, const PtrStepSzb& u, const PtrStepSzb& d, const PtrStepSzb& l, const PtrStepSzb& r, const PtrStepSzb& data, cudaStream_t stream); + static const calc_all_iterations_t calc_all_iterations_callers[2] = + { + calc_all_iterations_gpu, calc_all_iterations_gpu + }; + + typedef void (*output_t)(const PtrStepSzb& u, const PtrStepSzb& d, const PtrStepSzb& l, const PtrStepSzb& r, const PtrStepSzb& data, const PtrStepSz& disp, cudaStream_t stream); + static const output_t output_callers[2] = + { + output_gpu, output_gpu + }; + + const int funcIdx = msg_type_ == CV_32F; + + cudaStream_t stream = StreamAccessor::getStream(_stream); + + for (int i = 1; i < levels_; ++i) + { + cols_all_[i] = (cols_all_[i-1] + 1) / 2; + rows_all_[i] = (rows_all_[i-1] + 1) / 2; + + datas_[i].create(rows_all_[i] * ndisp_, cols_all_[i], msg_type_); + + data_step_down_callers[funcIdx](cols_all_[i], rows_all_[i], rows_all_[i-1], datas_[i-1], datas_[i], stream); + } + + PtrStepSzb mus[] = {u_, u2_}; + PtrStepSzb mds[] = {d_, d2_}; + PtrStepSzb mrs[] = {r_, r2_}; + PtrStepSzb mls[] = {l_, l2_}; + + int mem_idx = (levels_ & 1) ? 
0 : 1; + + for (int i = levels_ - 1; i >= 0; --i) + { + // for lower level we have already computed messages by setting to zero + if (i != levels_ - 1) + level_up_messages_callers[funcIdx](mem_idx, cols_all_[i], rows_all_[i], rows_all_[i+1], mus, mds, mls, mrs, stream); + + calc_all_iterations_callers[funcIdx](cols_all_[i], rows_all_[i], iters_, mus[mem_idx], mds[mem_idx], mls[mem_idx], mrs[mem_idx], datas_[i], stream); + + mem_idx = (mem_idx + 1) & 1; + } + + const int dtype = disp.fixedType() ? disp.type() : CV_16SC1; + + disp.create(rows_, cols_, dtype); + GpuMat out = disp.getGpuMat(); + + if (dtype != CV_16SC1) + { + outBuf_.create(rows_, cols_, CV_16SC1); + out = outBuf_; + } + + out.setTo(0, _stream); + + output_callers[funcIdx](u_, d_, l_, r_, datas_.front(), out, stream); + + if (dtype != CV_16SC1) + out.convertTo(disp, dtype, _stream); + } +} + +Ptr cv::gpu::createStereoBeliefPropagation(int ndisp, int iters, int levels, int msg_type) +{ + return new StereoBPImpl(ndisp, iters, levels, msg_type); } void cv::gpu::StereoBeliefPropagation::estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels) @@ -101,240 +377,4 @@ void cv::gpu::StereoBeliefPropagation::estimateRecommendedParams(int width, int if (levels == 0) levels++; } -cv::gpu::StereoBeliefPropagation::StereoBeliefPropagation(int ndisp_, int iters_, int levels_, int msg_type_) - : ndisp(ndisp_), iters(iters_), levels(levels_), - max_data_term(DEFAULT_MAX_DATA_TERM), data_weight(DEFAULT_DATA_WEIGHT), - max_disc_term(DEFAULT_MAX_DISC_TERM), disc_single_jump(DEFAULT_DISC_SINGLE_JUMP), - msg_type(msg_type_), datas(levels_) -{ -} - -cv::gpu::StereoBeliefPropagation::StereoBeliefPropagation(int ndisp_, int iters_, int levels_, float max_data_term_, float data_weight_, float max_disc_term_, float disc_single_jump_, int msg_type_) - : ndisp(ndisp_), iters(iters_), levels(levels_), - max_data_term(max_data_term_), data_weight(data_weight_), - max_disc_term(max_disc_term_), 
disc_single_jump(disc_single_jump_), - msg_type(msg_type_), datas(levels_) -{ -} - -namespace -{ - class StereoBeliefPropagationImpl - { - public: - StereoBeliefPropagationImpl(StereoBeliefPropagation& rthis_, - GpuMat& u_, GpuMat& d_, GpuMat& l_, GpuMat& r_, - GpuMat& u2_, GpuMat& d2_, GpuMat& l2_, GpuMat& r2_, - std::vector& datas_, GpuMat& out_) - : rthis(rthis_), u(u_), d(d_), l(l_), r(r_), u2(u2_), d2(d2_), l2(l2_), r2(r2_), datas(datas_), out(out_), - zero(Scalar::all(0)), scale(rthis_.msg_type == CV_32F ? 1.0f : 10.0f) - { - CV_Assert(0 < rthis.ndisp && 0 < rthis.iters && 0 < rthis.levels); - CV_Assert(rthis.msg_type == CV_32F || rthis.msg_type == CV_16S); - CV_Assert(rthis.msg_type == CV_32F || (1 << (rthis.levels - 1)) * scale * rthis.max_data_term < std::numeric_limits::max()); - } - - void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disp, Stream& stream) - { - typedef void (*comp_data_t)(const PtrStepSzb& left, const PtrStepSzb& right, const PtrStepSzb& data, cudaStream_t stream); - static const comp_data_t comp_data_callers[2][5] = - { - {0, comp_data_gpu, 0, comp_data_gpu, comp_data_gpu}, - {0, comp_data_gpu, 0, comp_data_gpu, comp_data_gpu} - }; - - CV_Assert(left.size() == right.size() && left.type() == right.type()); - CV_Assert(left.type() == CV_8UC1 || left.type() == CV_8UC3 || left.type() == CV_8UC4); - - rows = left.rows; - cols = left.cols; - - int divisor = (int)pow(2.f, rthis.levels - 1.0f); - int lowest_cols = cols / divisor; - int lowest_rows = rows / divisor; - const int min_image_dim_size = 2; - CV_Assert(std::min(lowest_cols, lowest_rows) > min_image_dim_size); - - init(stream); - - datas[0].create(rows * rthis.ndisp, cols, rthis.msg_type); - - comp_data_callers[rthis.msg_type == CV_32F][left.channels()](left, right, datas[0], StreamAccessor::getStream(stream)); - - calcBP(disp, stream); - } - - void operator()(const GpuMat& data, GpuMat& disp, Stream& stream) - { - CV_Assert((data.type() == rthis.msg_type) && (data.rows 
% rthis.ndisp == 0)); - - rows = data.rows / rthis.ndisp; - cols = data.cols; - - int divisor = (int)pow(2.f, rthis.levels - 1.0f); - int lowest_cols = cols / divisor; - int lowest_rows = rows / divisor; - const int min_image_dim_size = 2; - CV_Assert(std::min(lowest_cols, lowest_rows) > min_image_dim_size); - - init(stream); - - datas[0] = data; - - calcBP(disp, stream); - } - private: - void init(Stream& stream) - { - u.create(rows * rthis.ndisp, cols, rthis.msg_type); - d.create(rows * rthis.ndisp, cols, rthis.msg_type); - l.create(rows * rthis.ndisp, cols, rthis.msg_type); - r.create(rows * rthis.ndisp, cols, rthis.msg_type); - - if (rthis.levels & 1) - { - //can clear less area - u.setTo(zero, stream); - d.setTo(zero, stream); - l.setTo(zero, stream); - r.setTo(zero, stream); - } - - if (rthis.levels > 1) - { - int less_rows = (rows + 1) / 2; - int less_cols = (cols + 1) / 2; - - u2.create(less_rows * rthis.ndisp, less_cols, rthis.msg_type); - d2.create(less_rows * rthis.ndisp, less_cols, rthis.msg_type); - l2.create(less_rows * rthis.ndisp, less_cols, rthis.msg_type); - r2.create(less_rows * rthis.ndisp, less_cols, rthis.msg_type); - - if ((rthis.levels & 1) == 0) - { - u2.setTo(zero, stream); - d2.setTo(zero, stream); - l2.setTo(zero, stream); - r2.setTo(zero, stream); - } - } - - load_constants(rthis.ndisp, rthis.max_data_term, scale * rthis.data_weight, scale * rthis.max_disc_term, scale * rthis.disc_single_jump); - - datas.resize(rthis.levels); - - cols_all.resize(rthis.levels); - rows_all.resize(rthis.levels); - - cols_all[0] = cols; - rows_all[0] = rows; - } - - void calcBP(GpuMat& disp, Stream& stream) - { - typedef void (*data_step_down_t)(int dst_cols, int dst_rows, int src_rows, const PtrStepSzb& src, const PtrStepSzb& dst, cudaStream_t stream); - static const data_step_down_t data_step_down_callers[2] = - { - data_step_down_gpu, data_step_down_gpu - }; - - typedef void (*level_up_messages_t)(int dst_idx, int dst_cols, int dst_rows, int src_rows, 
PtrStepSzb* mus, PtrStepSzb* mds, PtrStepSzb* mls, PtrStepSzb* mrs, cudaStream_t stream); - static const level_up_messages_t level_up_messages_callers[2] = - { - level_up_messages_gpu, level_up_messages_gpu - }; - - typedef void (*calc_all_iterations_t)(int cols, int rows, int iters, const PtrStepSzb& u, const PtrStepSzb& d, const PtrStepSzb& l, const PtrStepSzb& r, const PtrStepSzb& data, cudaStream_t stream); - static const calc_all_iterations_t calc_all_iterations_callers[2] = - { - calc_all_iterations_gpu, calc_all_iterations_gpu - }; - - typedef void (*output_t)(const PtrStepSzb& u, const PtrStepSzb& d, const PtrStepSzb& l, const PtrStepSzb& r, const PtrStepSzb& data, const PtrStepSz& disp, cudaStream_t stream); - static const output_t output_callers[2] = - { - output_gpu, output_gpu - }; - - const int funcIdx = rthis.msg_type == CV_32F; - - cudaStream_t cudaStream = StreamAccessor::getStream(stream); - - for (int i = 1; i < rthis.levels; ++i) - { - cols_all[i] = (cols_all[i-1] + 1) / 2; - rows_all[i] = (rows_all[i-1] + 1) / 2; - - datas[i].create(rows_all[i] * rthis.ndisp, cols_all[i], rthis.msg_type); - - data_step_down_callers[funcIdx](cols_all[i], rows_all[i], rows_all[i-1], datas[i-1], datas[i], cudaStream); - } - - PtrStepSzb mus[] = {u, u2}; - PtrStepSzb mds[] = {d, d2}; - PtrStepSzb mrs[] = {r, r2}; - PtrStepSzb mls[] = {l, l2}; - - int mem_idx = (rthis.levels & 1) ? 0 : 1; - - for (int i = rthis.levels - 1; i >= 0; --i) - { - // for lower level we have already computed messages by setting to zero - if (i != rthis.levels - 1) - level_up_messages_callers[funcIdx](mem_idx, cols_all[i], rows_all[i], rows_all[i+1], mus, mds, mls, mrs, cudaStream); - - calc_all_iterations_callers[funcIdx](cols_all[i], rows_all[i], rthis.iters, mus[mem_idx], mds[mem_idx], mls[mem_idx], mrs[mem_idx], datas[i], cudaStream); - - mem_idx = (mem_idx + 1) & 1; - } - - if (disp.empty()) - disp.create(rows, cols, CV_16S); - - out = ((disp.type() == CV_16S) ? 
disp : (out.create(rows, cols, CV_16S), out)); - - out.setTo(zero, stream); - - output_callers[funcIdx](u, d, l, r, datas.front(), out, cudaStream); - - if (disp.type() != CV_16S) - out.convertTo(disp, disp.type(), stream); - } - - StereoBeliefPropagation& rthis; - - GpuMat& u; - GpuMat& d; - GpuMat& l; - GpuMat& r; - - GpuMat& u2; - GpuMat& d2; - GpuMat& l2; - GpuMat& r2; - - std::vector& datas; - GpuMat& out; - - const Scalar zero; - const float scale; - - int rows, cols; - - std::vector cols_all, rows_all; - }; -} - -void cv::gpu::StereoBeliefPropagation::operator()(const GpuMat& left, const GpuMat& right, GpuMat& disp, Stream& stream) -{ - StereoBeliefPropagationImpl impl(*this, u, d, l, r, u2, d2, l2, r2, datas, out); - impl(left, right, disp, stream); -} - -void cv::gpu::StereoBeliefPropagation::operator()(const GpuMat& data, GpuMat& disp, Stream& stream) -{ - StereoBeliefPropagationImpl impl(*this, u, d, l, r, u2, d2, l2, r2, datas, out); - impl(data, disp, stream); -} - #endif /* !defined (HAVE_CUDA) */ diff --git a/modules/gpustereo/test/test_stereo.cpp b/modules/gpustereo/test/test_stereo.cpp index 2046890d6..4f5e41d8b 100644 --- a/modules/gpustereo/test/test_stereo.cpp +++ b/modules/gpustereo/test/test_stereo.cpp @@ -106,10 +106,15 @@ GPU_TEST_P(StereoBeliefPropagation, Regression) ASSERT_FALSE(right_image.empty()); ASSERT_FALSE(disp_gold.empty()); - cv::gpu::StereoBeliefPropagation bp(64, 8, 2, 25, 0.1f, 15, 1, CV_16S); + cv::Ptr bp = cv::gpu::createStereoBeliefPropagation(64, 8, 2, CV_16S); + bp->setMaxDataTerm(25.0); + bp->setDataWeight(0.1); + bp->setMaxDiscTerm(15.0); + bp->setDiscSingleJump(1.0); + cv::gpu::GpuMat disp; - bp(loadMat(left_image), loadMat(right_image), disp); + bp->compute(loadMat(left_image), loadMat(right_image), disp); cv::Mat h_disp(disp); h_disp.convertTo(h_disp, disp_gold.depth()); diff --git a/samples/gpu/stereo_match.cpp b/samples/gpu/stereo_match.cpp index 071ddf584..86152f3ce 100644 --- a/samples/gpu/stereo_match.cpp +++ 
b/samples/gpu/stereo_match.cpp @@ -66,7 +66,7 @@ private: gpu::GpuMat d_left, d_right; Ptr bm; - gpu::StereoBeliefPropagation bp; + Ptr bp; gpu::StereoConstantSpaceBP csbp; int64 work_begin; @@ -173,7 +173,7 @@ void App::run() // Set common parameters bm = gpu::createStereoBM(p.ndisp); - bp.ndisp = p.ndisp; + bp = gpu::createStereoBeliefPropagation(p.ndisp); csbp.ndisp = p.ndisp; // Prepare disparity map of specified type @@ -203,7 +203,7 @@ void App::run() } bm->compute(d_left, d_right, d_disp); break; - case Params::BP: bp(d_left, d_right, d_disp); break; + case Params::BP: bp->compute(d_left, d_right, d_disp); break; case Params::CSBP: csbp(d_left, d_right, d_disp); break; } workEnd(); @@ -232,8 +232,8 @@ void App::printParams() const cout << "prefilter_sobel: " << bm->getPreFilterType() << endl; break; case Params::BP: - cout << "iter_count: " << bp.iters << endl; - cout << "level_count: " << bp.levels << endl; + cout << "iter_count: " << bp->getNumIters() << endl; + cout << "level_count: " << bp->getNumLevels() << endl; break; case Params::CSBP: cout << "iter_count: " << csbp.iters << endl; @@ -305,14 +305,14 @@ void App::handleKey(char key) p.ndisp = p.ndisp == 1 ? 
8 : p.ndisp + 8; cout << "ndisp: " << p.ndisp << endl; bm->setNumDisparities(p.ndisp); - bp.ndisp = p.ndisp; + bp->setNumDisparities(p.ndisp); csbp.ndisp = p.ndisp; break; case 'q': case 'Q': p.ndisp = max(p.ndisp - 8, 1); cout << "ndisp: " << p.ndisp << endl; bm->setNumDisparities(p.ndisp); - bp.ndisp = p.ndisp; + bp->setNumDisparities(p.ndisp); csbp.ndisp = p.ndisp; break; case '2': @@ -332,8 +332,8 @@ void App::handleKey(char key) case '3': if (p.method == Params::BP) { - bp.iters += 1; - cout << "iter_count: " << bp.iters << endl; + bp->setNumIters(bp->getNumIters() + 1); + cout << "iter_count: " << bp->getNumIters() << endl; } else if (p.method == Params::CSBP) { @@ -344,8 +344,8 @@ void App::handleKey(char key) case 'e': case 'E': if (p.method == Params::BP) { - bp.iters = max(bp.iters - 1, 1); - cout << "iter_count: " << bp.iters << endl; + bp->setNumIters(max(bp->getNumIters() - 1, 1)); + cout << "iter_count: " << bp->getNumIters() << endl; } else if (p.method == Params::CSBP) { @@ -356,8 +356,8 @@ void App::handleKey(char key) case '4': if (p.method == Params::BP) { - bp.levels += 1; - cout << "level_count: " << bp.levels << endl; + bp->setNumLevels(bp->getNumLevels() + 1); + cout << "level_count: " << bp->getNumLevels() << endl; } else if (p.method == Params::CSBP) { @@ -368,8 +368,8 @@ void App::handleKey(char key) case 'r': case 'R': if (p.method == Params::BP) { - bp.levels = max(bp.levels - 1, 1); - cout << "level_count: " << bp.levels << endl; + bp->setNumLevels(max(bp->getNumLevels() - 1, 1)); + cout << "level_count: " << bp->getNumLevels() << endl; } else if (p.method == Params::CSBP) { From be9bb8f18b781ef84bf69b9462642b86ab067ff0 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 8 May 2013 16:25:03 +0400 Subject: [PATCH 083/121] refactored StereoConstantSpaceBP --- .../gpustereo/include/opencv2/gpustereo.hpp | 69 +-- modules/gpustereo/perf/perf_stereo.cpp | 4 +- modules/gpustereo/src/precomp.hpp | 1 + 
modules/gpustereo/src/stereocsbp.cpp | 454 +++++++++++------- modules/gpustereo/test/test_stereo.cpp | 4 +- samples/gpu/stereo_match.cpp | 30 +- 6 files changed, 315 insertions(+), 247 deletions(-) diff --git a/modules/gpustereo/include/opencv2/gpustereo.hpp b/modules/gpustereo/include/opencv2/gpustereo.hpp index 2a0869b68..054504685 100644 --- a/modules/gpustereo/include/opencv2/gpustereo.hpp +++ b/modules/gpustereo/include/opencv2/gpustereo.hpp @@ -112,64 +112,31 @@ public: CV_EXPORTS Ptr createStereoBeliefPropagation(int ndisp = 64, int iters = 5, int levels = 5, int msg_type = CV_32F); +///////////////////////////////////////// +// StereoConstantSpaceBP - -// "A Constant-Space Belief Propagation Algorithm for Stereo Matching" -// Qingxiong Yang, Liang Wang, Narendra Ahuja -// http://vision.ai.uiuc.edu/~qyang6/ -class CV_EXPORTS StereoConstantSpaceBP +//! "A Constant-Space Belief Propagation Algorithm for Stereo Matching" +//! Qingxiong Yang, Liang Wang, Narendra Ahuja +//! http://vision.ai.uiuc.edu/~qyang6/ +class CV_EXPORTS StereoConstantSpaceBP : public gpu::StereoBeliefPropagation { public: - enum { DEFAULT_NDISP = 128 }; - enum { DEFAULT_ITERS = 8 }; - enum { DEFAULT_LEVELS = 4 }; - enum { DEFAULT_NR_PLANE = 4 }; + //! number of active disparity on the first level + virtual int getNrPlane() const = 0; + virtual void setNrPlane(int nr_plane) = 0; + + virtual bool getUseLocalInitDataCost() const = 0; + virtual void setUseLocalInitDataCost(bool use_local_init_data_cost) = 0; static void estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels, int& nr_plane); - - //! the default constructor - explicit StereoConstantSpaceBP(int ndisp = DEFAULT_NDISP, - int iters = DEFAULT_ITERS, - int levels = DEFAULT_LEVELS, - int nr_plane = DEFAULT_NR_PLANE, - int msg_type = CV_32F); - - //! the full constructor taking the number of disparities, number of BP iterations on each level, - //! 
number of levels, number of active disparity on the first level, truncation of data cost, data weight, - //! truncation of discontinuity cost, discontinuity single jump and minimum disparity threshold - StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane, - float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, - int min_disp_th = 0, - int msg_type = CV_32F); - - //! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair, - //! if disparity is empty output type will be CV_16S else output type will be disparity.type(). - void operator()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, Stream& stream = Stream::Null()); - - int ndisp; - - int iters; - int levels; - - int nr_plane; - - float max_data_term; - float data_weight; - float max_disc_term; - float disc_single_jump; - - int min_disp_th; - - int msg_type; - - bool use_local_init_data_cost; -private: - GpuMat messages_buffers; - - GpuMat temp; - GpuMat out; }; +CV_EXPORTS Ptr + createStereoConstantSpaceBP(int ndisp = 128, int iters = 8, int levels = 4, int nr_plane = 4, int msg_type = CV_32F); + + + + // Disparity map refinement using joint bilateral filtering given a single color image. 
// Qingxiong Yang, Liang Wang, Narendra Ahuja // http://vision.ai.uiuc.edu/~qyang6/ diff --git a/modules/gpustereo/perf/perf_stereo.cpp b/modules/gpustereo/perf/perf_stereo.cpp index c31788ea8..276d97207 100644 --- a/modules/gpustereo/perf/perf_stereo.cpp +++ b/modules/gpustereo/perf/perf_stereo.cpp @@ -141,13 +141,13 @@ PERF_TEST_P(ImagePair, StereoConstantSpaceBP, if (PERF_RUN_GPU()) { - cv::gpu::StereoConstantSpaceBP d_csbp(ndisp); + cv::Ptr d_csbp = cv::gpu::createStereoConstantSpaceBP(ndisp); const cv::gpu::GpuMat d_imgLeft(imgLeft); const cv::gpu::GpuMat d_imgRight(imgRight); cv::gpu::GpuMat dst; - TEST_CYCLE() d_csbp(d_imgLeft, d_imgRight, dst); + TEST_CYCLE() d_csbp->compute(d_imgLeft, d_imgRight, dst); GPU_SANITY_CHECK(dst); } diff --git a/modules/gpustereo/src/precomp.hpp b/modules/gpustereo/src/precomp.hpp index 963cb4d07..934da9fd2 100644 --- a/modules/gpustereo/src/precomp.hpp +++ b/modules/gpustereo/src/precomp.hpp @@ -48,5 +48,6 @@ #include "opencv2/gpustereo.hpp" #include "opencv2/core/private.gpu.hpp" +#include "opencv2/core/utility.hpp" #endif /* __OPENCV_PRECOMP_H__ */ diff --git a/modules/gpustereo/src/stereocsbp.cpp b/modules/gpustereo/src/stereocsbp.cpp index cedba1eeb..9afd8d14e 100644 --- a/modules/gpustereo/src/stereocsbp.cpp +++ b/modules/gpustereo/src/stereocsbp.cpp @@ -49,13 +49,9 @@ using namespace cv::gpu; void cv::gpu::StereoConstantSpaceBP::estimateRecommendedParams(int, int, int&, int&, int&, int&) { throw_no_cuda(); } -cv::gpu::StereoConstantSpaceBP::StereoConstantSpaceBP(int, int, int, int, int) { throw_no_cuda(); } -cv::gpu::StereoConstantSpaceBP::StereoConstantSpaceBP(int, int, int, int, float, float, float, float, int, int) { throw_no_cuda(); } - -void cv::gpu::StereoConstantSpaceBP::operator()(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } +Ptr cv::gpu::createStereoConstantSpaceBP(int, int, int, int, int) { throw_no_cuda(); return Ptr(); } #else /* !defined (HAVE_CUDA) */ -#include 
"opencv2/core/utility.hpp" namespace cv { namespace gpu { namespace cudev { @@ -89,14 +85,288 @@ namespace cv { namespace gpu { namespace cudev } }}} -using namespace ::cv::gpu::cudev::stereocsbp; - namespace { + class StereoCSBPImpl : public gpu::StereoConstantSpaceBP + { + public: + StereoCSBPImpl(int ndisp, int iters, int levels, int nr_plane, int msg_type); + + void compute(InputArray left, InputArray right, OutputArray disparity); + void compute(InputArray left, InputArray right, OutputArray disparity, Stream& stream); + void compute(InputArray data, OutputArray disparity, Stream& stream); + + int getMinDisparity() const { return min_disp_th_; } + void setMinDisparity(int minDisparity) { min_disp_th_ = minDisparity; } + + int getNumDisparities() const { return ndisp_; } + void setNumDisparities(int numDisparities) { ndisp_ = numDisparities; } + + int getBlockSize() const { return 0; } + void setBlockSize(int /*blockSize*/) {} + + int getSpeckleWindowSize() const { return 0; } + void setSpeckleWindowSize(int /*speckleWindowSize*/) {} + + int getSpeckleRange() const { return 0; } + void setSpeckleRange(int /*speckleRange*/) {} + + int getDisp12MaxDiff() const { return 0; } + void setDisp12MaxDiff(int /*disp12MaxDiff*/) {} + + int getNumIters() const { return iters_; } + void setNumIters(int iters) { iters_ = iters; } + + int getNumLevels() const { return levels_; } + void setNumLevels(int levels) { levels_ = levels; } + + double getMaxDataTerm() const { return max_data_term_; } + void setMaxDataTerm(double max_data_term) { max_data_term_ = (float) max_data_term; } + + double getDataWeight() const { return data_weight_; } + void setDataWeight(double data_weight) { data_weight_ = (float) data_weight; } + + double getMaxDiscTerm() const { return max_disc_term_; } + void setMaxDiscTerm(double max_disc_term) { max_disc_term_ = (float) max_disc_term; } + + double getDiscSingleJump() const { return disc_single_jump_; } + void setDiscSingleJump(double disc_single_jump) 
{ disc_single_jump_ = (float) disc_single_jump; } + + int getMsgType() const { return msg_type_; } + void setMsgType(int msg_type) { msg_type_ = msg_type; } + + int getNrPlane() const { return nr_plane_; } + void setNrPlane(int nr_plane) { nr_plane_ = nr_plane; } + + bool getUseLocalInitDataCost() const { return use_local_init_data_cost_; } + void setUseLocalInitDataCost(bool use_local_init_data_cost) { use_local_init_data_cost_ = use_local_init_data_cost; } + + private: + int min_disp_th_; + int ndisp_; + int iters_; + int levels_; + float max_data_term_; + float data_weight_; + float max_disc_term_; + float disc_single_jump_; + int msg_type_; + int nr_plane_; + bool use_local_init_data_cost_; + + GpuMat mbuf_; + GpuMat temp_; + GpuMat outBuf_; + }; + const float DEFAULT_MAX_DATA_TERM = 30.0f; const float DEFAULT_DATA_WEIGHT = 1.0f; const float DEFAULT_MAX_DISC_TERM = 160.0f; const float DEFAULT_DISC_SINGLE_JUMP = 10.0f; + + StereoCSBPImpl::StereoCSBPImpl(int ndisp, int iters, int levels, int nr_plane, int msg_type) : + min_disp_th_(0), ndisp_(ndisp), iters_(iters), levels_(levels), + max_data_term_(DEFAULT_MAX_DATA_TERM), data_weight_(DEFAULT_DATA_WEIGHT), + max_disc_term_(DEFAULT_MAX_DISC_TERM), disc_single_jump_(DEFAULT_DISC_SINGLE_JUMP), + msg_type_(msg_type), nr_plane_(nr_plane), use_local_init_data_cost_(true) + { + } + + void StereoCSBPImpl::compute(InputArray left, InputArray right, OutputArray disparity) + { + compute(left, right, disparity, Stream::Null()); + } + + void StereoCSBPImpl::compute(InputArray _left, InputArray _right, OutputArray disp, Stream& _stream) + { + using namespace cv::gpu::cudev::stereocsbp; + + CV_Assert( msg_type_ == CV_32F || msg_type_ == CV_16S ); + CV_Assert( 0 < ndisp_ && 0 < iters_ && 0 < levels_ && 0 < nr_plane_ && levels_ <= 8 ); + + GpuMat left = _left.getGpuMat(); + GpuMat right = _right.getGpuMat(); + + CV_Assert( left.type() == CV_8UC1 || left.type() == CV_8UC3 || left.type() == CV_8UC4 ); + CV_Assert( left.size() == 
right.size() && left.type() == right.type() ); + + cudaStream_t stream = StreamAccessor::getStream(_stream); + + //////////////////////////////////////////////////////////////////////////////////////////// + // Init + + int rows = left.rows; + int cols = left.cols; + + levels_ = std::min(levels_, int(log((double)ndisp_) / log(2.0))); + + // compute sizes + AutoBuffer buf(levels_ * 3); + int* cols_pyr = buf; + int* rows_pyr = cols_pyr + levels_; + int* nr_plane_pyr = rows_pyr + levels_; + + cols_pyr[0] = cols; + rows_pyr[0] = rows; + nr_plane_pyr[0] = nr_plane_; + + for (int i = 1; i < levels_; i++) + { + cols_pyr[i] = cols_pyr[i-1] / 2; + rows_pyr[i] = rows_pyr[i-1] / 2; + nr_plane_pyr[i] = nr_plane_pyr[i-1] * 2; + } + + GpuMat u[2], d[2], l[2], r[2], disp_selected_pyr[2], data_cost, data_cost_selected; + + //allocate buffers + int buffers_count = 10; // (up + down + left + right + disp_selected_pyr) * 2 + buffers_count += 2; // data_cost has twice more rows than other buffers, what's why +2, not +1; + buffers_count += 1; // data_cost_selected + mbuf_.create(rows * nr_plane_ * buffers_count, cols, msg_type_); + + data_cost = mbuf_.rowRange(0, rows * nr_plane_ * 2); + data_cost_selected = mbuf_.rowRange(data_cost.rows, data_cost.rows + rows * nr_plane_); + + for(int k = 0; k < 2; ++k) // in/out + { + GpuMat sub1 = mbuf_.rowRange(data_cost.rows + data_cost_selected.rows, mbuf_.rows); + GpuMat sub2 = sub1.rowRange((k+0)*sub1.rows/2, (k+1)*sub1.rows/2); + + GpuMat *buf_ptrs[] = { &u[k], &d[k], &l[k], &r[k], &disp_selected_pyr[k] }; + for(int _r = 0; _r < 5; ++_r) + { + *buf_ptrs[_r] = sub2.rowRange(_r * sub2.rows/5, (_r+1) * sub2.rows/5); + CV_DbgAssert( buf_ptrs[_r]->cols == cols && buf_ptrs[_r]->rows == rows * nr_plane_ ); + } + }; + + size_t elem_step = mbuf_.step / mbuf_.elemSize(); + + Size temp_size = data_cost.size(); + if ((size_t)temp_size.area() < elem_step * rows_pyr[levels_ - 1] * ndisp_) + temp_size = Size(static_cast(elem_step), rows_pyr[levels_ - 1] * 
ndisp_); + + temp_.create(temp_size, msg_type_); + + //////////////////////////////////////////////////////////////////////////// + // Compute + + load_constants(ndisp_, max_data_term_, data_weight_, max_disc_term_, disc_single_jump_, min_disp_th_, left, right, temp_); + + l[0].setTo(0, _stream); + d[0].setTo(0, _stream); + r[0].setTo(0, _stream); + u[0].setTo(0, _stream); + + l[1].setTo(0, _stream); + d[1].setTo(0, _stream); + r[1].setTo(0, _stream); + u[1].setTo(0, _stream); + + data_cost.setTo(0, _stream); + data_cost_selected.setTo(0, _stream); + + int cur_idx = 0; + + if (msg_type_ == CV_32F) + { + for (int i = levels_ - 1; i >= 0; i--) + { + if (i == levels_ - 1) + { + init_data_cost(left.rows, left.cols, disp_selected_pyr[cur_idx].ptr(), data_cost_selected.ptr(), + elem_step, rows_pyr[i], cols_pyr[i], i, nr_plane_pyr[i], ndisp_, left.channels(), use_local_init_data_cost_, stream); + } + else + { + compute_data_cost(disp_selected_pyr[cur_idx].ptr(), data_cost.ptr(), elem_step, + left.rows, left.cols, rows_pyr[i], cols_pyr[i], rows_pyr[i+1], i, nr_plane_pyr[i+1], left.channels(), stream); + + int new_idx = (cur_idx + 1) & 1; + + init_message(u[new_idx].ptr(), d[new_idx].ptr(), l[new_idx].ptr(), r[new_idx].ptr(), + u[cur_idx].ptr(), d[cur_idx].ptr(), l[cur_idx].ptr(), r[cur_idx].ptr(), + disp_selected_pyr[new_idx].ptr(), disp_selected_pyr[cur_idx].ptr(), + data_cost_selected.ptr(), data_cost.ptr(), elem_step, rows_pyr[i], + cols_pyr[i], nr_plane_pyr[i], rows_pyr[i+1], cols_pyr[i+1], nr_plane_pyr[i+1], stream); + + cur_idx = new_idx; + } + + calc_all_iterations(u[cur_idx].ptr(), d[cur_idx].ptr(), l[cur_idx].ptr(), r[cur_idx].ptr(), + data_cost_selected.ptr(), disp_selected_pyr[cur_idx].ptr(), elem_step, + rows_pyr[i], cols_pyr[i], nr_plane_pyr[i], iters_, stream); + } + } + else + { + for (int i = levels_ - 1; i >= 0; i--) + { + if (i == levels_ - 1) + { + init_data_cost(left.rows, left.cols, disp_selected_pyr[cur_idx].ptr(), data_cost_selected.ptr(), + 
elem_step, rows_pyr[i], cols_pyr[i], i, nr_plane_pyr[i], ndisp_, left.channels(), use_local_init_data_cost_, stream); + } + else + { + compute_data_cost(disp_selected_pyr[cur_idx].ptr(), data_cost.ptr(), elem_step, + left.rows, left.cols, rows_pyr[i], cols_pyr[i], rows_pyr[i+1], i, nr_plane_pyr[i+1], left.channels(), stream); + + int new_idx = (cur_idx + 1) & 1; + + init_message(u[new_idx].ptr(), d[new_idx].ptr(), l[new_idx].ptr(), r[new_idx].ptr(), + u[cur_idx].ptr(), d[cur_idx].ptr(), l[cur_idx].ptr(), r[cur_idx].ptr(), + disp_selected_pyr[new_idx].ptr(), disp_selected_pyr[cur_idx].ptr(), + data_cost_selected.ptr(), data_cost.ptr(), elem_step, rows_pyr[i], + cols_pyr[i], nr_plane_pyr[i], rows_pyr[i+1], cols_pyr[i+1], nr_plane_pyr[i+1], stream); + + cur_idx = new_idx; + } + + calc_all_iterations(u[cur_idx].ptr(), d[cur_idx].ptr(), l[cur_idx].ptr(), r[cur_idx].ptr(), + data_cost_selected.ptr(), disp_selected_pyr[cur_idx].ptr(), elem_step, + rows_pyr[i], cols_pyr[i], nr_plane_pyr[i], iters_, stream); + } + } + + const int dtype = disp.fixedType() ? 
disp.type() : CV_16SC1; + + disp.create(rows, cols, dtype); + GpuMat out = disp.getGpuMat(); + + if (dtype != CV_16SC1) + { + outBuf_.create(rows, cols, CV_16SC1); + out = outBuf_; + } + + out.setTo(0, _stream); + + if (msg_type_ == CV_32F) + { + compute_disp(u[cur_idx].ptr(), d[cur_idx].ptr(), l[cur_idx].ptr(), r[cur_idx].ptr(), + data_cost_selected.ptr(), disp_selected_pyr[cur_idx].ptr(), elem_step, out, nr_plane_pyr[0], stream); + } + else + { + compute_disp(u[cur_idx].ptr(), d[cur_idx].ptr(), l[cur_idx].ptr(), r[cur_idx].ptr(), + data_cost_selected.ptr(), disp_selected_pyr[cur_idx].ptr(), elem_step, out, nr_plane_pyr[0], stream); + } + + if (dtype != CV_16SC1) + out.convertTo(disp, dtype, _stream); + } + + void StereoCSBPImpl::compute(InputArray /*data*/, OutputArray /*disparity*/, Stream& /*stream*/) + { + CV_Error(Error::StsNotImplemented, "Not implemented"); + } +} + +Ptr cv::gpu::createStereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane, int msg_type) +{ + return new StereoCSBPImpl(ndisp, iters, levels, nr_plane, msg_type); } void cv::gpu::StereoConstantSpaceBP::estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels, int& nr_plane) @@ -114,174 +384,4 @@ void cv::gpu::StereoConstantSpaceBP::estimateRecommendedParams(int width, int he nr_plane = (int) ((float) ndisp / std::pow(2.0, levels + 1)); } -cv::gpu::StereoConstantSpaceBP::StereoConstantSpaceBP(int ndisp_, int iters_, int levels_, int nr_plane_, - int msg_type_) - - : ndisp(ndisp_), iters(iters_), levels(levels_), nr_plane(nr_plane_), - max_data_term(DEFAULT_MAX_DATA_TERM), data_weight(DEFAULT_DATA_WEIGHT), - max_disc_term(DEFAULT_MAX_DISC_TERM), disc_single_jump(DEFAULT_DISC_SINGLE_JUMP), min_disp_th(0), - msg_type(msg_type_), use_local_init_data_cost(true) -{ - CV_Assert(msg_type_ == CV_32F || msg_type_ == CV_16S); -} - -cv::gpu::StereoConstantSpaceBP::StereoConstantSpaceBP(int ndisp_, int iters_, int levels_, int nr_plane_, - float max_data_term_, float 
data_weight_, float max_disc_term_, float disc_single_jump_, - int min_disp_th_, int msg_type_) - : ndisp(ndisp_), iters(iters_), levels(levels_), nr_plane(nr_plane_), - max_data_term(max_data_term_), data_weight(data_weight_), - max_disc_term(max_disc_term_), disc_single_jump(disc_single_jump_), min_disp_th(min_disp_th_), - msg_type(msg_type_), use_local_init_data_cost(true) -{ - CV_Assert(msg_type_ == CV_32F || msg_type_ == CV_16S); -} - -template -static void csbp_operator(StereoConstantSpaceBP& rthis, GpuMat& mbuf, GpuMat& temp, GpuMat& out, const GpuMat& left, const GpuMat& right, GpuMat& disp, Stream& stream) -{ - CV_DbgAssert(0 < rthis.ndisp && 0 < rthis.iters && 0 < rthis.levels && 0 < rthis.nr_plane - && left.rows == right.rows && left.cols == right.cols && left.type() == right.type()); - - CV_Assert(rthis.levels <= 8 && (left.type() == CV_8UC1 || left.type() == CV_8UC3 || left.type() == CV_8UC4)); - - const Scalar zero = Scalar::all(0); - - cudaStream_t cudaStream = StreamAccessor::getStream(stream); - - //////////////////////////////////////////////////////////////////////////////////////////// - // Init - - int rows = left.rows; - int cols = left.cols; - - rthis.levels = std::min(rthis.levels, int(log((double)rthis.ndisp) / log(2.0))); - int levels = rthis.levels; - - // compute sizes - AutoBuffer buf(levels * 3); - int* cols_pyr = buf; - int* rows_pyr = cols_pyr + levels; - int* nr_plane_pyr = rows_pyr + levels; - - cols_pyr[0] = cols; - rows_pyr[0] = rows; - nr_plane_pyr[0] = rthis.nr_plane; - - for (int i = 1; i < levels; i++) - { - cols_pyr[i] = cols_pyr[i-1] / 2; - rows_pyr[i] = rows_pyr[i-1] / 2; - nr_plane_pyr[i] = nr_plane_pyr[i-1] * 2; - } - - - GpuMat u[2], d[2], l[2], r[2], disp_selected_pyr[2], data_cost, data_cost_selected; - - - //allocate buffers - int buffers_count = 10; // (up + down + left + right + disp_selected_pyr) * 2 - buffers_count += 2; // data_cost has twice more rows than other buffers, what's why +2, not +1; - buffers_count 
+= 1; // data_cost_selected - mbuf.create(rows * rthis.nr_plane * buffers_count, cols, DataType::type); - - data_cost = mbuf.rowRange(0, rows * rthis.nr_plane * 2); - data_cost_selected = mbuf.rowRange(data_cost.rows, data_cost.rows + rows * rthis.nr_plane); - - for(int k = 0; k < 2; ++k) // in/out - { - GpuMat sub1 = mbuf.rowRange(data_cost.rows + data_cost_selected.rows, mbuf.rows); - GpuMat sub2 = sub1.rowRange((k+0)*sub1.rows/2, (k+1)*sub1.rows/2); - - GpuMat *buf_ptrs[] = { &u[k], &d[k], &l[k], &r[k], &disp_selected_pyr[k] }; - for(int _r = 0; _r < 5; ++_r) - { - *buf_ptrs[_r] = sub2.rowRange(_r * sub2.rows/5, (_r+1) * sub2.rows/5); - CV_DbgAssert(buf_ptrs[_r]->cols == cols && buf_ptrs[_r]->rows == rows * rthis.nr_plane); - } - }; - - size_t elem_step = mbuf.step / sizeof(T); - - Size temp_size = data_cost.size(); - if ((size_t)temp_size.area() < elem_step * rows_pyr[levels - 1] * rthis.ndisp) - temp_size = Size(static_cast(elem_step), rows_pyr[levels - 1] * rthis.ndisp); - - temp.create(temp_size, DataType::type); - - //////////////////////////////////////////////////////////////////////////// - // Compute - - load_constants(rthis.ndisp, rthis.max_data_term, rthis.data_weight, rthis.max_disc_term, rthis.disc_single_jump, rthis.min_disp_th, left, right, temp); - - l[0].setTo(zero, stream); - d[0].setTo(zero, stream); - r[0].setTo(zero, stream); - u[0].setTo(zero, stream); - - l[1].setTo(zero, stream); - d[1].setTo(zero, stream); - r[1].setTo(zero, stream); - u[1].setTo(zero, stream); - - data_cost.setTo(zero, stream); - data_cost_selected.setTo(zero, stream); - - int cur_idx = 0; - - for (int i = levels - 1; i >= 0; i--) - { - if (i == levels - 1) - { - init_data_cost(left.rows, left.cols, disp_selected_pyr[cur_idx].ptr(), data_cost_selected.ptr(), - elem_step, rows_pyr[i], cols_pyr[i], i, nr_plane_pyr[i], rthis.ndisp, left.channels(), rthis.use_local_init_data_cost, cudaStream); - } - else - { - compute_data_cost(disp_selected_pyr[cur_idx].ptr(), 
data_cost.ptr(), elem_step, - left.rows, left.cols, rows_pyr[i], cols_pyr[i], rows_pyr[i+1], i, nr_plane_pyr[i+1], left.channels(), cudaStream); - - int new_idx = (cur_idx + 1) & 1; - - init_message(u[new_idx].ptr(), d[new_idx].ptr(), l[new_idx].ptr(), r[new_idx].ptr(), - u[cur_idx].ptr(), d[cur_idx].ptr(), l[cur_idx].ptr(), r[cur_idx].ptr(), - disp_selected_pyr[new_idx].ptr(), disp_selected_pyr[cur_idx].ptr(), - data_cost_selected.ptr(), data_cost.ptr(), elem_step, rows_pyr[i], - cols_pyr[i], nr_plane_pyr[i], rows_pyr[i+1], cols_pyr[i+1], nr_plane_pyr[i+1], cudaStream); - - cur_idx = new_idx; - } - - calc_all_iterations(u[cur_idx].ptr(), d[cur_idx].ptr(), l[cur_idx].ptr(), r[cur_idx].ptr(), - data_cost_selected.ptr(), disp_selected_pyr[cur_idx].ptr(), elem_step, - rows_pyr[i], cols_pyr[i], nr_plane_pyr[i], rthis.iters, cudaStream); - } - - if (disp.empty()) - disp.create(rows, cols, CV_16S); - - out = ((disp.type() == CV_16S) ? disp : (out.create(rows, cols, CV_16S), out)); - - out.setTo(zero, stream); - - compute_disp(u[cur_idx].ptr(), d[cur_idx].ptr(), l[cur_idx].ptr(), r[cur_idx].ptr(), - data_cost_selected.ptr(), disp_selected_pyr[cur_idx].ptr(), elem_step, out, nr_plane_pyr[0], cudaStream); - - if (disp.type() != CV_16S) - { - out.convertTo(disp, disp.type(), stream); - } -} - - -typedef void (*csbp_operator_t)(StereoConstantSpaceBP& rthis, GpuMat& mbuf, - GpuMat& temp, GpuMat& out, const GpuMat& left, const GpuMat& right, GpuMat& disp, Stream& stream); - -const static csbp_operator_t operators[] = {0, 0, 0, csbp_operator, 0, csbp_operator, 0, 0}; - -void cv::gpu::StereoConstantSpaceBP::operator()(const GpuMat& left, const GpuMat& right, GpuMat& disp, Stream& stream) -{ - CV_Assert(msg_type == CV_32F || msg_type == CV_16S); - operators[msg_type](*this, messages_buffers, temp, out, left, right, disp, stream); -} - #endif /* !defined (HAVE_CUDA) */ diff --git a/modules/gpustereo/test/test_stereo.cpp b/modules/gpustereo/test/test_stereo.cpp index 
4f5e41d8b..9a3d94627 100644 --- a/modules/gpustereo/test/test_stereo.cpp +++ b/modules/gpustereo/test/test_stereo.cpp @@ -155,10 +155,10 @@ GPU_TEST_P(StereoConstantSpaceBP, Regression) ASSERT_FALSE(right_image.empty()); ASSERT_FALSE(disp_gold.empty()); - cv::gpu::StereoConstantSpaceBP csbp(128, 16, 4, 4); + cv::Ptr csbp = cv::gpu::createStereoConstantSpaceBP(128, 16, 4, 4); cv::gpu::GpuMat disp; - csbp(loadMat(left_image), loadMat(right_image), disp); + csbp->compute(loadMat(left_image), loadMat(right_image), disp); cv::Mat h_disp(disp); h_disp.convertTo(h_disp, disp_gold.depth()); diff --git a/samples/gpu/stereo_match.cpp b/samples/gpu/stereo_match.cpp index 86152f3ce..a080153a6 100644 --- a/samples/gpu/stereo_match.cpp +++ b/samples/gpu/stereo_match.cpp @@ -67,7 +67,7 @@ private: Ptr bm; Ptr bp; - gpu::StereoConstantSpaceBP csbp; + Ptr csbp; int64 work_begin; double work_fps; @@ -174,7 +174,7 @@ void App::run() // Set common parameters bm = gpu::createStereoBM(p.ndisp); bp = gpu::createStereoBeliefPropagation(p.ndisp); - csbp.ndisp = p.ndisp; + csbp = cv::gpu::createStereoConstantSpaceBP(p.ndisp); // Prepare disparity map of specified type Mat disp(left.size(), CV_8U); @@ -204,7 +204,7 @@ void App::run() bm->compute(d_left, d_right, d_disp); break; case Params::BP: bp->compute(d_left, d_right, d_disp); break; - case Params::CSBP: csbp(d_left, d_right, d_disp); break; + case Params::CSBP: csbp->compute(d_left, d_right, d_disp); break; } workEnd(); @@ -236,8 +236,8 @@ void App::printParams() const cout << "level_count: " << bp->getNumLevels() << endl; break; case Params::CSBP: - cout << "iter_count: " << csbp.iters << endl; - cout << "level_count: " << csbp.levels << endl; + cout << "iter_count: " << csbp->getNumIters() << endl; + cout << "level_count: " << csbp->getNumLevels() << endl; break; } cout << endl; @@ -306,14 +306,14 @@ void App::handleKey(char key) cout << "ndisp: " << p.ndisp << endl; bm->setNumDisparities(p.ndisp); bp->setNumDisparities(p.ndisp); - 
csbp.ndisp = p.ndisp; + csbp->setNumDisparities(p.ndisp); break; case 'q': case 'Q': p.ndisp = max(p.ndisp - 8, 1); cout << "ndisp: " << p.ndisp << endl; bm->setNumDisparities(p.ndisp); bp->setNumDisparities(p.ndisp); - csbp.ndisp = p.ndisp; + csbp->setNumDisparities(p.ndisp); break; case '2': if (p.method == Params::BM) @@ -337,8 +337,8 @@ void App::handleKey(char key) } else if (p.method == Params::CSBP) { - csbp.iters += 1; - cout << "iter_count: " << csbp.iters << endl; + csbp->setNumIters(csbp->getNumIters() + 1); + cout << "iter_count: " << csbp->getNumIters() << endl; } break; case 'e': case 'E': @@ -349,8 +349,8 @@ void App::handleKey(char key) } else if (p.method == Params::CSBP) { - csbp.iters = max(csbp.iters - 1, 1); - cout << "iter_count: " << csbp.iters << endl; + csbp->setNumIters(max(csbp->getNumIters() - 1, 1)); + cout << "iter_count: " << csbp->getNumIters() << endl; } break; case '4': @@ -361,8 +361,8 @@ void App::handleKey(char key) } else if (p.method == Params::CSBP) { - csbp.levels += 1; - cout << "level_count: " << csbp.levels << endl; + csbp->setNumLevels(csbp->getNumLevels() + 1); + cout << "level_count: " << csbp->getNumLevels() << endl; } break; case 'r': case 'R': @@ -373,8 +373,8 @@ void App::handleKey(char key) } else if (p.method == Params::CSBP) { - csbp.levels = max(csbp.levels - 1, 1); - cout << "level_count: " << csbp.levels << endl; + csbp->setNumLevels(max(csbp->getNumLevels() - 1, 1)); + cout << "level_count: " << csbp->getNumLevels() << endl; } break; } From 3a02e599e02bc11d0c8af9f4d86fa7e0a9c7dc28 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 8 May 2013 16:48:10 +0400 Subject: [PATCH 084/121] refactored DisparityBilateralFilter --- .../gpustereo/include/opencv2/gpustereo.hpp | 60 ++++---- modules/gpustereo/perf/perf_stereo.cpp | 4 +- .../src/disparity_bilateral_filter.cpp | 144 ++++++++++++------ 3 files changed, 132 insertions(+), 76 deletions(-) diff --git a/modules/gpustereo/include/opencv2/gpustereo.hpp 
b/modules/gpustereo/include/opencv2/gpustereo.hpp index 054504685..c20c6f79b 100644 --- a/modules/gpustereo/include/opencv2/gpustereo.hpp +++ b/modules/gpustereo/include/opencv2/gpustereo.hpp @@ -134,44 +134,48 @@ public: CV_EXPORTS Ptr createStereoConstantSpaceBP(int ndisp = 128, int iters = 8, int levels = 4, int nr_plane = 4, int msg_type = CV_32F); +///////////////////////////////////////// +// DisparityBilateralFilter - - -// Disparity map refinement using joint bilateral filtering given a single color image. -// Qingxiong Yang, Liang Wang, Narendra Ahuja -// http://vision.ai.uiuc.edu/~qyang6/ -class CV_EXPORTS DisparityBilateralFilter +//! Disparity map refinement using joint bilateral filtering given a single color image. +//! Qingxiong Yang, Liang Wang, Narendra Ahuja +//! http://vision.ai.uiuc.edu/~qyang6/ +class CV_EXPORTS DisparityBilateralFilter : public cv::Algorithm { public: - enum { DEFAULT_NDISP = 64 }; - enum { DEFAULT_RADIUS = 3 }; - enum { DEFAULT_ITERS = 1 }; - - //! the default constructor - explicit DisparityBilateralFilter(int ndisp = DEFAULT_NDISP, int radius = DEFAULT_RADIUS, int iters = DEFAULT_ITERS); - - //! the full constructor taking the number of disparities, filter radius, - //! number of iterations, truncation of data continuity, truncation of disparity continuity - //! and filter range sigma - DisparityBilateralFilter(int ndisp, int radius, int iters, float edge_threshold, float max_disc_threshold, float sigma_range); - //! the disparity map refinement operator. Refine disparity map using joint bilateral filtering given a single color image. //! disparity must have CV_8U or CV_16S type, image must have CV_8UC1 or CV_8UC3 type. 
- void operator()(const GpuMat& disparity, const GpuMat& image, GpuMat& dst, Stream& stream = Stream::Null()); + virtual void apply(InputArray disparity, InputArray image, OutputArray dst, Stream& stream = Stream::Null()) = 0; -private: - int ndisp; - int radius; - int iters; + virtual int getNumDisparities() const = 0; + virtual void setNumDisparities(int numDisparities) = 0; - float edge_threshold; - float max_disc_threshold; - float sigma_range; + virtual int getRadius() const = 0; + virtual void setRadius(int radius) = 0; - GpuMat table_color; - GpuMat table_space; + virtual int getNumIters() const = 0; + virtual void setNumIters(int iters) = 0; + + //! truncation of data continuity + virtual double getEdgeThreshold() const = 0; + virtual void setEdgeThreshold(double edge_threshold) = 0; + + //! truncation of disparity continuity + virtual double getMaxDiscThreshold() const = 0; + virtual void setMaxDiscThreshold(double max_disc_threshold) = 0; + + //! filter range sigma + virtual double getSigmaRange() const = 0; + virtual void setSigmaRange(double sigma_range) = 0; }; +CV_EXPORTS Ptr + createDisparityBilateralFilter(int ndisp = 64, int radius = 3, int iters = 1); + + + + + //! Reprojects disparity image to 3D space. //! Supports CV_8U and CV_16S types of input disparity. //! The output is a 3- or 4-channel floating-point matrix. 
diff --git a/modules/gpustereo/perf/perf_stereo.cpp b/modules/gpustereo/perf/perf_stereo.cpp index 276d97207..476a591a1 100644 --- a/modules/gpustereo/perf/perf_stereo.cpp +++ b/modules/gpustereo/perf/perf_stereo.cpp @@ -173,13 +173,13 @@ PERF_TEST_P(ImagePair, DisparityBilateralFilter, if (PERF_RUN_GPU()) { - cv::gpu::DisparityBilateralFilter d_filter(ndisp); + cv::Ptr d_filter = cv::gpu::createDisparityBilateralFilter(ndisp); const cv::gpu::GpuMat d_img(img); const cv::gpu::GpuMat d_disp(disp); cv::gpu::GpuMat dst; - TEST_CYCLE() d_filter(d_disp, d_img, dst); + TEST_CYCLE() d_filter->apply(d_disp, d_img, dst); GPU_SANITY_CHECK(dst); } diff --git a/modules/gpustereo/src/disparity_bilateral_filter.cpp b/modules/gpustereo/src/disparity_bilateral_filter.cpp index d13fcc004..689a9e76e 100644 --- a/modules/gpustereo/src/disparity_bilateral_filter.cpp +++ b/modules/gpustereo/src/disparity_bilateral_filter.cpp @@ -47,10 +47,7 @@ using namespace cv::gpu; #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) -cv::gpu::DisparityBilateralFilter::DisparityBilateralFilter(int, int, int) { throw_no_cuda(); } -cv::gpu::DisparityBilateralFilter::DisparityBilateralFilter(int, int, int, float, float, float) { throw_no_cuda(); } - -void cv::gpu::DisparityBilateralFilter::operator()(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); } +Ptr cv::gpu::createDisparityBilateralFilter(int, int, int) { throw_no_cuda(); return Ptr(); } #else /* !defined (HAVE_CUDA) */ @@ -65,15 +62,46 @@ namespace cv { namespace gpu { namespace cudev } }}} -using namespace ::cv::gpu::cudev::disp_bilateral_filter; - namespace { - const float DEFAULT_EDGE_THRESHOLD = 0.1f; - const float DEFAULT_MAX_DISC_THRESHOLD = 0.2f; - const float DEFAULT_SIGMA_RANGE = 10.0f; + class DispBilateralFilterImpl : public gpu::DisparityBilateralFilter + { + public: + DispBilateralFilterImpl(int ndisp, int radius, int iters); - inline void calc_color_weighted_table(GpuMat& table_color, float sigma_range, int len) + 
void apply(InputArray disparity, InputArray image, OutputArray dst, Stream& stream); + + int getNumDisparities() const { return ndisp_; } + void setNumDisparities(int numDisparities) { ndisp_ = numDisparities; } + + int getRadius() const { return radius_; } + void setRadius(int radius); + + int getNumIters() const { return iters_; } + void setNumIters(int iters) { iters_ = iters; } + + double getEdgeThreshold() const { return edge_threshold_; } + void setEdgeThreshold(double edge_threshold) { edge_threshold_ = (float) edge_threshold; } + + double getMaxDiscThreshold() const { return max_disc_threshold_; } + void setMaxDiscThreshold(double max_disc_threshold) { max_disc_threshold_ = (float) max_disc_threshold; } + + double getSigmaRange() const { return sigma_range_; } + void setSigmaRange(double sigma_range); + + private: + int ndisp_; + int radius_; + int iters_; + float edge_threshold_; + float max_disc_threshold_; + float sigma_range_; + + GpuMat table_color_; + GpuMat table_space_; + }; + + void calc_color_weighted_table(GpuMat& table_color, float sigma_range, int len) { Mat cpu_table_color(1, len, CV_32F); @@ -85,7 +113,7 @@ namespace table_color.upload(cpu_table_color); } - inline void calc_space_weighted_filter(GpuMat& table_space, int win_size, float dist_space) + void calc_space_weighted_filter(GpuMat& table_space, int win_size, float dist_space) { int half = (win_size >> 1); @@ -101,54 +129,78 @@ namespace table_space.upload(cpu_table_space); } - template - void disp_bilateral_filter_operator(int ndisp, int radius, int iters, float edge_threshold,float max_disc_threshold, - GpuMat& table_color, GpuMat& table_space, - const GpuMat& disp, const GpuMat& img, GpuMat& dst, Stream& stream) + const float DEFAULT_EDGE_THRESHOLD = 0.1f; + const float DEFAULT_MAX_DISC_THRESHOLD = 0.2f; + const float DEFAULT_SIGMA_RANGE = 10.0f; + + DispBilateralFilterImpl::DispBilateralFilterImpl(int ndisp, int radius, int iters) : + ndisp_(ndisp), radius_(radius), iters_(iters), + 
edge_threshold_(DEFAULT_EDGE_THRESHOLD), max_disc_threshold_(DEFAULT_MAX_DISC_THRESHOLD), + sigma_range_(DEFAULT_SIGMA_RANGE) { - short edge_disc = std::max(short(1), short(ndisp * edge_threshold + 0.5)); - short max_disc = short(ndisp * max_disc_threshold + 0.5); + calc_color_weighted_table(table_color_, sigma_range_, 255); + calc_space_weighted_filter(table_space_, radius_ * 2 + 1, radius_ + 1.0f); + } + + void DispBilateralFilterImpl::setRadius(int radius) + { + radius_ = radius; + calc_space_weighted_filter(table_space_, radius_ * 2 + 1, radius_ + 1.0f); + } + + void DispBilateralFilterImpl::setSigmaRange(double sigma_range) + { + sigma_range_ = (float) sigma_range; + calc_color_weighted_table(table_color_, sigma_range_, 255); + } + + template + void disp_bilateral_filter_operator(int ndisp, int radius, int iters, float edge_threshold, float max_disc_threshold, + GpuMat& table_color, GpuMat& table_space, + const GpuMat& disp, const GpuMat& img, + OutputArray _dst, Stream& stream) + { + using namespace cv::gpu::cudev::disp_bilateral_filter; + + const short edge_disc = std::max(short(1), short(ndisp * edge_threshold + 0.5)); + const short max_disc = short(ndisp * max_disc_threshold + 0.5); disp_load_constants(table_color.ptr(), table_space, ndisp, radius, edge_disc, max_disc); - if (&dst != &disp) - { + _dst.create(disp.size(), disp.type()); + GpuMat dst = _dst.getGpuMat(); + + if (dst.data != disp.data) disp.copyTo(dst, stream); - } disp_bilateral_filter(dst, img, img.channels(), iters, StreamAccessor::getStream(stream)); } - typedef void (*bilateral_filter_operator_t)(int ndisp, int radius, int iters, float edge_threshold, float max_disc_threshold, - GpuMat& table_color, GpuMat& table_space, - const GpuMat& disp, const GpuMat& img, GpuMat& dst, Stream& stream); + void DispBilateralFilterImpl::apply(InputArray _disp, InputArray _image, OutputArray dst, Stream& stream) + { + typedef void (*bilateral_filter_operator_t)(int ndisp, int radius, int iters, float 
edge_threshold, float max_disc_threshold, + GpuMat& table_color, GpuMat& table_space, + const GpuMat& disp, const GpuMat& img, OutputArray dst, Stream& stream); + const bilateral_filter_operator_t operators[] = + {disp_bilateral_filter_operator, 0, 0, disp_bilateral_filter_operator, 0, 0, 0, 0}; - const bilateral_filter_operator_t operators[] = - {disp_bilateral_filter_operator, 0, 0, disp_bilateral_filter_operator, 0, 0, 0, 0}; + CV_Assert( 0 < ndisp_ && 0 < radius_ && 0 < iters_ ); + + GpuMat disp = _disp.getGpuMat(); + GpuMat img = _image.getGpuMat(); + + CV_Assert( disp.type() == CV_8U || disp.type() == CV_16S ); + CV_Assert( img.type() == CV_8UC1 || img.type() == CV_8UC3 ); + CV_Assert( disp.size() == img.size() ); + + operators[disp.type()](ndisp_, radius_, iters_, edge_threshold_, max_disc_threshold_, + table_color_, table_space_, disp, img, dst, stream); + } } -cv::gpu::DisparityBilateralFilter::DisparityBilateralFilter(int ndisp_, int radius_, int iters_) - : ndisp(ndisp_), radius(radius_), iters(iters_), edge_threshold(DEFAULT_EDGE_THRESHOLD), max_disc_threshold(DEFAULT_MAX_DISC_THRESHOLD), - sigma_range(DEFAULT_SIGMA_RANGE) +Ptr cv::gpu::createDisparityBilateralFilter(int ndisp, int radius, int iters) { - calc_color_weighted_table(table_color, sigma_range, 255); - calc_space_weighted_filter(table_space, radius * 2 + 1, radius + 1.0f); -} - -cv::gpu::DisparityBilateralFilter::DisparityBilateralFilter(int ndisp_, int radius_, int iters_, float edge_threshold_, - float max_disc_threshold_, float sigma_range_) - : ndisp(ndisp_), radius(radius_), iters(iters_), edge_threshold(edge_threshold_), max_disc_threshold(max_disc_threshold_), - sigma_range(sigma_range_) -{ - calc_color_weighted_table(table_color, sigma_range, 255); - calc_space_weighted_filter(table_space, radius * 2 + 1, radius + 1.0f); -} - -void cv::gpu::DisparityBilateralFilter::operator()(const GpuMat& disp, const GpuMat& img, GpuMat& dst, Stream& stream) -{ - CV_DbgAssert(0 < ndisp && 0 < radius 
&& 0 < iters); - CV_Assert(disp.rows == img.rows && disp.cols == img.cols && (disp.type() == CV_8U || disp.type() == CV_16S) && (img.type() == CV_8UC1 || img.type() == CV_8UC3)); - operators[disp.type()](ndisp, radius, iters, edge_threshold, max_disc_threshold, table_color, table_space, disp, img, dst, stream); + return new DispBilateralFilterImpl(ndisp, radius, iters); } #endif /* !defined (HAVE_CUDA) */ From a380c473b9feb6ab093aa33b0461f1d913757a57 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 8 May 2013 16:51:43 +0400 Subject: [PATCH 085/121] switched to Input/Output Array in reprojectImageTo3D & drawColorDisp --- .../gpustereo/include/opencv2/gpustereo.hpp | 9 +++-- modules/gpustereo/src/util.cpp | 36 +++++++++++-------- 2 files changed, 25 insertions(+), 20 deletions(-) diff --git a/modules/gpustereo/include/opencv2/gpustereo.hpp b/modules/gpustereo/include/opencv2/gpustereo.hpp index c20c6f79b..d303c7897 100644 --- a/modules/gpustereo/include/opencv2/gpustereo.hpp +++ b/modules/gpustereo/include/opencv2/gpustereo.hpp @@ -172,21 +172,20 @@ public: CV_EXPORTS Ptr createDisparityBilateralFilter(int ndisp = 64, int radius = 3, int iters = 1); - - - +///////////////////////////////////////// +// Utility //! Reprojects disparity image to 3D space. //! Supports CV_8U and CV_16S types of input disparity. //! The output is a 3- or 4-channel floating-point matrix. //! Each element of this matrix will contain the 3D coordinates of the point (x,y,z,1), computed from the disparity map. //! Q is the 4x4 perspective transformation matrix that can be obtained with cvStereoRectify. -CV_EXPORTS void reprojectImageTo3D(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, int dst_cn = 4, Stream& stream = Stream::Null()); +CV_EXPORTS void reprojectImageTo3D(InputArray disp, OutputArray xyzw, InputArray Q, int dst_cn = 4, Stream& stream = Stream::Null()); //! Does coloring of disparity image: [0..ndisp) -> [0..240, 1, 1] in HSV. //! 
Supported types of input disparity: CV_8U, CV_16S. //! Output disparity has CV_8UC4 type in BGRA format (alpha = 255). -CV_EXPORTS void drawColorDisp(const GpuMat& src_disp, GpuMat& dst_disp, int ndisp, Stream& stream = Stream::Null()); +CV_EXPORTS void drawColorDisp(InputArray src_disp, OutputArray dst_disp, int ndisp, Stream& stream = Stream::Null()); }} // namespace cv { namespace gpu { diff --git a/modules/gpustereo/src/util.cpp b/modules/gpustereo/src/util.cpp index 9bff6fff2..e58b5a18e 100644 --- a/modules/gpustereo/src/util.cpp +++ b/modules/gpustereo/src/util.cpp @@ -47,8 +47,8 @@ using namespace cv::gpu; #if !defined HAVE_CUDA || defined(CUDA_DISABLER) -void cv::gpu::reprojectImageTo3D(const GpuMat&, GpuMat&, const Mat&, int, Stream&) { throw_no_cuda(); } -void cv::gpu::drawColorDisp(const GpuMat&, GpuMat&, int, Stream&) { throw_no_cuda(); } +void cv::gpu::reprojectImageTo3D(InputArray, OutputArray, InputArray, int, Stream&) { throw_no_cuda(); } +void cv::gpu::drawColorDisp(InputArray, OutputArray, int, Stream&) { throw_no_cuda(); } #else @@ -61,7 +61,7 @@ namespace cv { namespace gpu { namespace cudev void reprojectImageTo3D_gpu(const PtrStepSzb disp, PtrStepSzb xyz, const float* q, cudaStream_t stream); }}} -void cv::gpu::reprojectImageTo3D(const GpuMat& disp, GpuMat& xyz, const Mat& Q, int dst_cn, Stream& stream) +void cv::gpu::reprojectImageTo3D(InputArray _disp, OutputArray _xyz, InputArray _Q, int dst_cn, Stream& stream) { using namespace cv::gpu::cudev; @@ -72,11 +72,15 @@ void cv::gpu::reprojectImageTo3D(const GpuMat& disp, GpuMat& xyz, const Mat& Q, {reprojectImageTo3D_gpu, 0, 0, reprojectImageTo3D_gpu} }; - CV_Assert(disp.type() == CV_8U || disp.type() == CV_16S); - CV_Assert(Q.type() == CV_32F && Q.rows == 4 && Q.cols == 4 && Q.isContinuous()); - CV_Assert(dst_cn == 3 || dst_cn == 4); + GpuMat disp = _disp.getGpuMat(); + Mat Q = _Q.getMat(); - xyz.create(disp.size(), CV_MAKE_TYPE(CV_32F, dst_cn)); + CV_Assert( disp.type() == CV_8U || disp.type() 
== CV_16S ); + CV_Assert( Q.type() == CV_32F && Q.rows == 4 && Q.cols == 4 && Q.isContinuous() ); + CV_Assert( dst_cn == 3 || dst_cn == 4 ); + + _xyz.create(disp.size(), CV_MAKE_TYPE(CV_32F, dst_cn)); + GpuMat xyz = _xyz.getGpuMat(); funcs[dst_cn == 4][disp.type()](disp, xyz, Q.ptr(), StreamAccessor::getStream(stream)); } @@ -93,23 +97,25 @@ namespace cv { namespace gpu { namespace cudev namespace { template - void drawColorDisp_caller(const GpuMat& src, GpuMat& dst, int ndisp, const cudaStream_t& stream) + void drawColorDisp_caller(const GpuMat& src, OutputArray _dst, int ndisp, const cudaStream_t& stream) { using namespace ::cv::gpu::cudev; - dst.create(src.size(), CV_8UC4); + _dst.create(src.size(), CV_8UC4); + GpuMat dst = _dst.getGpuMat(); drawColorDisp_gpu((PtrStepSz)src, dst, ndisp, stream); } - - typedef void (*drawColorDisp_caller_t)(const GpuMat& src, GpuMat& dst, int ndisp, const cudaStream_t& stream); - - const drawColorDisp_caller_t drawColorDisp_callers[] = {drawColorDisp_caller, 0, 0, drawColorDisp_caller, 0, 0, 0, 0}; } -void cv::gpu::drawColorDisp(const GpuMat& src, GpuMat& dst, int ndisp, Stream& stream) +void cv::gpu::drawColorDisp(InputArray _src, OutputArray dst, int ndisp, Stream& stream) { - CV_Assert(src.type() == CV_8U || src.type() == CV_16S); + typedef void (*drawColorDisp_caller_t)(const GpuMat& src, OutputArray dst, int ndisp, const cudaStream_t& stream); + const drawColorDisp_caller_t drawColorDisp_callers[] = {drawColorDisp_caller, 0, 0, drawColorDisp_caller, 0, 0, 0, 0}; + + GpuMat src = _src.getGpuMat(); + + CV_Assert( src.type() == CV_8U || src.type() == CV_16S ); drawColorDisp_callers[src.type()](src, dst, ndisp, StreamAccessor::getStream(stream)); } From dbc9b4db0cdf28509e2ae60cf95763c525fba098 Mon Sep 17 00:00:00 2001 From: Vikas Dhiman Date: Fri, 14 Jun 2013 11:38:29 -0400 Subject: [PATCH 086/121] support elementwise division for Matx with "/" operator. 
--- modules/core/include/opencv2/core/matx.hpp | 17 +++++++++++++++- modules/core/test/test_operations.cpp | 23 ++++++++++++++++++++++ 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/modules/core/include/opencv2/core/matx.hpp b/modules/core/include/opencv2/core/matx.hpp index 6115e3de1..52c63d4ca 100644 --- a/modules/core/include/opencv2/core/matx.hpp +++ b/modules/core/include/opencv2/core/matx.hpp @@ -77,6 +77,7 @@ struct CV_EXPORTS Matx_AddOp {}; struct CV_EXPORTS Matx_SubOp {}; struct CV_EXPORTS Matx_ScaleOp {}; struct CV_EXPORTS Matx_MulOp {}; +struct CV_EXPORTS Matx_DivOp {}; struct CV_EXPORTS Matx_MatMulOp {}; struct CV_EXPORTS Matx_TOp {}; @@ -174,6 +175,7 @@ public: Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_SubOp); template Matx(const Matx<_Tp, m, n>& a, _T2 alpha, Matx_ScaleOp); Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_MulOp); + Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_DivOp); template Matx(const Matx<_Tp, m, l>& a, const Matx<_Tp, l, n>& b, Matx_MatMulOp); Matx(const Matx<_Tp, n, m>& a, Matx_TOp); @@ -746,6 +748,13 @@ Matx<_Tp,m,n>::Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_Mul val[i] = saturate_cast<_Tp>(a.val[i] * b.val[i]); } +template inline +Matx<_Tp,m,n>::Matx(const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b, Matx_DivOp) +{ + for( int i = 0; i < channels; i++ ) + val[i] = saturate_cast<_Tp>(a.val[i] / b.val[i]); +} + template template inline Matx<_Tp,m,n>::Matx(const Matx<_Tp, m, l>& a, const Matx<_Tp, l, n>& b, Matx_MatMulOp) { @@ -1162,6 +1171,12 @@ Vec<_Tp, m> operator * (const Matx<_Tp, m, n>& a, const Vec<_Tp, n>& b) return (const Vec<_Tp, m>&)(c); } +template static inline +Matx<_Tp, m, n> operator / (const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b) +{ + return Matx<_Tp, m, n>(a, b, Matx_DivOp()); +} + template static inline bool operator == (const Matx<_Tp, m, n>& a, const Matx<_Tp, m, n>& b) { @@ -1337,4 +1352,4 @@ template inline Vec<_Tp, 
4>& operator *= (Vec<_Tp, 4>& v1, const V } // cv -#endif // __OPENCV_CORE_MATX_HPP__ \ No newline at end of file +#endif // __OPENCV_CORE_MATX_HPP__ diff --git a/modules/core/test/test_operations.cpp b/modules/core/test/test_operations.cpp index 6b36883cf..84a2e573e 100644 --- a/modules/core/test/test_operations.cpp +++ b/modules/core/test/test_operations.cpp @@ -75,6 +75,7 @@ protected: bool TestSparseMat(); bool TestVec(); bool TestMatxMultiplication(); + bool TestMatxElementwiseDivison(); bool TestSubMatAccess(); bool TestExp(); bool TestSVD(); @@ -891,6 +892,28 @@ bool CV_OperationsTest::TestMatxMultiplication() return true; } +bool CV_OperationsTest::TestMatxElementwiseDivison() +{ + try + { + Matx22f mat(2, 4, 6, 8); // dividend + Matx22f mat2(2, 2, 2, 2); + + Matx22f res = mat / mat2; + + if(res(0, 0) != 1.0) throw test_excep(); + if(res(0, 1) != 2.0) throw test_excep(); + if(res(1, 0) != 3.0) throw test_excep(); + if(res(1, 1) != 4.0) throw test_excep(); + } + catch(const test_excep&) + { + ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT); + return false; + } + return true; +} + bool CV_OperationsTest::TestVec() { From 01e97331552fa2c41124eb08fd7c9bee4c0c7e22 Mon Sep 17 00:00:00 2001 From: Andrey Pavlenko Date: Fri, 14 Jun 2013 20:25:23 +0400 Subject: [PATCH 087/121] fix for #3077 removing "-fvisibility=hidden" gcc option due to lack of `__attribute__ ((visibility("default")))` in jni_md.h/JNIEXPORT --- modules/java/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/modules/java/CMakeLists.txt b/modules/java/CMakeLists.txt index 30aa9efe6..ad10aafc5 100644 --- a/modules/java/CMakeLists.txt +++ b/modules/java/CMakeLists.txt @@ -269,6 +269,10 @@ else(ANDROID) endif(ANDROID) # step 5: build native part + +# work around the lack of `__attribute__ ((visibility("default")))` in jni_md.h/JNIEXPORT +string(REPLACE "-fvisibility=hidden" "" CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}") + add_library(${the_module} SHARED 
${handwrittren_h_sources} ${handwrittren_cpp_sources} ${generated_cpp_sources} ${copied_files} "${JAR_FILE}" "${JAR_FILE}.dephelper") From 24108f81106d4fcbef0a18384d3c8cc5325898a8 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Mon, 17 Jun 2013 09:53:39 +0400 Subject: [PATCH 088/121] updated documentation --- modules/gpuarithm/doc/arithm.rst | 2 - modules/gpufilters/doc/filtering.rst | 887 +++++------------- .../gpufilters/include/opencv2/gpufilters.hpp | 41 +- 3 files changed, 279 insertions(+), 651 deletions(-) diff --git a/modules/gpuarithm/doc/arithm.rst b/modules/gpuarithm/doc/arithm.rst index 2f1d74df5..09b7220c0 100644 --- a/modules/gpuarithm/doc/arithm.rst +++ b/modules/gpuarithm/doc/arithm.rst @@ -157,8 +157,6 @@ Computes a convolution (or cross-correlation) of two images. :param stream: Stream for the asynchronous version. -.. seealso:: :ocv:func:`gpu::filter2D` - gpu::createConvolution diff --git a/modules/gpufilters/doc/filtering.rst b/modules/gpufilters/doc/filtering.rst index 79c2ea51c..925b05f2c 100644 --- a/modules/gpufilters/doc/filtering.rst +++ b/modules/gpufilters/doc/filtering.rst @@ -7,346 +7,236 @@ Functions and classes described in this section are used to perform various line -gpu::BaseRowFilter_GPU ----------------------- -.. ocv:class:: gpu::BaseRowFilter_GPU +gpu::Filter +----------- +.. ocv:class:: gpu::Filter -Base class for linear or non-linear filters that processes rows of 2D arrays. Such filters are used for the "horizontal" filtering passes in separable filters. :: +Common interface for all GPU filters :: - class BaseRowFilter_GPU + class CV_EXPORTS Filter : public Algorithm { public: - BaseRowFilter_GPU(int ksize_, int anchor_); - virtual ~BaseRowFilter_GPU() {} - virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()) = 0; - int ksize, anchor; + virtual void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null()) = 0; }; -.. 
note:: This class does not allocate memory for a destination image. Usually this class is used inside :ocv:class:`gpu::FilterEngine_GPU`. - - -gpu::BaseColumnFilter_GPU -------------------------- -.. ocv:class:: gpu::BaseColumnFilter_GPU - -Base class for linear or non-linear filters that processes columns of 2D arrays. Such filters are used for the "vertical" filtering passes in separable filters. :: - - class BaseColumnFilter_GPU - { - public: - BaseColumnFilter_GPU(int ksize_, int anchor_); - virtual ~BaseColumnFilter_GPU() {} - virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()) = 0; - int ksize, anchor; - }; - - -.. note:: This class does not allocate memory for a destination image. Usually this class is used inside :ocv:class:`gpu::FilterEngine_GPU`. - - - -gpu::BaseFilter_GPU -------------------- -.. ocv:class:: gpu::BaseFilter_GPU - -Base class for non-separable 2D filters. :: - - class CV_EXPORTS BaseFilter_GPU - { - public: - BaseFilter_GPU(const Size& ksize_, const Point& anchor_); - virtual ~BaseFilter_GPU() {} - virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()) = 0; - Size ksize; - Point anchor; - }; - - -.. note:: This class does not allocate memory for a destination image. Usually this class is used inside :ocv:class:`gpu::FilterEngine_GPU`. - - - -gpu::FilterEngine_GPU ---------------------- -.. ocv:class:: gpu::FilterEngine_GPU - -Base class for the Filter Engine. :: - - class CV_EXPORTS FilterEngine_GPU - { - public: - virtual ~FilterEngine_GPU() {} - - virtual void apply(const GpuMat& src, GpuMat& dst, - Rect roi = Rect(0,0,-1,-1), Stream& stream = Stream::Null()) = 0; - }; - - -The class can be used to apply an arbitrary filtering operation to an image. It contains all the necessary intermediate buffers. 
Pointers to the initialized ``FilterEngine_GPU`` instances are returned by various ``create*Filter_GPU`` functions (see below), and they are used inside high-level functions such as :ocv:func:`gpu::filter2D`, :ocv:func:`gpu::erode`, :ocv:func:`gpu::Sobel` , and others. - -By using ``FilterEngine_GPU`` instead of functions you can avoid unnecessary memory allocation for intermediate buffers and get better performance: :: - - while (...) - { - gpu::GpuMat src = getImg(); - gpu::GpuMat dst; - // Allocate and release buffers at each iterations - gpu::GaussianBlur(src, dst, ksize, sigma1); - } - - // Allocate buffers only once - cv::Ptr filter = - gpu::createGaussianFilter_GPU(CV_8UC4, ksize, sigma1); - while (...) - { - gpu::GpuMat src = getImg(); - gpu::GpuMat dst; - filter->apply(src, dst, cv::Rect(0, 0, src.cols, src.rows)); - } - // Release buffers only once - filter.release(); - - -``FilterEngine_GPU`` can process a rectangular sub-region of an image. By default, if ``roi == Rect(0,0,-1,-1)`` , ``FilterEngine_GPU`` processes the inner region of an image ( ``Rect(anchor.x, anchor.y, src_size.width - ksize.width, src_size.height - ksize.height)`` ) because some filters do not check whether indices are outside the image for better performance. See below to understand which filters support processing the whole image and which do not and identify image type limitations. - -.. note:: The GPU filters do not support the in-place mode. - -.. 
seealso:: :ocv:class:`gpu::BaseRowFilter_GPU`, :ocv:class:`gpu::BaseColumnFilter_GPU`, :ocv:class:`gpu::BaseFilter_GPU`, :ocv:func:`gpu::createFilter2D_GPU`, :ocv:func:`gpu::createSeparableFilter_GPU`, :ocv:func:`gpu::createBoxFilter_GPU`, :ocv:func:`gpu::createMorphologyFilter_GPU`, :ocv:func:`gpu::createLinearFilter_GPU`, :ocv:func:`gpu::createSeparableLinearFilter_GPU`, :ocv:func:`gpu::createDerivFilter_GPU`, :ocv:func:`gpu::createGaussianFilter_GPU` - - - -gpu::createFilter2D_GPU ---------------------------- -Creates a non-separable filter engine with the specified filter. - -.. ocv:function:: Ptr gpu::createFilter2D_GPU( const Ptr& filter2D, int srcType, int dstType) - - :param filter2D: Non-separable 2D filter. - - :param srcType: Input image type. It must be supported by ``filter2D`` . - - :param dstType: Output image type. It must be supported by ``filter2D`` . - -Usually this function is used inside such high-level functions as :ocv:func:`gpu::createLinearFilter_GPU`, :ocv:func:`gpu::createBoxFilter_GPU`. - - - -gpu::createSeparableFilter_GPU ----------------------------------- -Creates a separable filter engine with the specified filters. - -.. ocv:function:: Ptr gpu::createSeparableFilter_GPU( const Ptr& rowFilter, const Ptr& columnFilter, int srcType, int bufType, int dstType) - - :param rowFilter: "Horizontal" 1D filter. - - :param columnFilter: "Vertical" 1D filter. - - :param srcType: Input image type. It must be supported by ``rowFilter`` . - - :param bufType: Buffer image type. It must be supported by ``rowFilter`` and ``columnFilter`` . - - :param dstType: Output image type. It must be supported by ``columnFilter`` . - -Usually this function is used inside such high-level functions as :ocv:func:`gpu::createSeparableLinearFilter_GPU`. - - - -gpu::getRowSumFilter_GPU ----------------------------- -Creates a horizontal 1D box filter. - -.. 
ocv:function:: Ptr gpu::getRowSumFilter_GPU(int srcType, int sumType, int ksize, int anchor = -1) - - :param srcType: Input image type. Only ``CV_8UC1`` type is supported for now. - - :param sumType: Output image type. Only ``CV_32FC1`` type is supported for now. - - :param ksize: Kernel size. - - :param anchor: Anchor point. The default value (-1) means that the anchor is at the kernel center. - -.. note:: This filter does not check out-of-border accesses, so only a proper sub-matrix of a bigger matrix has to be passed to it. - - - -gpu::getColumnSumFilter_GPU -------------------------------- -Creates a vertical 1D box filter. - -.. ocv:function:: Ptr gpu::getColumnSumFilter_GPU(int sumType, int dstType, int ksize, int anchor = -1) - - :param sumType: Input image type. Only ``CV_8UC1`` type is supported for now. - - :param dstType: Output image type. Only ``CV_32FC1`` type is supported for now. - - :param ksize: Kernel size. - - :param anchor: Anchor point. The default value (-1) means that the anchor is at the kernel center. - -.. note:: This filter does not check out-of-border accesses, so only a proper sub-matrix of a bigger matrix has to be passed to it. - - - -gpu::createBoxFilter_GPU ----------------------------- -Creates a normalized 2D box filter. - -.. ocv:function:: Ptr gpu::createBoxFilter_GPU(int srcType, int dstType, const Size& ksize, const Point& anchor = Point(-1,-1)) - -.. ocv:function:: Ptr gpu::getBoxFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor = Point(-1, -1)) - - :param srcType: Input image type supporting ``CV_8UC1`` and ``CV_8UC4`` . - - :param dstType: Output image type. It supports only the same values as the source type. - - :param ksize: Kernel size. - - :param anchor: Anchor point. The default value ``Point(-1, -1)`` means that the anchor is at the kernel center. - -.. note:: This filter does not check out-of-border accesses, so only a proper sub-matrix of a bigger matrix has to be passed to it. - -.. 
seealso:: :ocv:func:`boxFilter` - - - -gpu::boxFilter +gpu::Filter::apply ------------------ -Smooths the image using the normalized box filter. +Applies the specified filter to the image. -.. ocv:function:: void gpu::boxFilter(const GpuMat& src, GpuMat& dst, int ddepth, Size ksize, Point anchor = Point(-1,-1), Stream& stream = Stream::Null()) +.. ocv:function:: void gpu::Filter::apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null()) = 0 - :param src: Input image. ``CV_8UC1`` and ``CV_8UC4`` source types are supported. + :param src: Input image. - :param dst: Output image type. The size and type is the same as ``src`` . - - :param ddepth: Output image depth. If -1, the output image has the same depth as the input one. The only values allowed here are ``CV_8U`` and -1. - - :param ksize: Kernel size. - - :param anchor: Anchor point. The default value ``Point(-1, -1)`` means that the anchor is at the kernel center. + :param dst: Output image. :param stream: Stream for the asynchronous version. -.. note:: This filter does not check out-of-border accesses, so only a proper sub-matrix of a bigger matrix has to be passed to it. + + +gpu::createBoxFilter +-------------------- +Creates a normalized 2D box filter. + +.. ocv:function:: Ptr gpu::createBoxFilter(int srcType, int dstType, Size ksize, Point anchor = Point(-1,-1), int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0)) + + :param srcType: Input image type. Only ``CV_8UC1`` and ``CV_8UC4`` are supported for now. + + :param dstType: Output image type. Only the same type as ``src`` is supported for now. + + :param ksize: Kernel size. + + :param anchor: Anchor point. The default value ``Point(-1, -1)`` means that the anchor is at the kernel center. + + :param borderMode: Pixel extrapolation method. For details, see :ocv:func:`borderInterpolate` . + + :param borderVal: Default border value. .. 
seealso:: :ocv:func:`boxFilter` -gpu::blur -------------- -Acts as a synonym for the normalized box filter. +gpu::createLinearFilter +----------------------- +Creates a non-separable linear 2D filter. -.. ocv:function:: void gpu::blur(const GpuMat& src, GpuMat& dst, Size ksize, Point anchor = Point(-1,-1), Stream& stream = Stream::Null()) +.. ocv:function:: Ptr gpu::createLinearFilter(int srcType, int dstType, InputArray kernel, Point anchor = Point(-1,-1), int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0)) - :param src: Input image. ``CV_8UC1`` and ``CV_8UC4`` source types are supported. + :param srcType: Input image type. Supports ``CV_8U`` , ``CV_16U`` and ``CV_32F`` one and four channel image. - :param dst: Output image type with the same size and type as ``src`` . + :param dstType: Output image type. Only the same type as ``src`` is supported for now. - :param ksize: Kernel size. + :param kernel: 2D array of filter coefficients. :param anchor: Anchor point. The default value Point(-1, -1) means that the anchor is at the kernel center. - :param stream: Stream for the asynchronous version. + :param borderMode: Pixel extrapolation method. For details, see :ocv:func:`borderInterpolate` . -.. note:: This filter does not check out-of-border accesses, so only a proper sub-matrix of a bigger matrix has to be passed to it. + :param borderVal: Default border value. -.. seealso:: :ocv:func:`blur`, :ocv:func:`gpu::boxFilter` +.. seealso:: :ocv:func:`filter2D` -gpu::createMorphologyFilter_GPU ------------------------------------ +gpu::createLaplacianFilter +-------------------------- +Creates a Laplacian operator. + +.. ocv:function:: Ptr gpu::createLaplacianFilter(int srcType, int dstType, int ksize = 1, double scale = 1, int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0)) + + :param srcType: Input image type. Supports ``CV_8U`` , ``CV_16U`` and ``CV_32F`` one and four channel image. + + :param dstType: Output image type. 
Only the same type as ``srcType`` is supported for now. + + :param ksize: Aperture size used to compute the second-derivative filters (see :ocv:func:`getDerivKernels`). It must be positive and odd. Only ``ksize`` = 1 and ``ksize`` = 3 are supported. + + :param scale: Optional scale factor for the computed Laplacian values. By default, no scaling is applied (see :ocv:func:`getDerivKernels` ). + + :param borderMode: Pixel extrapolation method. For details, see :ocv:func:`borderInterpolate` . + + :param borderVal: Default border value. + +.. seealso:: :ocv:func:`Laplacian` + + + +gpu::createSeparableLinearFilter +-------------------------------- +Creates a separable linear filter. + +.. ocv:function:: Ptr<Filter> gpu::createSeparableLinearFilter(int srcType, int dstType, InputArray rowKernel, InputArray columnKernel, Point anchor = Point(-1,-1), int rowBorderMode = BORDER_DEFAULT, int columnBorderMode = -1) + + :param srcType: Source array type. + + :param dstType: Destination array type. + + :param rowKernel: Horizontal filter coefficients. Supports kernels with ``size <= 32`` . + + :param columnKernel: Vertical filter coefficients. Supports kernels with ``size <= 32`` . + + :param anchor: Anchor position within the kernel. Negative values mean that the anchor is positioned at the aperture center. + + :param rowBorderMode: Pixel extrapolation method in the vertical direction. For details, see :ocv:func:`borderInterpolate`. + + :param columnBorderMode: Pixel extrapolation method in the horizontal direction. + +.. seealso:: :ocv:func:`sepFilter2D` + + + +gpu::createDerivFilter +---------------------- +Creates a generalized Deriv operator. + +.. ocv:function:: Ptr<Filter> gpu::createDerivFilter(int srcType, int dstType, int dx, int dy, int ksize, bool normalize = false, double scale = 1, int rowBorderMode = BORDER_DEFAULT, int columnBorderMode = -1) + + :param srcType: Source image type. + + :param dstType: Destination array type. + + :param dx: Derivative order in respect of x. 
+ + :param dy: Derivative order in respect of y. + + :param ksize: Aperture size. See :ocv:func:`getDerivKernels` for details. + + :param normalize: Flag indicating whether to normalize (scale down) the filter coefficients or not. See :ocv:func:`getDerivKernels` for details. + + :param scale: Optional scale factor for the computed derivative values. By default, no scaling is applied. For details, see :ocv:func:`getDerivKernels` . + + :param rowBorderMode: Pixel extrapolation method in the vertical direction. For details, see :ocv:func:`borderInterpolate`. + + :param columnBorderMode: Pixel extrapolation method in the horizontal direction. + + + +gpu::createSobelFilter +---------------------- +Creates a Sobel operator. + +.. ocv:function:: Ptr gpu::createSobelFilter(int srcType, int dstType, int dx, int dy, int ksize = 3, double scale = 1, int rowBorderMode = BORDER_DEFAULT, int columnBorderMode = -1) + + :param srcType: Source image type. + + :param dstType: Destination array type. + + :param dx: Derivative order in respect of x. + + :param dy: Derivative order in respect of y. + + :param ksize: Size of the extended Sobel kernel. Possible values are 1, 3, 5 or 7. + + :param scale: Optional scale factor for the computed derivative values. By default, no scaling is applied. For details, see :ocv:func:`getDerivKernels` . + + :param rowBorderMode: Pixel extrapolation method in the vertical direction. For details, see :ocv:func:`borderInterpolate`. + + :param columnBorderMode: Pixel extrapolation method in the horizontal direction. + +.. seealso:: :ocv:func:`Sobel` + + + +gpu::createScharrFilter +----------------------- +Creates a vertical or horizontal Scharr operator. + +.. ocv:function:: Ptr gpu::createScharrFilter(int srcType, int dstType, int dx, int dy, double scale = 1, int rowBorderMode = BORDER_DEFAULT, int columnBorderMode = -1) + + :param srcType: Source image type. + + :param dstType: Destination array type. + + :param dx: Order of the derivative x. 
+ + :param dy: Order of the derivative y. + + :param scale: Optional scale factor for the computed derivative values. By default, no scaling is applied. See :ocv:func:`getDerivKernels` for details. + + :param rowBorderMode: Pixel extrapolation method in the vertical direction. For details, see :ocv:func:`borderInterpolate`. + + :param columnBorderMode: Pixel extrapolation method in the horizontal direction. + +.. seealso:: :ocv:func:`Scharr` + + + +gpu::createGaussianFilter +------------------------- +Creates a Gaussian filter. + +.. ocv:function:: Ptr gpu::createGaussianFilter(int srcType, int dstType, Size ksize, double sigma1, double sigma2 = 0, int rowBorderMode = BORDER_DEFAULT, int columnBorderMode = -1) + + :param srcType: Source image type. + + :param dstType: Destination array type. + + :param ksize: Aperture size. See :ocv:func:`getGaussianKernel` for details. + + :param sigma1: Gaussian sigma in the horizontal direction. See :ocv:func:`getGaussianKernel` for details. + + :param sigma2: Gaussian sigma in the vertical direction. If 0, then :math:`\texttt{sigma2}\leftarrow\texttt{sigma1}` . + + :param rowBorderMode: Pixel extrapolation method in the vertical direction. For details, see :ocv:func:`borderInterpolate`. + + :param columnBorderMode: Pixel extrapolation method in the horizontal direction. + +.. seealso:: :ocv:func:`GaussianBlur` + + + +gpu::createMorphologyFilter +--------------------------- Creates a 2D morphological filter. -.. ocv:function:: Ptr gpu::createMorphologyFilter_GPU(int op, int type, const Mat& kernel, const Point& anchor = Point(-1,-1), int iterations = 1) - -.. ocv:function:: Ptr gpu::getMorphologyFilter_GPU(int op, int type, const Mat& kernel, const Size& ksize, Point anchor=Point(-1,-1)) - - :param op: Morphology operation id. Only ``MORPH_ERODE`` and ``MORPH_DILATE`` are supported. - - :param type: Input/output image type. Only ``CV_8UC1`` and ``CV_8UC4`` are supported. 
- - :param kernel: 2D 8-bit structuring element for the morphological operation. - - :param ksize: Size of a horizontal or vertical structuring element used for separable morphological operations. - - :param anchor: Anchor position within the structuring element. Negative values mean that the anchor is at the center. - -.. note:: This filter does not check out-of-border accesses, so only a proper sub-matrix of a bigger matrix has to be passed to it. - -.. seealso:: :ocv:func:`createMorphologyFilter` - - - -gpu::erode --------------- -Erodes an image by using a specific structuring element. - -.. ocv:function:: void gpu::erode( const GpuMat& src, GpuMat& dst, const Mat& kernel, Point anchor=Point(-1, -1), int iterations=1 ) - -.. ocv:function:: void gpu::erode( const GpuMat& src, GpuMat& dst, const Mat& kernel, GpuMat& buf, Point anchor=Point(-1, -1), int iterations=1, Stream& stream=Stream::Null() ) - - :param src: Source image. Only ``CV_8UC1`` and ``CV_8UC4`` types are supported. - - :param dst: Destination image with the same size and type as ``src`` . - - :param kernel: Structuring element used for erosion. If ``kernel=Mat()``, a 3x3 rectangular structuring element is used. - - :param anchor: Position of an anchor within the element. The default value ``(-1, -1)`` means that the anchor is at the element center. - - :param iterations: Number of times erosion to be applied. - - :param stream: Stream for the asynchronous version. - -.. note:: This filter does not check out-of-border accesses, so only a proper sub-matrix of a bigger matrix has to be passed to it. - -.. seealso:: :ocv:func:`erode` - - - -gpu::dilate ---------------- -Dilates an image by using a specific structuring element. - -.. ocv:function:: void gpu::dilate( const GpuMat& src, GpuMat& dst, const Mat& kernel, Point anchor=Point(-1, -1), int iterations=1 ) - -.. 
ocv:function:: void gpu::dilate( const GpuMat& src, GpuMat& dst, const Mat& kernel, GpuMat& buf, Point anchor=Point(-1, -1), int iterations=1, Stream& stream=Stream::Null() ) - - :param src: Source image. ``CV_8UC1`` and ``CV_8UC4`` source types are supported. - - :param dst: Destination image with the same size and type as ``src``. - - :param kernel: Structuring element used for dilation. If ``kernel=Mat()``, a 3x3 rectangular structuring element is used. - - :param anchor: Position of an anchor within the element. The default value ``(-1, -1)`` means that the anchor is at the element center. - - :param iterations: Number of times dilation to be applied. - - :param stream: Stream for the asynchronous version. - -.. note:: This filter does not check out-of-border accesses, so only a proper sub-matrix of a bigger matrix has to be passed to it. - -.. seealso:: :ocv:func:`dilate` - - - -gpu::morphologyEx ---------------------- -Applies an advanced morphological operation to an image. - -.. ocv:function:: void gpu::morphologyEx( const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, Point anchor=Point(-1, -1), int iterations=1 ) - -.. ocv:function:: void gpu::morphologyEx( const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, GpuMat& buf1, GpuMat& buf2, Point anchor=Point(-1, -1), int iterations=1, Stream& stream=Stream::Null() ) - - :param src: Source image. ``CV_8UC1`` and ``CV_8UC4`` source types are supported. - - :param dst: Destination image with the same size and type as ``src`` . +.. ocv:function:: Ptr gpu::createMorphologyFilter(int op, int srcType, InputArray kernel, Point anchor = Point(-1, -1), int iterations = 1) :param op: Type of morphological operation. The following types are possible: + * **MORPH_ERODE** erode + + * **MORPH_DILATE** dilate + * **MORPH_OPEN** opening * **MORPH_CLOSE** closing @@ -357,363 +247,88 @@ Applies an advanced morphological operation to an image. * **MORPH_BLACKHAT** "black hat" - :param kernel: Structuring element. 
+ :param srcType: Input/output image type. Only ``CV_8UC1`` and ``CV_8UC4`` are supported. - :param anchor: Position of an anchor within the element. The default value ``Point(-1, -1)`` means that the anchor is at the element center. + :param kernel: 2D 8-bit structuring element for the morphological operation. + + :param anchor: Anchor position within the structuring element. Negative values mean that the anchor is at the center. :param iterations: Number of times erosion and dilation to be applied. - :param stream: Stream for the asynchronous version. - -.. note:: This filter does not check out-of-border accesses, so only a proper sub-matrix of a bigger matrix has to be passed to it. - .. seealso:: :ocv:func:`morphologyEx` -gpu::createLinearFilter_GPU -------------------------------- -Creates a non-separable linear filter. - -.. ocv:function:: Ptr gpu::createLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, Point anchor = Point(-1,-1), int borderType = BORDER_DEFAULT) - - :param srcType: Input image type. Supports ``CV_8U`` , ``CV_16U`` and ``CV_32F`` one and four channel image. - - :param dstType: Output image type. The same type as ``src`` is supported. - - :param kernel: 2D array of filter coefficients. Floating-point coefficients will be converted to fixed-point representation before the actual processing. Supports size up to 16. For larger kernels use :ocv:class:`gpu::Convolution`. - - :param anchor: Anchor point. The default value Point(-1, -1) means that the anchor is at the kernel center. - - :param borderType: Pixel extrapolation method. For details, see :ocv:func:`borderInterpolate` . - -.. seealso:: :ocv:func:`createLinearFilter` - - - -gpu::filter2D ------------------ -Applies the non-separable 2D linear filter to an image. - -.. 
ocv:function:: void gpu::filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernel, Point anchor=Point(-1,-1), int borderType = BORDER_DEFAULT, Stream& stream = Stream::Null()) - - :param src: Source image. Supports ``CV_8U`` , ``CV_16U`` and ``CV_32F`` one and four channel image. - - :param dst: Destination image. The size and the number of channels is the same as ``src`` . - - :param ddepth: Desired depth of the destination image. If it is negative, it is the same as ``src.depth()`` . It supports only the same depth as the source image depth. - - :param kernel: 2D array of filter coefficients. - - :param anchor: Anchor of the kernel that indicates the relative position of a filtered point within the kernel. The anchor resides within the kernel. The special default value (-1,-1) means that the anchor is at the kernel center. - - :param borderType: Pixel extrapolation method. For details, see :ocv:func:`borderInterpolate` . - - :param stream: Stream for the asynchronous version. - -.. seealso:: :ocv:func:`filter2D`, :ocv:class:`gpu::Convolution` - - - -gpu::Laplacian ------------------- -Applies the Laplacian operator to an image. - -.. ocv:function:: void gpu::Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize = 1, double scale = 1, int borderType = BORDER_DEFAULT, Stream& stream = Stream::Null()) - - :param src: Source image. ``CV_8UC1`` and ``CV_8UC4`` source types are supported. - - :param dst: Destination image. The size and number of channels is the same as ``src`` . - - :param ddepth: Desired depth of the destination image. It supports only the same depth as the source image depth. - - :param ksize: Aperture size used to compute the second-derivative filters (see :ocv:func:`getDerivKernels`). It must be positive and odd. Only ``ksize`` = 1 and ``ksize`` = 3 are supported. - - :param scale: Optional scale factor for the computed Laplacian values. By default, no scaling is applied (see :ocv:func:`getDerivKernels` ). 
- - :param borderType: Pixel extrapolation method. For details, see :ocv:func:`borderInterpolate` . - - :param stream: Stream for the asynchronous version. - -.. note:: This filter does not check out-of-border accesses, so only a proper sub-matrix of a bigger matrix has to be passed to it. - -.. seealso:: :ocv:func:`Laplacian`, :ocv:func:`gpu::filter2D` - - - -gpu::getLinearRowFilter_GPU -------------------------------- -Creates a primitive row filter with the specified kernel. - -.. ocv:function:: Ptr gpu::getLinearRowFilter_GPU( int srcType, int bufType, const Mat& rowKernel, int anchor=-1, int borderType=BORDER_DEFAULT ) - - :param srcType: Source array type. Only ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_16SC3`` , ``CV_32SC1`` , ``CV_32FC1`` source types are supported. - - :param bufType: Intermediate buffer type with as many channels as ``srcType`` . - - :param rowKernel: Filter coefficients. Support kernels with ``size <= 16`` . - - :param anchor: Anchor position within the kernel. Negative values mean that the anchor is positioned at the aperture center. - - :param borderType: Pixel extrapolation method. For details, see :ocv:func:`borderInterpolate`. For details on limitations, see below. - -There are two versions of the algorithm: NPP and OpenCV. - - * NPP version is called when ``srcType == CV_8UC1`` or ``srcType == CV_8UC4`` and ``bufType == srcType`` . Otherwise, the OpenCV version is called. NPP supports only ``BORDER_CONSTANT`` border type and does not check indices outside the image. - - * OpenCV version supports only ``CV_32F`` buffer depth and ``BORDER_REFLECT101`` , ``BORDER_REPLICATE`` , and ``BORDER_CONSTANT`` border types. It checks indices outside the image. - -.. seealso:: :ocv:func:`createSeparableLinearFilter` . - - - -gpu::getLinearColumnFilter_GPU ----------------------------------- -Creates a primitive column filter with the specified kernel. - -.. 
ocv:function:: Ptr gpu::getLinearColumnFilter_GPU( int bufType, int dstType, const Mat& columnKernel, int anchor=-1, int borderType=BORDER_DEFAULT ) - - :param bufType: Intermediate buffer type with as many channels as ``dstType`` . - - :param dstType: Destination array type. ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_16SC3`` , ``CV_32SC1`` , ``CV_32FC1`` destination types are supported. - - :param columnKernel: Filter coefficients. Support kernels with ``size <= 16`` . - - :param anchor: Anchor position within the kernel. Negative values mean that the anchor is positioned at the aperture center. - - :param borderType: Pixel extrapolation method. For details, see :ocv:func:`borderInterpolate` . For details on limitations, see below. - -There are two versions of the algorithm: NPP and OpenCV. - - * NPP version is called when ``dstType == CV_8UC1`` or ``dstType == CV_8UC4`` and ``bufType == dstType`` . Otherwise, the OpenCV version is called. NPP supports only ``BORDER_CONSTANT`` border type and does not check indices outside the image. - - * OpenCV version supports only ``CV_32F`` buffer depth and ``BORDER_REFLECT101`` , ``BORDER_REPLICATE`` , and ``BORDER_CONSTANT`` border types. It checks indices outside image. - -.. seealso:: :ocv:func:`gpu::getLinearRowFilter_GPU`, :ocv:func:`createSeparableLinearFilter` - - - -gpu::createSeparableLinearFilter_GPU ----------------------------------------- -Creates a separable linear filter engine. - -.. ocv:function:: Ptr gpu::createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat& rowKernel, const Mat& columnKernel, const Point& anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1) - - :param srcType: Source array type. ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_16SC3`` , ``CV_32SC1`` , ``CV_32FC1`` source types are supported. - - :param dstType: Destination array type. 
``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_16SC3`` , ``CV_32SC1`` , ``CV_32FC1`` destination types are supported. - - :param rowKernel: Horizontal filter coefficients. Support kernels with ``size <= 16`` . - - :param columnKernel: Vertical filter coefficients. Support kernels with ``size <= 16`` . - - :param anchor: Anchor position within the kernel. Negative values mean that anchor is positioned at the aperture center. - - :param rowBorderType: Pixel extrapolation method in the vertical direction For details, see :ocv:func:`borderInterpolate`. For details on limitations, see :ocv:func:`gpu::getLinearRowFilter_GPU`, cpp:ocv:func:`gpu::getLinearColumnFilter_GPU`. - - :param columnBorderType: Pixel extrapolation method in the horizontal direction. - -.. seealso:: :ocv:func:`gpu::getLinearRowFilter_GPU`, :ocv:func:`gpu::getLinearColumnFilter_GPU`, :ocv:func:`createSeparableLinearFilter` - - - -gpu::sepFilter2D --------------------- -Applies a separable 2D linear filter to an image. - -.. ocv:function:: void gpu::sepFilter2D( const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY, Point anchor=Point(-1,-1), int rowBorderType=BORDER_DEFAULT, int columnBorderType=-1 ) - -.. ocv:function:: void gpu::sepFilter2D( const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY, GpuMat& buf, Point anchor=Point(-1,-1), int rowBorderType=BORDER_DEFAULT, int columnBorderType=-1, Stream& stream=Stream::Null() ) - - - :param src: Source image. ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_32SC1`` , ``CV_32FC1`` source types are supported. - - :param dst: Destination image with the same size and number of channels as ``src`` . - - :param ddepth: Destination image depth. ``CV_8U`` , ``CV_16S`` , ``CV_32S`` , and ``CV_32F`` are supported. - - :param kernelX: Horizontal filter coefficients. - - :param kernelY: Vertical filter coefficients. - - :param anchor: Anchor position within the kernel. 
The default value ``(-1, 1)`` means that the anchor is at the kernel center. - - :param rowBorderType: Pixel extrapolation method in the vertical direction. For details, see :ocv:func:`borderInterpolate`. - - :param columnBorderType: Pixel extrapolation method in the horizontal direction. - - :param stream: Stream for the asynchronous version. - -.. seealso:: :ocv:func:`gpu::createSeparableLinearFilter_GPU`, :ocv:func:`sepFilter2D` - - - -gpu::createDerivFilter_GPU ------------------------------- -Creates a filter engine for the generalized Sobel operator. - -.. ocv:function:: Ptr gpu::createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize, int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1) - - :param srcType: Source image type. ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_16SC3`` , ``CV_32SC1`` , ``CV_32FC1`` source types are supported. - - :param dstType: Destination image type with as many channels as ``srcType`` , ``CV_8U`` , ``CV_16S`` , ``CV_32S`` , and ``CV_32F`` depths are supported. - - :param dx: Derivative order in respect of x. - - :param dy: Derivative order in respect of y. - - :param ksize: Aperture size. See :ocv:func:`getDerivKernels` for details. - - :param rowBorderType: Pixel extrapolation method in the vertical direction. For details, see :ocv:func:`borderInterpolate`. - - :param columnBorderType: Pixel extrapolation method in the horizontal direction. - -.. seealso:: :ocv:func:`gpu::createSeparableLinearFilter_GPU`, :ocv:func:`createDerivFilter` - - - -gpu::Sobel --------------- -Applies the generalized Sobel operator to an image. - -.. ocv:function:: void gpu::Sobel( const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, int ksize=3, double scale=1, int rowBorderType=BORDER_DEFAULT, int columnBorderType=-1 ) - -.. 
ocv:function:: void gpu::Sobel( const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, GpuMat& buf, int ksize=3, double scale=1, int rowBorderType=BORDER_DEFAULT, int columnBorderType=-1, Stream& stream=Stream::Null() ) - - :param src: Source image. ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_16SC3`` , ``CV_32SC1`` , ``CV_32FC1`` source types are supported. - - :param dst: Destination image with the same size and number of channels as source image. - - :param ddepth: Destination image depth. ``CV_8U`` , ``CV_16S`` , ``CV_32S`` , and ``CV_32F`` are supported. - - :param dx: Derivative order in respect of x. - - :param dy: Derivative order in respect of y. - - :param ksize: Size of the extended Sobel kernel. Possible values are 1, 3, 5 or 7. - - :param scale: Optional scale factor for the computed derivative values. By default, no scaling is applied. For details, see :ocv:func:`getDerivKernels` . - - :param rowBorderType: Pixel extrapolation method in the vertical direction. For details, see :ocv:func:`borderInterpolate`. - - :param columnBorderType: Pixel extrapolation method in the horizontal direction. - - :param stream: Stream for the asynchronous version. - -.. seealso:: :ocv:func:`gpu::createSeparableLinearFilter_GPU`, :ocv:func:`Sobel` - - - -gpu::Scharr ---------------- -Calculates the first x- or y- image derivative using the Scharr operator. - -.. ocv:function:: void gpu::Scharr( const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, double scale=1, int rowBorderType=BORDER_DEFAULT, int columnBorderType=-1 ) - -.. ocv:function:: void gpu::Scharr( const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, GpuMat& buf, double scale=1, int rowBorderType=BORDER_DEFAULT, int columnBorderType=-1, Stream& stream=Stream::Null() ) - - :param src: Source image. ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_16SC3`` , ``CV_32SC1`` , ``CV_32FC1`` source types are supported. 
- - :param dst: Destination image with the same size and number of channels as ``src`` has. - - :param ddepth: Destination image depth. ``CV_8U`` , ``CV_16S`` , ``CV_32S`` , and ``CV_32F`` are supported. - - :param dx: Order of the derivative x. - - :param dy: Order of the derivative y. - - :param scale: Optional scale factor for the computed derivative values. By default, no scaling is applied. See :ocv:func:`getDerivKernels` for details. - - :param rowBorderType: Pixel extrapolation method in the vertical direction. For details, see :ocv:func:`borderInterpolate`. - - :param columnBorderType: Pixel extrapolation method in the horizontal direction. - - :param stream: Stream for the asynchronous version. - -.. seealso:: :ocv:func:`gpu::createSeparableLinearFilter_GPU`, :ocv:func:`Scharr` - - - -gpu::createGaussianFilter_GPU ---------------------------------- -Creates a Gaussian filter engine. - -.. ocv:function:: Ptr gpu::createGaussianFilter_GPU( int type, Size ksize, double sigma1, double sigma2=0, int rowBorderType=BORDER_DEFAULT, int columnBorderType=-1 ) - - :param type: Source and destination image type. ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_16SC3`` , ``CV_32SC1`` , ``CV_32FC1`` are supported. - - :param ksize: Aperture size. See :ocv:func:`getGaussianKernel` for details. - - :param sigma1: Gaussian sigma in the horizontal direction. See :ocv:func:`getGaussianKernel` for details. - - :param sigma2: Gaussian sigma in the vertical direction. If 0, then :math:`\texttt{sigma2}\leftarrow\texttt{sigma1}` . - - :param rowBorderType: Pixel extrapolation method in the vertical direction. For details, see :ocv:func:`borderInterpolate`. - - :param columnBorderType: Pixel extrapolation method in the horizontal direction. - -.. seealso:: :ocv:func:`gpu::createSeparableLinearFilter_GPU`, :ocv:func:`createGaussianFilter` - - - -gpu::GaussianBlur ---------------------- -Smooths an image using the Gaussian filter. - -.. 
ocv:function:: void gpu::GaussianBlur( const GpuMat& src, GpuMat& dst, Size ksize, double sigma1, double sigma2=0, int rowBorderType=BORDER_DEFAULT, int columnBorderType=-1 ) - -.. ocv:function:: void gpu::GaussianBlur( const GpuMat& src, GpuMat& dst, Size ksize, GpuMat& buf, double sigma1, double sigma2=0, int rowBorderType=BORDER_DEFAULT, int columnBorderType=-1, Stream& stream=Stream::Null() ) - - :param src: Source image. ``CV_8UC1`` , ``CV_8UC4`` , ``CV_16SC1`` , ``CV_16SC2`` , ``CV_16SC3`` , ``CV_32SC1`` , ``CV_32FC1`` source types are supported. - - :param dst: Destination image with the same size and type as ``src`` . - - :param ksize: Gaussian kernel size. ``ksize.width`` and ``ksize.height`` can differ but they both must be positive and odd. If they are zeros, they are computed from ``sigma1`` and ``sigma2`` . - - :param sigma1: Gaussian kernel standard deviation in X direction. - - :param sigma2: Gaussian kernel standard deviation in Y direction. If ``sigma2`` is zero, it is set to be equal to ``sigma1`` . If they are both zeros, they are computed from ``ksize.width`` and ``ksize.height``, respectively. See :ocv:func:`getGaussianKernel` for details. To fully control the result regardless of possible future modification of all this semantics, you are recommended to specify all of ``ksize`` , ``sigma1`` , and ``sigma2`` . - - :param rowBorderType: Pixel extrapolation method in the vertical direction. For details, see :ocv:func:`borderInterpolate`. - - :param columnBorderType: Pixel extrapolation method in the horizontal direction. - - :param stream: Stream for the asynchronous version. - -.. seealso:: :ocv:func:`gpu::createGaussianFilter_GPU`, :ocv:func:`GaussianBlur` - - - -gpu::getMaxFilter_GPU -------------------------- +gpu::createBoxMaxFilter +----------------------- Creates the maximum filter. -.. ocv:function:: Ptr gpu::getMaxFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor = Point(-1,-1)) +.. 
ocv:function:: Ptr gpu::createBoxMaxFilter(int srcType, Size ksize, Point anchor = Point(-1, -1), int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0)) - :param srcType: Input image type. Only ``CV_8UC1`` and ``CV_8UC4`` are supported. - - :param dstType: Output image type. It supports only the same type as the source type. + :param srcType: Input/output image type. Only ``CV_8UC1`` and ``CV_8UC4`` are supported. :param ksize: Kernel size. :param anchor: Anchor point. The default value (-1) means that the anchor is at the kernel center. -.. note:: This filter does not check out-of-border accesses, so only a proper sub-matrix of a bigger matrix has to be passed to it. + :param borderMode: Pixel extrapolation method. For details, see :ocv:func:`borderInterpolate` . + + :param borderVal: Default border value. -gpu::getMinFilter_GPU -------------------------- +gpu::createBoxMinFilter +----------------------- Creates the minimum filter. -.. ocv:function:: Ptr gpu::getMinFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor = Point(-1,-1)) +.. ocv:function:: Ptr gpu::createBoxMinFilter(int srcType, Size ksize, Point anchor = Point(-1, -1), int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0)) - :param srcType: Input image type. Only ``CV_8UC1`` and ``CV_8UC4`` are supported. - - :param dstType: Output image type. It supports only the same type as the source type. + :param srcType: Input/output image type. Only ``CV_8UC1`` and ``CV_8UC4`` are supported. :param ksize: Kernel size. :param anchor: Anchor point. The default value (-1) means that the anchor is at the kernel center. -.. note:: This filter does not check out-of-border accesses, so only a proper sub-matrix of a bigger matrix has to be passed to it. + :param borderMode: Pixel extrapolation method. For details, see :ocv:func:`borderInterpolate` . + + :param borderVal: Default border value. 
+ + + +gpu::createRowSumFilter +----------------------- +Creates a horizontal 1D box filter. + +.. ocv:function:: Ptr<Filter> gpu::createRowSumFilter(int srcType, int dstType, int ksize, int anchor = -1, int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0)) + + :param srcType: Input image type. Only ``CV_8UC1`` type is supported for now. + + :param dstType: Output image type. Only ``CV_32FC1`` type is supported for now. + + :param ksize: Kernel size. + + :param anchor: Anchor point. The default value (-1) means that the anchor is at the kernel center. + + :param borderMode: Pixel extrapolation method. For details, see :ocv:func:`borderInterpolate` . + + :param borderVal: Default border value. + + + +gpu::createColumnSumFilter +-------------------------- +Creates a vertical 1D box filter. + +.. ocv:function:: Ptr<Filter> gpu::createColumnSumFilter(int srcType, int dstType, int ksize, int anchor = -1, int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0)) + + :param srcType: Input image type. Only ``CV_8UC1`` type is supported for now. + + :param dstType: Output image type. Only ``CV_32FC1`` type is supported for now. + + :param ksize: Kernel size. + + :param anchor: Anchor point. The default value (-1) means that the anchor is at the kernel center. + + :param borderMode: Pixel extrapolation method. For details, see :ocv:func:`borderInterpolate` . + + :param borderVal: Default border value. diff --git a/modules/gpufilters/include/opencv2/gpufilters.hpp b/modules/gpufilters/include/opencv2/gpufilters.hpp index 76b5b731d..a2cc8db6b 100644 --- a/modules/gpufilters/include/opencv2/gpufilters.hpp +++ b/modules/gpufilters/include/opencv2/gpufilters.hpp @@ -72,11 +72,13 @@ public: //////////////////////////////////////////////////////////////////////////////////////////////////// // Box Filter -//! smooths the image using the normalized box filter +//! creates a normalized 2D box filter //! 
supports CV_8UC1, CV_8UC4 types CV_EXPORTS Ptr createBoxFilter(int srcType, int dstType, Size ksize, Point anchor = Point(-1,-1), int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0)); +// obsolete + __OPENCV_GPUFILTERS_DEPR_BEFORE__ void boxFilter(InputArray src, OutputArray dst, int dstType, Size ksize, Point anchor = Point(-1,-1), Stream& stream = Stream::Null()) __OPENCV_GPUFILTERS_DEPR_AFTER__; @@ -100,9 +102,12 @@ inline void blur(InputArray src, OutputArray dst, Size ksize, Point anchor, Stre //////////////////////////////////////////////////////////////////////////////////////////////////// // Linear Filter -//! non-separable linear 2D filter +//! Creates a non-separable linear 2D filter +//! supports 1 and 4 channel CV_8U, CV_16U and CV_32F input CV_EXPORTS Ptr createLinearFilter(int srcType, int dstType, InputArray kernel, Point anchor = Point(-1,-1), - int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0)); + int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0)); + +// obsolete __OPENCV_GPUFILTERS_DEPR_BEFORE__ void filter2D(InputArray src, OutputArray dst, int ddepth, InputArray kernel, Point anchor = Point(-1,-1), int borderType = BORDER_DEFAULT, @@ -117,10 +122,12 @@ inline void filter2D(InputArray src, OutputArray dst, int ddepth, InputArray ker //////////////////////////////////////////////////////////////////////////////////////////////////// // Laplacian Filter -//! applies Laplacian operator to the image +//! creates a Laplacian operator //! 
supports only ksize = 1 and ksize = 3 CV_EXPORTS Ptr createLaplacianFilter(int srcType, int dstType, int ksize = 1, double scale = 1, - int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0)); + int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0)); + +// obsolete __OPENCV_GPUFILTERS_DEPR_BEFORE__ void Laplacian(InputArray src, OutputArray dst, int ddepth, int ksize = 1, double scale = 1, int borderType = BORDER_DEFAULT, @@ -135,10 +142,12 @@ inline void Laplacian(InputArray src, OutputArray dst, int ddepth, int ksize, do //////////////////////////////////////////////////////////////////////////////////////////////////// // Separable Linear Filter -//! separable linear 2D filter +//! creates a separable linear filter CV_EXPORTS Ptr createSeparableLinearFilter(int srcType, int dstType, InputArray rowKernel, InputArray columnKernel, Point anchor = Point(-1,-1), int rowBorderMode = BORDER_DEFAULT, int columnBorderMode = -1); +// obsolete + __OPENCV_GPUFILTERS_DEPR_BEFORE__ void sepFilter2D(InputArray src, OutputArray dst, int ddepth, InputArray kernelX, InputArray kernelY, Point anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, Stream& stream = Stream::Null()) __OPENCV_GPUFILTERS_DEPR_AFTER__; @@ -152,19 +161,21 @@ inline void sepFilter2D(InputArray src, OutputArray dst, int ddepth, InputArray //////////////////////////////////////////////////////////////////////////////////////////////////// // Deriv Filter -//! the generalized Deriv operator +//! creates a generalized Deriv operator CV_EXPORTS Ptr createDerivFilter(int srcType, int dstType, int dx, int dy, int ksize, bool normalize = false, double scale = 1, int rowBorderMode = BORDER_DEFAULT, int columnBorderMode = -1); -//! the Sobel operator +//! creates a Sobel operator CV_EXPORTS Ptr createSobelFilter(int srcType, int dstType, int dx, int dy, int ksize = 3, double scale = 1, int rowBorderMode = BORDER_DEFAULT, int columnBorderMode = -1); -//! 
the vertical or horizontal Scharr operator +//! creates a vertical or horizontal Scharr operator CV_EXPORTS Ptr createScharrFilter(int srcType, int dstType, int dx, int dy, double scale = 1, int rowBorderMode = BORDER_DEFAULT, int columnBorderMode = -1); +// obsolete + __OPENCV_GPUFILTERS_DEPR_BEFORE__ void Sobel(InputArray src, OutputArray dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1, int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, Stream& stream = Stream::Null()) __OPENCV_GPUFILTERS_DEPR_AFTER__; @@ -188,11 +199,13 @@ inline void Scharr(InputArray src, OutputArray dst, int ddepth, int dx, int dy, //////////////////////////////////////////////////////////////////////////////////////////////////// // Gaussian Filter -//! smooths the image using Gaussian filter +//! creates a Gaussian filter CV_EXPORTS Ptr createGaussianFilter(int srcType, int dstType, Size ksize, double sigma1, double sigma2 = 0, int rowBorderMode = BORDER_DEFAULT, int columnBorderMode = -1); +// obsolete + __OPENCV_GPUFILTERS_DEPR_BEFORE__ void GaussianBlur(InputArray src, OutputArray dst, Size ksize, double sigma1, double sigma2 = 0, int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, @@ -207,10 +220,12 @@ inline void GaussianBlur(InputArray src, OutputArray dst, Size ksize, double sig //////////////////////////////////////////////////////////////////////////////////////////////////// // Morphology Filter -//! returns 2D morphological filter +//! creates a 2D morphological filter //! 
supports CV_8UC1 and CV_8UC4 types CV_EXPORTS Ptr createMorphologyFilter(int op, int srcType, InputArray kernel, Point anchor = Point(-1, -1), int iterations = 1); +// obsolete + __OPENCV_GPUFILTERS_DEPR_BEFORE__ void erode(InputArray src, OutputArray dst, InputArray kernel, Point anchor = Point(-1, -1), int iterations = 1, Stream& stream = Stream::Null()) __OPENCV_GPUFILTERS_DEPR_AFTER__; @@ -244,12 +259,12 @@ inline void morphologyEx(InputArray src, OutputArray dst, int op, InputArray ker //////////////////////////////////////////////////////////////////////////////////////////////////// // Image Rank Filter -//! Result pixel value is the maximum of pixel values under the rectangular mask region +//! result pixel value is the maximum of pixel values under the rectangular mask region CV_EXPORTS Ptr createBoxMaxFilter(int srcType, Size ksize, Point anchor = Point(-1, -1), int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0)); -//! Result pixel value is the maximum of pixel values under the rectangular mask region +//! 
result pixel value is the minimum of pixel values under the rectangular mask region CV_EXPORTS Ptr createBoxMinFilter(int srcType, Size ksize, Point anchor = Point(-1, -1), int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0)); From 5db08961cec08f309c3165fa086a0eb8e8e5d6ee Mon Sep 17 00:00:00 2001 From: Alexander Shishkov Date: Tue, 18 Jun 2013 06:59:52 +0400 Subject: [PATCH 089/121] fixed Kirill's comments --- modules/highgui/src/cap_ios_abstract_camera.mm | 4 ++-- modules/highgui/src/cap_ios_photo_camera.mm | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/highgui/src/cap_ios_abstract_camera.mm b/modules/highgui/src/cap_ios_abstract_camera.mm index a0e8f3e8b..dc4faaaef 100644 --- a/modules/highgui/src/cap_ios_abstract_camera.mm +++ b/modules/highgui/src/cap_ios_abstract_camera.mm @@ -470,7 +470,7 @@ device.whiteBalanceMode = AVCaptureWhiteBalanceModeLocked; [device unlockForConfiguration]; } else { - NSLog(@"unable to lock device for locked exposure configuration %@", [error localizedDescription]); + NSLog(@"unable to lock device for locked white balance configuration %@", [error localizedDescription]); } } } @@ -484,7 +484,7 @@ device.whiteBalanceMode = AVCaptureWhiteBalanceModeContinuousAutoWhiteBalance; [device unlockForConfiguration]; } else { - NSLog(@"unable to lock device for autoexposure configuration %@", [error localizedDescription]); + NSLog(@"unable to lock device for auto white balance configuration %@", [error localizedDescription]); } } } diff --git a/modules/highgui/src/cap_ios_photo_camera.mm b/modules/highgui/src/cap_ios_photo_camera.mm index f8891f227..f05cfa5f8 100644 --- a/modules/highgui/src/cap_ios_photo_camera.mm +++ b/modules/highgui/src/cap_ios_photo_camera.mm @@ -32,7 +32,7 @@ #import "opencv2/highgui/cap_ios.h" #include "precomp.hpp" -#pragma mark - Private Interface mark - Private Interface +#pragma mark - Private Interface @interface CvPhotoCamera () From 
24fd2cc326db17a511eda02670dd64209b7b689a Mon Sep 17 00:00:00 2001 From: Alexander Shishkov Date: Tue, 18 Jun 2013 07:02:09 +0400 Subject: [PATCH 090/121] updated licenses --- modules/highgui/src/cap_ios_abstract_camera.mm | 1 + modules/highgui/src/cap_ios_video_camera.mm | 1 + 2 files changed, 2 insertions(+) diff --git a/modules/highgui/src/cap_ios_abstract_camera.mm b/modules/highgui/src/cap_ios_abstract_camera.mm index dc4faaaef..38e1c12e6 100644 --- a/modules/highgui/src/cap_ios_abstract_camera.mm +++ b/modules/highgui/src/cap_ios_abstract_camera.mm @@ -2,6 +2,7 @@ * cap_ios_abstract_camera.mm * For iOS video I/O * by Eduard Feicho on 29/07/12 + * by Alexander Shishkov on 17/07/13 * Copyright 2012. All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/modules/highgui/src/cap_ios_video_camera.mm b/modules/highgui/src/cap_ios_video_camera.mm index 588adfc9c..ac85f79ee 100644 --- a/modules/highgui/src/cap_ios_video_camera.mm +++ b/modules/highgui/src/cap_ios_video_camera.mm @@ -2,6 +2,7 @@ * cap_ios_video_camera.mm * For iOS video I/O * by Eduard Feicho on 29/07/12 + * by Alexander Shishkov on 17/07/13 * Copyright 2012. All rights reserved. * * Redistribution and use in source and binary forms, with or without From 4e29f0ee6d9c57ea22068b8bc0c99c2414816b50 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 8 May 2013 17:07:29 +0400 Subject: [PATCH 091/121] updated documentation --- modules/gpustereo/doc/stereo.rst | 415 ++++++------------ .../gpustereo/include/opencv2/gpustereo.hpp | 1 + 2 files changed, 141 insertions(+), 275 deletions(-) diff --git a/modules/gpustereo/doc/stereo.rst b/modules/gpustereo/doc/stereo.rst index cd2add0b9..4064fe0a3 100644 --- a/modules/gpustereo/doc/stereo.rst +++ b/modules/gpustereo/doc/stereo.rst @@ -5,135 +5,75 @@ Stereo Correspondence -gpu::StereoBM_GPU ------------------ -.. ocv:class:: gpu::StereoBM_GPU +gpu::StereoBM +------------- +.. 
ocv:class:: gpu::StereoBM : public cv::StereoBM Class computing stereo correspondence (disparity map) using the block matching algorithm. :: - class StereoBM_GPU - { - public: - enum { BASIC_PRESET = 0, PREFILTER_XSOBEL = 1 }; - - enum { DEFAULT_NDISP = 64, DEFAULT_WINSZ = 19 }; - - StereoBM_GPU(); - StereoBM_GPU(int preset, int ndisparities = DEFAULT_NDISP, - int winSize = DEFAULT_WINSZ); - - void operator() (const GpuMat& left, const GpuMat& right, - GpuMat& disparity, Stream& stream = Stream::Null()); - - static bool checkIfGpuCallReasonable(); - - int preset; - int ndisp; - int winSize; - - float avergeTexThreshold; - - ... - }; - - -The class also performs pre- and post-filtering steps: Sobel pre-filtering (if ``PREFILTER_XSOBEL`` flag is set) and low textureness filtering (if ``averageTexThreshols > 0`` ). If ``avergeTexThreshold = 0`` , low textureness filtering is disabled. Otherwise, the disparity is set to 0 in each point ``(x, y)`` , where for the left image - -.. math:: - \sum HorizontalGradiensInWindow(x, y, winSize) < (winSize \cdot winSize) \cdot avergeTexThreshold - -This means that the input left image is low textured. +.. seealso:: :ocv:class:`StereoBM` -gpu::StereoBM_GPU::StereoBM_GPU ------------------------------------ -Enables :ocv:class:`gpu::StereoBM_GPU` constructors. +gpu::createStereoBM +------------------- +Creates StereoBM object. -.. ocv:function:: gpu::StereoBM_GPU::StereoBM_GPU() +.. ocv:function:: Ptr gpu::createStereoBM(int numDisparities = 64, int blockSize = 19) -.. ocv:function:: gpu::StereoBM_GPU::StereoBM_GPU(int preset, int ndisparities = DEFAULT_NDISP, int winSize = DEFAULT_WINSZ) + :param numDisparities: the disparity search range. For each pixel algorithm will find the best disparity from 0 (default minimum disparity) to ``numDisparities``. The search range can then be shifted by changing the minimum disparity. - :param preset: Parameter presetting: - - * **BASIC_PRESET** Basic mode without pre-processing. 
- - * **PREFILTER_XSOBEL** Sobel pre-filtering mode. - - :param ndisparities: Number of disparities. It must be a multiple of 8 and less or equal to 256. - - :param winSize: Block size. - - - -gpu::StereoBM_GPU::operator () ----------------------------------- -Enables the stereo correspondence operator that finds the disparity for the specified rectified stereo pair. - -.. ocv:function:: void gpu::StereoBM_GPU::operator ()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, Stream& stream = Stream::Null()) - - :param left: Left image. Only ``CV_8UC1`` type is supported. - - :param right: Right image with the same size and the same type as the left one. - - :param disparity: Output disparity map. It is a ``CV_8UC1`` image with the same size as the input images. - - :param stream: Stream for the asynchronous version. - - - -gpu::StereoBM_GPU::checkIfGpuCallReasonable ------------------------------------------------ -Uses a heuristic method to estimate whether the current GPU is faster than the CPU in this algorithm. It queries the currently active device. - -.. ocv:function:: bool gpu::StereoBM_GPU::checkIfGpuCallReasonable() + :param blockSize: the linear size of the blocks compared by the algorithm. The size should be odd (as the block is centered at the current pixel). Larger block size implies smoother, though less accurate disparity map. Smaller block size gives more detailed disparity map, but there is higher chance for algorithm to find a wrong correspondence. gpu::StereoBeliefPropagation ---------------------------- -.. ocv:class:: gpu::StereoBeliefPropagation +.. ocv:class:: gpu::StereoBeliefPropagation : public cv::StereoMatcher Class computing stereo correspondence using the belief propagation algorithm. 
:: - class StereoBeliefPropagation + class CV_EXPORTS StereoBeliefPropagation : public cv::StereoMatcher { public: - enum { DEFAULT_NDISP = 64 }; - enum { DEFAULT_ITERS = 5 }; - enum { DEFAULT_LEVELS = 5 }; + using cv::StereoMatcher::compute; - static void estimateRecommendedParams(int width, int height, - int& ndisp, int& iters, int& levels); + virtual void compute(InputArray left, InputArray right, OutputArray disparity, Stream& stream) = 0; - explicit StereoBeliefPropagation(int ndisp = DEFAULT_NDISP, - int iters = DEFAULT_ITERS, - int levels = DEFAULT_LEVELS, - int msg_type = CV_32F); - StereoBeliefPropagation(int ndisp, int iters, int levels, - float max_data_term, float data_weight, - float max_disc_term, float disc_single_jump, - int msg_type = CV_32F); + //! version for user specified data term + virtual void compute(InputArray data, OutputArray disparity, Stream& stream = Stream::Null()) = 0; - void operator()(const GpuMat& left, const GpuMat& right, - GpuMat& disparity, Stream& stream = Stream::Null()); - void operator()(const GpuMat& data, GpuMat& disparity, Stream& stream = Stream::Null()); + //! number of BP iterations on each level + virtual int getNumIters() const = 0; + virtual void setNumIters(int iters) = 0; - int ndisp; + //! number of levels + virtual int getNumLevels() const = 0; + virtual void setNumLevels(int levels) = 0; - int iters; - int levels; + //! truncation of data cost + virtual double getMaxDataTerm() const = 0; + virtual void setMaxDataTerm(double max_data_term) = 0; - float max_data_term; - float data_weight; - float max_disc_term; - float disc_single_jump; + //! data weight + virtual double getDataWeight() const = 0; + virtual void setDataWeight(double data_weight) = 0; - int msg_type; + //! truncation of discontinuity cost + virtual double getMaxDiscTerm() const = 0; + virtual void setMaxDiscTerm(double max_disc_term) = 0; - ... + //! 
discontinuity single jump + virtual double getDiscSingleJump() const = 0; + virtual void setDiscSingleJump(double disc_single_jump) = 0; + + virtual int getMsgType() const = 0; + virtual void setMsgType(int msg_type) = 0; + + static void estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels); }; + The class implements algorithm described in [Felzenszwalb2006]_ . It can compute own data cost (using a truncated linear model) or use a user-provided data cost. .. note:: @@ -152,32 +92,6 @@ The class implements algorithm described in [Felzenszwalb2006]_ . It can compute ``width_step`` is the number of bytes in a line including padding. - - -gpu::StereoBeliefPropagation::StereoBeliefPropagation ---------------------------------------------------------- -Enables the :ocv:class:`gpu::StereoBeliefPropagation` constructors. - -.. ocv:function:: gpu::StereoBeliefPropagation::StereoBeliefPropagation(int ndisp = DEFAULT_NDISP, int iters = DEFAULT_ITERS, int levels = DEFAULT_LEVELS, int msg_type = CV_32F) - -.. ocv:function:: gpu::StereoBeliefPropagation::StereoBeliefPropagation(int ndisp, int iters, int levels, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int msg_type = CV_32F) - - :param ndisp: Number of disparities. - - :param iters: Number of BP iterations on each level. - - :param levels: Number of levels. - - :param max_data_term: Threshold for data cost truncation. - - :param data_weight: Data weight. - - :param max_disc_term: Threshold for discontinuity truncation. - - :param disc_single_jump: Discontinuity single jump. - - :param msg_type: Type for messages. ``CV_16SC1`` and ``CV_32FC1`` types are supported. - ``StereoBeliefPropagation`` uses a truncated linear model for the data cost and discontinuity terms: .. math:: @@ -190,33 +104,45 @@ Enables the :ocv:class:`gpu::StereoBeliefPropagation` constructors. For more details, see [Felzenszwalb2006]_. 
-By default, :ocv:class:`gpu::StereoBeliefPropagation` uses floating-point arithmetics and the ``CV_32FC1`` type for messages. But it can also use fixed-point arithmetics and the ``CV_16SC1`` message type for better performance. To avoid an overflow in this case, the parameters must satisfy the following requirement: +By default, ``StereoBeliefPropagation`` uses floating-point arithmetics and the ``CV_32FC1`` type for messages. But it can also use fixed-point arithmetics and the ``CV_16SC1`` message type for better performance. To avoid an overflow in this case, the parameters must satisfy the following requirement: .. math:: 10 \cdot 2^{levels-1} \cdot max \_ data \_ term < SHRT \_ MAX +.. seealso:: :ocv:class:`StereoMatcher` + + + +gpu::createStereoBeliefPropagation +---------------------------------- +Creates StereoBeliefPropagation object. + +.. ocv:function:: Ptr gpu::createStereoBeliefPropagation(int ndisp = 64, int iters = 5, int levels = 5, int msg_type = CV_32F) + + :param ndisp: Number of disparities. + + :param iters: Number of BP iterations on each level. + + :param levels: Number of levels. + + :param msg_type: Type for messages. ``CV_16SC1`` and ``CV_32FC1`` types are supported. + gpu::StereoBeliefPropagation::estimateRecommendedParams ------------------------------------------------------------ +------------------------------------------------------- Uses a heuristic method to compute the recommended parameters ( ``ndisp``, ``iters`` and ``levels`` ) for the specified image size ( ``width`` and ``height`` ). .. ocv:function:: void gpu::StereoBeliefPropagation::estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels) -gpu::StereoBeliefPropagation::operator () ---------------------------------------------- -Enables the stereo correspondence operator that finds the disparity for the specified rectified stereo pair or data cost. 
+gpu::StereoBeliefPropagation::compute +------------------------------------- +Enables the stereo correspondence operator that finds the disparity for the specified data cost. -.. ocv:function:: void gpu::StereoBeliefPropagation::operator ()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, Stream& stream = Stream::Null()) - -.. ocv:function:: void gpu::StereoBeliefPropagation::operator ()(const GpuMat& data, GpuMat& disparity, Stream& stream = Stream::Null()) - - :param left: Left image. ``CV_8UC1`` , ``CV_8UC3`` and ``CV_8UC4`` types are supported. - - :param right: Right image with the same size and the same type as the left one. +.. ocv:function:: void gpu::StereoBeliefPropagation::compute(InputArray data, OutputArray disparity, Stream& stream = Stream::Null()) :param data: User-specified data cost, a matrix of ``msg_type`` type and ``Size(*ndisp, )`` size. @@ -228,89 +154,26 @@ Enables the stereo correspondence operator that finds the disparity for the spec gpu::StereoConstantSpaceBP -------------------------- -.. ocv:class:: gpu::StereoConstantSpaceBP +.. ocv:class:: gpu::StereoConstantSpaceBP : public gpu::StereoBeliefPropagation Class computing stereo correspondence using the constant space belief propagation algorithm. :: - class StereoConstantSpaceBP + class CV_EXPORTS StereoConstantSpaceBP : public gpu::StereoBeliefPropagation { public: - enum { DEFAULT_NDISP = 128 }; - enum { DEFAULT_ITERS = 8 }; - enum { DEFAULT_LEVELS = 4 }; - enum { DEFAULT_NR_PLANE = 4 }; + //! 
number of active disparity on the first level + virtual int getNrPlane() const = 0; + virtual void setNrPlane(int nr_plane) = 0; - static void estimateRecommendedParams(int width, int height, - int& ndisp, int& iters, int& levels, int& nr_plane); + virtual bool getUseLocalInitDataCost() const = 0; + virtual void setUseLocalInitDataCost(bool use_local_init_data_cost) = 0; - explicit StereoConstantSpaceBP(int ndisp = DEFAULT_NDISP, - int iters = DEFAULT_ITERS, - int levels = DEFAULT_LEVELS, - int nr_plane = DEFAULT_NR_PLANE, - int msg_type = CV_32F); - StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane, - float max_data_term, float data_weight, - float max_disc_term, float disc_single_jump, - int min_disp_th = 0, - int msg_type = CV_32F); - - void operator()(const GpuMat& left, const GpuMat& right, - GpuMat& disparity, Stream& stream = Stream::Null()); - - int ndisp; - - int iters; - int levels; - - int nr_plane; - - float max_data_term; - float data_weight; - float max_disc_term; - float disc_single_jump; - - int min_disp_th; - - int msg_type; - - bool use_local_init_data_cost; - - ... + static void estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels, int& nr_plane); }; The class implements algorithm described in [Yang2010]_. ``StereoConstantSpaceBP`` supports both local minimum and global minimum data cost initialization algorithms. For more details, see the paper mentioned above. By default, a local algorithm is used. To enable a global algorithm, set ``use_local_init_data_cost`` to ``false`` . - - -gpu::StereoConstantSpaceBP::StereoConstantSpaceBP ------------------------------------------------------ -Enables the :ocv:class:`gpu::StereoConstantSpaceBP` constructors. - -.. ocv:function:: gpu::StereoConstantSpaceBP::StereoConstantSpaceBP(int ndisp = DEFAULT_NDISP, int iters = DEFAULT_ITERS, int levels = DEFAULT_LEVELS, int nr_plane = DEFAULT_NR_PLANE, int msg_type = CV_32F) - -.. 
ocv:function:: gpu::StereoConstantSpaceBP::StereoConstantSpaceBP(int ndisp, int iters, int levels, int nr_plane, float max_data_term, float data_weight, float max_disc_term, float disc_single_jump, int min_disp_th = 0, int msg_type = CV_32F) - - :param ndisp: Number of disparities. - - :param iters: Number of BP iterations on each level. - - :param levels: Number of levels. - - :param nr_plane: Number of disparity levels on the first level. - - :param max_data_term: Truncation of data cost. - - :param data_weight: Data weight. - - :param max_disc_term: Truncation of discontinuity. - - :param disc_single_jump: Discontinuity single jump. - - :param min_disp_th: Minimal disparity threshold. - - :param msg_type: Type for messages. ``CV_16SC1`` and ``CV_32FC1`` types are supported. - ``StereoConstantSpaceBP`` uses a truncated linear model for the data cost and discontinuity terms: .. math:: @@ -331,54 +194,65 @@ By default, ``StereoConstantSpaceBP`` uses floating-point arithmetics and the `` +gpu::createStereoConstantSpaceBP +-------------------------------- +Creates StereoConstantSpaceBP object. + +.. ocv:function:: Ptr gpu::createStereoConstantSpaceBP(int ndisp = 128, int iters = 8, int levels = 4, int nr_plane = 4, int msg_type = CV_32F) + + :param ndisp: Number of disparities. + + :param iters: Number of BP iterations on each level. + + :param levels: Number of levels. + + :param nr_plane: Number of disparity levels on the first level. + + :param msg_type: Type for messages. ``CV_16SC1`` and ``CV_32FC1`` types are supported. + + + gpu::StereoConstantSpaceBP::estimateRecommendedParams ---------------------------------------------------------- +----------------------------------------------------- Uses a heuristic method to compute parameters (ndisp, iters, levelsand nrplane) for the specified image size (widthand height). .. 
ocv:function:: void gpu::StereoConstantSpaceBP::estimateRecommendedParams(int width, int height, int& ndisp, int& iters, int& levels, int& nr_plane) -gpu::StereoConstantSpaceBP::operator () -------------------------------------------- -Enables the stereo correspondence operator that finds the disparity for the specified rectified stereo pair. - -.. ocv:function:: void gpu::StereoConstantSpaceBP::operator ()(const GpuMat& left, const GpuMat& right, GpuMat& disparity, Stream& stream = Stream::Null()) - - :param left: Left image. ``CV_8UC1`` , ``CV_8UC3`` and ``CV_8UC4`` types are supported. - - :param right: Right image with the same size and the same type as the left one. - - :param disparity: Output disparity map. If ``disparity`` is empty, the output type is ``CV_16SC1`` . Otherwise, the output type is ``disparity.type()`` . - - :param stream: Stream for the asynchronous version. - - - gpu::DisparityBilateralFilter ----------------------------- -.. ocv:class:: gpu::DisparityBilateralFilter +.. ocv:class:: gpu::DisparityBilateralFilter : public cv::Algorithm Class refining a disparity map using joint bilateral filtering. :: - class CV_EXPORTS DisparityBilateralFilter + class CV_EXPORTS DisparityBilateralFilter : public cv::Algorithm { public: - enum { DEFAULT_NDISP = 64 }; - enum { DEFAULT_RADIUS = 3 }; - enum { DEFAULT_ITERS = 1 }; + //! the disparity map refinement operator. Refine disparity map using joint bilateral filtering given a single color image. + //! disparity must have CV_8U or CV_16S type, image must have CV_8UC1 or CV_8UC3 type. 
+ virtual void apply(InputArray disparity, InputArray image, OutputArray dst, Stream& stream = Stream::Null()) = 0; - explicit DisparityBilateralFilter(int ndisp = DEFAULT_NDISP, - int radius = DEFAULT_RADIUS, int iters = DEFAULT_ITERS); + virtual int getNumDisparities() const = 0; + virtual void setNumDisparities(int numDisparities) = 0; - DisparityBilateralFilter(int ndisp, int radius, int iters, - float edge_threshold, float max_disc_threshold, - float sigma_range); + virtual int getRadius() const = 0; + virtual void setRadius(int radius) = 0; - void operator()(const GpuMat& disparity, const GpuMat& image, - GpuMat& dst, Stream& stream = Stream::Null()); + virtual int getNumIters() const = 0; + virtual void setNumIters(int iters) = 0; - ... + //! truncation of data continuity + virtual double getEdgeThreshold() const = 0; + virtual void setEdgeThreshold(double edge_threshold) = 0; + + //! truncation of disparity continuity + virtual double getMaxDiscThreshold() const = 0; + virtual void setMaxDiscThreshold(double max_disc_threshold) = 0; + + //! filter range sigma + virtual double getSigmaRange() const = 0; + virtual void setSigmaRange(double sigma_range) = 0; }; @@ -386,13 +260,11 @@ The class implements [Yang2010]_ algorithm. -gpu::DisparityBilateralFilter::DisparityBilateralFilter ------------------------------------------------------------ -Enables the :ocv:class:`gpu::DisparityBilateralFilter` constructors. +gpu::createDisparityBilateralFilter +----------------------------------- +Creates DisparityBilateralFilter object. -.. ocv:function:: gpu::DisparityBilateralFilter::DisparityBilateralFilter(int ndisp = DEFAULT_NDISP, int radius = DEFAULT_RADIUS, int iters = DEFAULT_ITERS) - -.. ocv:function:: gpu::DisparityBilateralFilter::DisparityBilateralFilter(int ndisp, int radius, int iters, float edge_threshold, float max_disc_threshold, float sigma_range) +.. 
ocv:function:: Ptr gpu::createDisparityBilateralFilter(int ndisp = 64, int radius = 3, int iters = 1) :param ndisp: Number of disparities. @@ -400,19 +272,13 @@ Enables the :ocv:class:`gpu::DisparityBilateralFilter` constructors. :param iters: Number of iterations. - :param edge_threshold: Threshold for edges. - - :param max_disc_threshold: Constant to reject outliers. - - :param sigma_range: Filter range. - -gpu::DisparityBilateralFilter::operator () ----------------------------------------------- +gpu::DisparityBilateralFilter::apply +------------------------------------ Refines a disparity map using joint bilateral filtering. -.. ocv:function:: void gpu::DisparityBilateralFilter::operator ()(const GpuMat& disparity, const GpuMat& image, GpuMat& dst, Stream& stream = Stream::Null()) +.. ocv:function:: void gpu::DisparityBilateralFilter::apply(InputArray disparity, InputArray image, OutputArray dst, Stream& stream = Stream::Null()) :param disparity: Input disparity map. ``CV_8UC1`` and ``CV_16SC1`` types are supported. @@ -424,29 +290,11 @@ Refines a disparity map using joint bilateral filtering. -gpu::drawColorDisp ----------------------- -Colors a disparity image. - -.. ocv:function:: void gpu::drawColorDisp(const GpuMat& src_disp, GpuMat& dst_disp, int ndisp, Stream& stream = Stream::Null()) - - :param src_disp: Source disparity image. ``CV_8UC1`` and ``CV_16SC1`` types are supported. - - :param dst_disp: Output disparity image. It has the same size as ``src_disp`` . The type is ``CV_8UC4`` in ``BGRA`` format (alpha = 255). - - :param ndisp: Number of disparities. - - :param stream: Stream for the asynchronous version. - -This function draws a colored disparity map by converting disparity values from ``[0..ndisp)`` interval first to ``HSV`` color space (where different disparity values correspond to different hues) and then converting the pixels to ``RGB`` for visualization. 
- - - gpu::reprojectImageTo3D ---------------------------- +----------------------- Reprojects a disparity image to 3D space. -.. ocv:function:: void gpu::reprojectImageTo3D(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, int dst_cn = 4, Stream& stream = Stream::Null()) +.. ocv:function:: void gpu::reprojectImageTo3D(InputArray disp, OutputArray xyzw, InputArray Q, int dst_cn = 4, Stream& stream = Stream::Null()) :param disp: Input disparity image. ``CV_8U`` and ``CV_16S`` types are supported. @@ -462,6 +310,23 @@ Reprojects a disparity image to 3D space. -.. [Felzenszwalb2006] Pedro F. Felzenszwalb algorithm [Pedro F. Felzenszwalb and Daniel P. Huttenlocher. *Efficient belief propagation for early vision*. International Journal of Computer Vision, 70(1), October 2006 +gpu::drawColorDisp +------------------ +Colors a disparity image. +.. ocv:function:: void gpu::drawColorDisp(InputArray src_disp, OutputArray dst_disp, int ndisp, Stream& stream = Stream::Null()) + + :param src_disp: Source disparity image. ``CV_8UC1`` and ``CV_16SC1`` types are supported. + + :param dst_disp: Output disparity image. It has the same size as ``src_disp`` . The type is ``CV_8UC4`` in ``BGRA`` format (alpha = 255). + + :param ndisp: Number of disparities. + + :param stream: Stream for the asynchronous version. + +This function draws a colored disparity map by converting disparity values from ``[0..ndisp)`` interval first to ``HSV`` color space (where different disparity values correspond to different hues) and then converting the pixels to ``RGB`` for visualization. + + + +.. [Felzenszwalb2006] Pedro F. Felzenszwalb algorithm [Pedro F. Felzenszwalb and Daniel P. Huttenlocher. *Efficient belief propagation for early vision*. International Journal of Computer Vision, 70(1), October 2006 .. [Yang2010] Q. Yang, L. Wang, and N. Ahuja. *A constant-space belief propagation algorithm for stereo matching*. In CVPR, 2010. 
diff --git a/modules/gpustereo/include/opencv2/gpustereo.hpp b/modules/gpustereo/include/opencv2/gpustereo.hpp index d303c7897..250e89b85 100644 --- a/modules/gpustereo/include/opencv2/gpustereo.hpp +++ b/modules/gpustereo/include/opencv2/gpustereo.hpp @@ -103,6 +103,7 @@ public: virtual double getDiscSingleJump() const = 0; virtual void setDiscSingleJump(double disc_single_jump) = 0; + //! type for messages (CV_16SC1 or CV_32FC1) virtual int getMsgType() const = 0; virtual void setMsgType(int msg_type) = 0; From f003e29dc0e10fa7d28dd5c717fbec134b2bf67e Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Thu, 13 Jun 2013 12:22:12 +0400 Subject: [PATCH 092/121] Updated testlog_parser.py to the latest version from the private repo. --- modules/ts/misc/testlog_parser.py | 39 +++++++++++++++++-------------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/modules/ts/misc/testlog_parser.py b/modules/ts/misc/testlog_parser.py index 7ae6aa598..8ab21417c 100755 --- a/modules/ts/misc/testlog_parser.py +++ b/modules/ts/misc/testlog_parser.py @@ -100,34 +100,39 @@ class TestInfo(object): def dump(self, units="ms"): print "%s ->\t\033[1;31m%s\033[0m = \t%.2f%s" % (str(self), self.status, self.get("gmean", units), units) - def shortName(self): + + def getName(self): pos = self.name.find("/") if pos > 0: - name = self.name[:pos] - else: - name = self.name - if self.fixture.endswith(name): - fixture = self.fixture[:-len(name)] + return self.name[:pos] + return self.name + + + def getFixture(self): + if self.fixture.endswith(self.getName()): + fixture = self.fixture[:-len(self.getName())] else: fixture = self.fixture if fixture.endswith("_"): fixture = fixture[:-1] + return fixture + + + def param(self): + return '::'.join(filter(None, [self.type_param, self.value_param])) + + def shortName(self): + name = self.getName() + fixture = self.getFixture() return '::'.join(filter(None, [name, fixture])) + def __str__(self): - pos = self.name.find("/") - if pos > 0: - name 
= self.name[:pos] - else: - name = self.name - if self.fixture.endswith(name): - fixture = self.fixture[:-len(name)] - else: - fixture = self.fixture - if fixture.endswith("_"): - fixture = fixture[:-1] + name = self.getName() + fixture = self.getFixture() return '::'.join(filter(None, [name, fixture, self.type_param, self.value_param])) + def __cmp__(self, other): r = cmp(self.fixture, other.fixture); if r != 0: From 6ff207b53a6379933018c88167ee11b5b1a62e2d Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Fri, 14 Jun 2013 14:53:02 +0400 Subject: [PATCH 093/121] Added a new and improved version of the XLS report generator. --- modules/ts/misc/xls-report.py | 171 ++++++++++++++++++++++++++++++++++ 1 file changed, 171 insertions(+) create mode 100755 modules/ts/misc/xls-report.py diff --git a/modules/ts/misc/xls-report.py b/modules/ts/misc/xls-report.py new file mode 100755 index 000000000..fb6cfd096 --- /dev/null +++ b/modules/ts/misc/xls-report.py @@ -0,0 +1,171 @@ +#!/usr/bin/env python + +from __future__ import division + +import ast +import logging +import os, os.path +import re + +from argparse import ArgumentParser +from glob import glob +from itertools import ifilter + +import xlwt + +from testlog_parser import parseLogFile + +# To build XLS report you neet to put your xmls (OpenCV tests output) in the +# following way: +# +# "root" --- folder, representing the whole XLS document. It contains several +# subfolders --- sheet-paths of the XLS document. Each sheet-path contains it's +# subfolders --- config-paths. Config-paths are columns of the sheet and +# they contains xmls files --- output of OpenCV modules testing. +# Config-path means OpenCV build configuration, including different +# options such as NEON, TBB, GPU enabling/disabling. 
+# +# root +# root\sheet_path +# root\sheet_path\configuration1 (column 1) +# root\sheet_path\configuration2 (column 2) + +re_image_size = re.compile(r'^ \d+ x \d+$', re.VERBOSE) +re_data_type = re.compile(r'^ (?: 8 | 16 | 32 | 64 ) [USF] C [1234] $', re.VERBOSE) + +time_style = xlwt.easyxf(num_format_str='#0.00') +no_time_style = xlwt.easyxf('pattern: pattern solid, fore_color gray25') + +speedup_style = time_style +good_speedup_style = xlwt.easyxf('font: color green', num_format_str='#0.00') +bad_speedup_style = xlwt.easyxf('font: color red', num_format_str='#0.00') +no_speedup_style = no_time_style +error_speedup_style = xlwt.easyxf('pattern: pattern solid, fore_color orange') +header_style = xlwt.easyxf('font: bold true; alignment: horizontal centre, vertical top') + +def collect_xml(collection, configuration, xml_fullname): + xml_fname = os.path.split(xml_fullname)[1] + module = xml_fname[:xml_fname.index('_')] + + if module not in collection: + collection[module] = {} + + for test in sorted(parseLogFile(xml_fullname)): + if test.shortName() not in collection[module]: + collection[module][test.shortName()] = {} + if test.param() not in collection[module][test.shortName()]: + collection[module][test.shortName()][test.param()] = {} + collection[module][test.shortName()][test.param()][configuration] = \ + test.get("gmean") + +def main(): + arg_parser = ArgumentParser(description='Build an XLS performance report.') + arg_parser.add_argument('sheet_dirs', nargs='+', metavar='DIR', help='directory containing perf test logs') + arg_parser.add_argument('-o', '--output', metavar='XLS', default='report.xls', help='name of output file') + arg_parser.add_argument('-c', '--config', metavar='CONF', help='global configuration file') + + args = arg_parser.parse_args() + + logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.DEBUG) + + if args.config is not None: + with open(args.config) as global_conf_file: + global_conf = 
ast.literal_eval(global_conf_file.read()) + else: + global_conf = {} + + wb = xlwt.Workbook() + + for sheet_path in args.sheet_dirs: + try: + with open(os.path.join(sheet_path, 'sheet.conf')) as sheet_conf_file: + sheet_conf = ast.literal_eval(sheet_conf_file.read()) + except Exception: + sheet_conf = {} + logging.debug('no sheet.conf for {}'.format(sheet_path)) + + sheet_conf = dict(global_conf.items() + sheet_conf.items()) + + if 'configurations' in sheet_conf: + config_names = sheet_conf['configurations'] + else: + try: + config_names = [p for p in os.listdir(sheet_path) + if os.path.isdir(os.path.join(sheet_path, p))] + except Exception as e: + logging.warning(e) + continue + + collection = {} + + for configuration, configuration_path in \ + [(c, os.path.join(sheet_path, c)) for c in config_names]: + logging.info('processing {}'.format(configuration_path)) + for xml_fullname in glob(os.path.join(configuration_path, '*.xml')): + collect_xml(collection, configuration, xml_fullname) + + sheet = wb.add_sheet(sheet_conf.get('sheet_name', os.path.basename(os.path.abspath(sheet_path)))) + + sheet.row(0).height = 800 + sheet.panes_frozen = True + sheet.remove_splits = True + sheet.horz_split_pos = 1 + sheet.horz_split_first_visible = 1 + + sheet_comparisons = sheet_conf.get('comparisons', []) + + for i, w in enumerate([2000, 15000, 2500, 2000, 15000] + + (len(config_names) + 1 + len(sheet_comparisons)) * [3000]): + sheet.col(i).width = w + + for i, caption in enumerate(['Module', 'Test', 'Image\nsize', 'Data\ntype', 'Parameters'] + + config_names + [None] + + [comp['from'] + '\nvs\n' + comp['to'] for comp in sheet_comparisons]): + sheet.row(0).write(i, caption, header_style) + + row = 1 + + module_colors = sheet_conf.get('module_colors', {}) + module_styles = {module: xlwt.easyxf('pattern: pattern solid, fore_color {}'.format(color)) + for module, color in module_colors.iteritems()} + + for module, tests in collection.iteritems(): + for test, params in 
tests.iteritems(): + for param, configs in params.iteritems(): + sheet.write(row, 0, module, module_styles.get(module, xlwt.Style.default_style)) + sheet.write(row, 1, test) + + param_list = param[1:-1].split(", ") + sheet.write(row, 2, next(ifilter(re_image_size.match, param_list), None)) + sheet.write(row, 3, next(ifilter(re_data_type.match, param_list), None)) + + sheet.row(row).write(4, param) + for i, c in enumerate(config_names): + if c in configs: + sheet.write(row, 5 + i, configs[c], time_style) + else: + sheet.write(row, 5 + i, None, no_time_style) + + for i, comp in enumerate(sheet_comparisons): + left = configs.get(comp["from"]) + right = configs.get(comp["to"]) + col = 5 + len(config_names) + 1 + i + + if left is not None and right is not None: + try: + speedup = left / right + sheet.write(row, col, speedup, good_speedup_style if speedup > 1.1 else + bad_speedup_style if speedup < 0.9 else + speedup_style) + except ArithmeticError as e: + sheet.write(row, col, None, error_speedup_style) + else: + sheet.write(row, col, None, no_speedup_style) + + row += 1 + if row % 1000 == 0: sheet.flush_row_data() + + wb.save(args.output) + +if __name__ == '__main__': + main() From 4d7b1b5eded9cfbb456b0238a2f55c6f6ae491ee Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Mon, 17 Jun 2013 21:06:02 +0400 Subject: [PATCH 094/121] In the XLS report, enabled word wrapping for header cells. Otherwise, Excel ignores line breaks in them. 
--- modules/ts/misc/xls-report.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ts/misc/xls-report.py b/modules/ts/misc/xls-report.py index fb6cfd096..f8288e16d 100755 --- a/modules/ts/misc/xls-report.py +++ b/modules/ts/misc/xls-report.py @@ -41,7 +41,7 @@ good_speedup_style = xlwt.easyxf('font: color green', num_format_str='#0.00') bad_speedup_style = xlwt.easyxf('font: color red', num_format_str='#0.00') no_speedup_style = no_time_style error_speedup_style = xlwt.easyxf('pattern: pattern solid, fore_color orange') -header_style = xlwt.easyxf('font: bold true; alignment: horizontal centre, vertical top') +header_style = xlwt.easyxf('font: bold true; alignment: horizontal centre, vertical top, wrap True') def collect_xml(collection, configuration, xml_fullname): xml_fname = os.path.split(xml_fullname)[1] From 0f1156bbb61efa0ec7d7b48e8a0cd02ec72378ba Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Tue, 18 Jun 2013 13:36:20 +0400 Subject: [PATCH 095/121] Made the order of tests in XLS reports deterministic. 
--- modules/ts/misc/xls-report.py | 71 ++++++++++++++++------------------- 1 file changed, 33 insertions(+), 38 deletions(-) diff --git a/modules/ts/misc/xls-report.py b/modules/ts/misc/xls-report.py index f8288e16d..7e63b6737 100755 --- a/modules/ts/misc/xls-report.py +++ b/modules/ts/misc/xls-report.py @@ -8,6 +8,7 @@ import os, os.path import re from argparse import ArgumentParser +from collections import OrderedDict from glob import glob from itertools import ifilter @@ -47,16 +48,11 @@ def collect_xml(collection, configuration, xml_fullname): xml_fname = os.path.split(xml_fullname)[1] module = xml_fname[:xml_fname.index('_')] - if module not in collection: - collection[module] = {} + module_tests = collection.setdefault(module, OrderedDict()) for test in sorted(parseLogFile(xml_fullname)): - if test.shortName() not in collection[module]: - collection[module][test.shortName()] = {} - if test.param() not in collection[module][test.shortName()]: - collection[module][test.shortName()][test.param()] = {} - collection[module][test.shortName()][test.param()][configuration] = \ - test.get("gmean") + test_results = module_tests.setdefault((test.shortName(), test.param()), {}) + test_results[configuration] = test.get("gmean") def main(): arg_parser = ArgumentParser(description='Build an XLS performance report.') @@ -129,41 +125,40 @@ def main(): module_styles = {module: xlwt.easyxf('pattern: pattern solid, fore_color {}'.format(color)) for module, color in module_colors.iteritems()} - for module, tests in collection.iteritems(): - for test, params in tests.iteritems(): - for param, configs in params.iteritems(): - sheet.write(row, 0, module, module_styles.get(module, xlwt.Style.default_style)) - sheet.write(row, 1, test) + for module, tests in sorted(collection.iteritems()): + for ((test, param), configs) in tests.iteritems(): + sheet.write(row, 0, module, module_styles.get(module, xlwt.Style.default_style)) + sheet.write(row, 1, test) - param_list = 
param[1:-1].split(", ") - sheet.write(row, 2, next(ifilter(re_image_size.match, param_list), None)) - sheet.write(row, 3, next(ifilter(re_data_type.match, param_list), None)) + param_list = param[1:-1].split(", ") + sheet.write(row, 2, next(ifilter(re_image_size.match, param_list), None)) + sheet.write(row, 3, next(ifilter(re_data_type.match, param_list), None)) - sheet.row(row).write(4, param) - for i, c in enumerate(config_names): - if c in configs: - sheet.write(row, 5 + i, configs[c], time_style) - else: - sheet.write(row, 5 + i, None, no_time_style) + sheet.row(row).write(4, param) + for i, c in enumerate(config_names): + if c in configs: + sheet.write(row, 5 + i, configs[c], time_style) + else: + sheet.write(row, 5 + i, None, no_time_style) - for i, comp in enumerate(sheet_comparisons): - left = configs.get(comp["from"]) - right = configs.get(comp["to"]) - col = 5 + len(config_names) + 1 + i + for i, comp in enumerate(sheet_comparisons): + left = configs.get(comp["from"]) + right = configs.get(comp["to"]) + col = 5 + len(config_names) + 1 + i - if left is not None and right is not None: - try: - speedup = left / right - sheet.write(row, col, speedup, good_speedup_style if speedup > 1.1 else - bad_speedup_style if speedup < 0.9 else - speedup_style) - except ArithmeticError as e: - sheet.write(row, col, None, error_speedup_style) - else: - sheet.write(row, col, None, no_speedup_style) + if left is not None and right is not None: + try: + speedup = left / right + sheet.write(row, col, speedup, good_speedup_style if speedup > 1.1 else + bad_speedup_style if speedup < 0.9 else + speedup_style) + except ArithmeticError as e: + sheet.write(row, col, None, error_speedup_style) + else: + sheet.write(row, col, None, no_speedup_style) - row += 1 - if row % 1000 == 0: sheet.flush_row_data() + row += 1 + if row % 1000 == 0: sheet.flush_row_data() wb.save(args.output) From 584f0745d0f917c993629c6e77bf898c6d243bf0 Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Tue, 
18 Jun 2013 12:30:05 +0400 Subject: [PATCH 096/121] Made xls-report.py ignore tests that were not successful. --- modules/ts/misc/xls-report.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/ts/misc/xls-report.py b/modules/ts/misc/xls-report.py index 7e63b6737..f6278bae0 100755 --- a/modules/ts/misc/xls-report.py +++ b/modules/ts/misc/xls-report.py @@ -52,7 +52,8 @@ def collect_xml(collection, configuration, xml_fullname): for test in sorted(parseLogFile(xml_fullname)): test_results = module_tests.setdefault((test.shortName(), test.param()), {}) - test_results[configuration] = test.get("gmean") + if test.status == 'run': + test_results[configuration] = test.get("gmean") def main(): arg_parser = ArgumentParser(description='Build an XLS performance report.') From 16c4aad36de4e42624e70baf677dc67d0c17fefa Mon Sep 17 00:00:00 2001 From: Alexander Smorkalov Date: Thu, 13 Jun 2013 15:38:21 +0400 Subject: [PATCH 097/121] Java/Python bindings for computeCorrespondEpilines added. Simple Java test for computeCorrespondEpilines added. --- .../calib3d/include/opencv2/calib3d/calib3d.hpp | 6 +++--- .../src/org/opencv/test/calib3d/Calib3dTest.java | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) diff --git a/modules/calib3d/include/opencv2/calib3d/calib3d.hpp b/modules/calib3d/include/opencv2/calib3d/calib3d.hpp index 0d1cc4691..f213a114f 100644 --- a/modules/calib3d/include/opencv2/calib3d/calib3d.hpp +++ b/modules/calib3d/include/opencv2/calib3d/calib3d.hpp @@ -639,9 +639,9 @@ CV_EXPORTS Mat findFundamentalMat( InputArray points1, InputArray points2, double param1=3., double param2=0.99); //!
finds coordinates of epipolar lines corresponding the specified points -CV_EXPORTS void computeCorrespondEpilines( InputArray points, - int whichImage, InputArray F, - OutputArray lines ); +CV_EXPORTS_W void computeCorrespondEpilines( InputArray points, + int whichImage, InputArray F, + OutputArray lines ); CV_EXPORTS_W void triangulatePoints( InputArray projMatr1, InputArray projMatr2, InputArray projPoints1, InputArray projPoints2, diff --git a/modules/java/android_test/src/org/opencv/test/calib3d/Calib3dTest.java b/modules/java/android_test/src/org/opencv/test/calib3d/Calib3dTest.java index 8bcaf58a0..db806b6fc 100644 --- a/modules/java/android_test/src/org/opencv/test/calib3d/Calib3dTest.java +++ b/modules/java/android_test/src/org/opencv/test/calib3d/Calib3dTest.java @@ -585,4 +585,18 @@ public class Calib3dTest extends OpenCVTestCase { public void testValidateDisparityMatMatIntIntInt() { fail("Not yet implemented"); } + + public void testComputeCorrespondEpilines() + { + Mat fundamental = new Mat(3, 3, CvType.CV_64F); + fundamental.put(0, 0, 0, -0.577, 0.288, 0.577, 0, 0.288, -0.288, -0.288, 0); + MatOfPoint2f left = new MatOfPoint2f(); + left.alloc(1); + left.put(0, 0, 2, 3); //add(new Point(x, y)); + Mat lines = new Mat(); + Mat truth = new Mat(1, 1, CvType.CV_32FC3); + truth.put(0, 0, -0.70735186, 0.70686162, -0.70588124); + Calib3d.computeCorrespondEpilines(left, 1, fundamental, lines); + assertMatEqual(truth, lines, EPS); + } } From 1492b204727066daae2967f1bb2831acde42eb92 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Tue, 18 Jun 2013 13:17:33 +0400 Subject: [PATCH 098/121] fix gpu warnings with signed/unsigned char --- .../gpu/include/opencv2/gpu/device/limits.hpp | 231 +++++------------- modules/gpu/src/nvidia/core/NCV.hpp | 2 +- .../src/nvidia/core/NCVPixelOperations.hpp | 4 +- 3 files changed, 62 insertions(+), 175 deletions(-) diff --git a/modules/gpu/include/opencv2/gpu/device/limits.hpp 
b/modules/gpu/include/opencv2/gpu/device/limits.hpp index b040f199d..595978006 100644 --- a/modules/gpu/include/opencv2/gpu/device/limits.hpp +++ b/modules/gpu/include/opencv2/gpu/device/limits.hpp @@ -43,193 +43,80 @@ #ifndef __OPENCV_GPU_LIMITS_GPU_HPP__ #define __OPENCV_GPU_LIMITS_GPU_HPP__ -#include +#include +#include #include "common.hpp" namespace cv { namespace gpu { namespace device { - template struct numeric_limits - { - typedef T type; - __device__ __forceinline__ static type min() { return type(); }; - __device__ __forceinline__ static type max() { return type(); }; - __device__ __forceinline__ static type epsilon() { return type(); } - __device__ __forceinline__ static type round_error() { return type(); } - __device__ __forceinline__ static type denorm_min() { return type(); } - __device__ __forceinline__ static type infinity() { return type(); } - __device__ __forceinline__ static type quiet_NaN() { return type(); } - __device__ __forceinline__ static type signaling_NaN() { return T(); } - static const bool is_signed; - }; - template<> struct numeric_limits - { - typedef bool type; - __device__ __forceinline__ static type min() { return false; }; - __device__ __forceinline__ static type max() { return true; }; - __device__ __forceinline__ static type epsilon(); - __device__ __forceinline__ static type round_error(); - __device__ __forceinline__ static type denorm_min(); - __device__ __forceinline__ static type infinity(); - __device__ __forceinline__ static type quiet_NaN(); - __device__ __forceinline__ static type signaling_NaN(); - static const bool is_signed = false; - }; +template struct numeric_limits; - template<> struct numeric_limits - { - typedef char type; - __device__ __forceinline__ static type min() { return CHAR_MIN; }; - __device__ __forceinline__ static type max() { return CHAR_MAX; }; - __device__ __forceinline__ static type epsilon(); - __device__ __forceinline__ static type round_error(); - __device__ __forceinline__ static type 
denorm_min(); - __device__ __forceinline__ static type infinity(); - __device__ __forceinline__ static type quiet_NaN(); - __device__ __forceinline__ static type signaling_NaN(); - static const bool is_signed = (char)-1 == -1; - }; +template <> struct numeric_limits +{ + __device__ __forceinline__ static bool min() { return false; } + __device__ __forceinline__ static bool max() { return true; } + static const bool is_signed = false; +}; - template<> struct numeric_limits - { - typedef char type; - __device__ __forceinline__ static type min() { return SCHAR_MIN; }; - __device__ __forceinline__ static type max() { return SCHAR_MAX; }; - __device__ __forceinline__ static type epsilon(); - __device__ __forceinline__ static type round_error(); - __device__ __forceinline__ static type denorm_min(); - __device__ __forceinline__ static type infinity(); - __device__ __forceinline__ static type quiet_NaN(); - __device__ __forceinline__ static type signaling_NaN(); - static const bool is_signed = (signed char)-1 == -1; - }; +template <> struct numeric_limits +{ + __device__ __forceinline__ static signed char min() { return SCHAR_MIN; } + __device__ __forceinline__ static signed char max() { return SCHAR_MAX; } + static const bool is_signed = true; +}; - template<> struct numeric_limits - { - typedef unsigned char type; - __device__ __forceinline__ static type min() { return 0; }; - __device__ __forceinline__ static type max() { return UCHAR_MAX; }; - __device__ __forceinline__ static type epsilon(); - __device__ __forceinline__ static type round_error(); - __device__ __forceinline__ static type denorm_min(); - __device__ __forceinline__ static type infinity(); - __device__ __forceinline__ static type quiet_NaN(); - __device__ __forceinline__ static type signaling_NaN(); - static const bool is_signed = false; - }; +template <> struct numeric_limits +{ + __device__ __forceinline__ static unsigned char min() { return 0; } + __device__ __forceinline__ static unsigned char max() 
{ return UCHAR_MAX; } + static const bool is_signed = false; +}; - template<> struct numeric_limits - { - typedef short type; - __device__ __forceinline__ static type min() { return SHRT_MIN; }; - __device__ __forceinline__ static type max() { return SHRT_MAX; }; - __device__ __forceinline__ static type epsilon(); - __device__ __forceinline__ static type round_error(); - __device__ __forceinline__ static type denorm_min(); - __device__ __forceinline__ static type infinity(); - __device__ __forceinline__ static type quiet_NaN(); - __device__ __forceinline__ static type signaling_NaN(); - static const bool is_signed = true; - }; +template <> struct numeric_limits +{ + __device__ __forceinline__ static short min() { return SHRT_MIN; } + __device__ __forceinline__ static short max() { return SHRT_MAX; } + static const bool is_signed = true; +}; - template<> struct numeric_limits - { - typedef unsigned short type; - __device__ __forceinline__ static type min() { return 0; }; - __device__ __forceinline__ static type max() { return USHRT_MAX; }; - __device__ __forceinline__ static type epsilon(); - __device__ __forceinline__ static type round_error(); - __device__ __forceinline__ static type denorm_min(); - __device__ __forceinline__ static type infinity(); - __device__ __forceinline__ static type quiet_NaN(); - __device__ __forceinline__ static type signaling_NaN(); - static const bool is_signed = false; - }; +template <> struct numeric_limits +{ + __device__ __forceinline__ static unsigned short min() { return 0; } + __device__ __forceinline__ static unsigned short max() { return USHRT_MAX; } + static const bool is_signed = false; +}; - template<> struct numeric_limits - { - typedef int type; - __device__ __forceinline__ static type min() { return INT_MIN; }; - __device__ __forceinline__ static type max() { return INT_MAX; }; - __device__ __forceinline__ static type epsilon(); - __device__ __forceinline__ static type round_error(); - __device__ __forceinline__ static 
type denorm_min(); - __device__ __forceinline__ static type infinity(); - __device__ __forceinline__ static type quiet_NaN(); - __device__ __forceinline__ static type signaling_NaN(); - static const bool is_signed = true; - }; +template <> struct numeric_limits +{ + __device__ __forceinline__ static int min() { return INT_MIN; } + __device__ __forceinline__ static int max() { return INT_MAX; } + static const bool is_signed = true; +}; +template <> struct numeric_limits +{ + __device__ __forceinline__ static unsigned int min() { return 0; } + __device__ __forceinline__ static unsigned int max() { return UINT_MAX; } + static const bool is_signed = false; +}; - template<> struct numeric_limits - { - typedef unsigned int type; - __device__ __forceinline__ static type min() { return 0; }; - __device__ __forceinline__ static type max() { return UINT_MAX; }; - __device__ __forceinline__ static type epsilon(); - __device__ __forceinline__ static type round_error(); - __device__ __forceinline__ static type denorm_min(); - __device__ __forceinline__ static type infinity(); - __device__ __forceinline__ static type quiet_NaN(); - __device__ __forceinline__ static type signaling_NaN(); - static const bool is_signed = false; - }; +template <> struct numeric_limits +{ + __device__ __forceinline__ static float min() { return FLT_MIN; } + __device__ __forceinline__ static float max() { return FLT_MAX; } + __device__ __forceinline__ static float epsilon() { return FLT_EPSILON; } + static const bool is_signed = true; +}; - template<> struct numeric_limits - { - typedef long type; - __device__ __forceinline__ static type min() { return LONG_MIN; }; - __device__ __forceinline__ static type max() { return LONG_MAX; }; - __device__ __forceinline__ static type epsilon(); - __device__ __forceinline__ static type round_error(); - __device__ __forceinline__ static type denorm_min(); - __device__ __forceinline__ static type infinity(); - __device__ __forceinline__ static type quiet_NaN(); - 
__device__ __forceinline__ static type signaling_NaN(); - static const bool is_signed = true; - }; +template <> struct numeric_limits +{ + __device__ __forceinline__ static double min() { return DBL_MIN; } + __device__ __forceinline__ static double max() { return DBL_MAX; } + __device__ __forceinline__ static double epsilon() { return DBL_EPSILON; } + static const bool is_signed = true; +}; - template<> struct numeric_limits - { - typedef unsigned long type; - __device__ __forceinline__ static type min() { return 0; }; - __device__ __forceinline__ static type max() { return ULONG_MAX; }; - __device__ __forceinline__ static type epsilon(); - __device__ __forceinline__ static type round_error(); - __device__ __forceinline__ static type denorm_min(); - __device__ __forceinline__ static type infinity(); - __device__ __forceinline__ static type quiet_NaN(); - __device__ __forceinline__ static type signaling_NaN(); - static const bool is_signed = false; - }; - - template<> struct numeric_limits - { - typedef float type; - __device__ __forceinline__ static type min() { return 1.175494351e-38f/*FLT_MIN*/; }; - __device__ __forceinline__ static type max() { return 3.402823466e+38f/*FLT_MAX*/; }; - __device__ __forceinline__ static type epsilon() { return 1.192092896e-07f/*FLT_EPSILON*/; }; - __device__ __forceinline__ static type round_error(); - __device__ __forceinline__ static type denorm_min(); - __device__ __forceinline__ static type infinity(); - __device__ __forceinline__ static type quiet_NaN(); - __device__ __forceinline__ static type signaling_NaN(); - static const bool is_signed = true; - }; - - template<> struct numeric_limits - { - typedef double type; - __device__ __forceinline__ static type min() { return 2.2250738585072014e-308/*DBL_MIN*/; }; - __device__ __forceinline__ static type max() { return 1.7976931348623158e+308/*DBL_MAX*/; }; - __device__ __forceinline__ static type epsilon(); - __device__ __forceinline__ static type round_error(); - __device__ 
__forceinline__ static type denorm_min(); - __device__ __forceinline__ static type infinity(); - __device__ __forceinline__ static type quiet_NaN(); - __device__ __forceinline__ static type signaling_NaN(); - static const bool is_signed = true; - }; }}} // namespace cv { namespace gpu { namespace device { #endif // __OPENCV_GPU_LIMITS_GPU_HPP__ diff --git a/modules/gpu/src/nvidia/core/NCV.hpp b/modules/gpu/src/nvidia/core/NCV.hpp index 0394dba18..80e1da795 100644 --- a/modules/gpu/src/nvidia/core/NCV.hpp +++ b/modules/gpu/src/nvidia/core/NCV.hpp @@ -130,7 +130,7 @@ typedef int Ncv32s; typedef unsigned int Ncv32u; typedef short Ncv16s; typedef unsigned short Ncv16u; -typedef char Ncv8s; +typedef signed char Ncv8s; typedef unsigned char Ncv8u; typedef float Ncv32f; typedef double Ncv64f; diff --git a/modules/gpu/src/nvidia/core/NCVPixelOperations.hpp b/modules/gpu/src/nvidia/core/NCVPixelOperations.hpp index ec2f16ebb..c1e06b434 100644 --- a/modules/gpu/src/nvidia/core/NCVPixelOperations.hpp +++ b/modules/gpu/src/nvidia/core/NCVPixelOperations.hpp @@ -51,7 +51,7 @@ template inline __host__ __device__ TBase _pixMaxVal(); template<> static inline __host__ __device__ Ncv8u _pixMaxVal() {return UCHAR_MAX;} template<> static inline __host__ __device__ Ncv16u _pixMaxVal() {return USHRT_MAX;} template<> static inline __host__ __device__ Ncv32u _pixMaxVal() {return UINT_MAX;} -template<> static inline __host__ __device__ Ncv8s _pixMaxVal() {return CHAR_MAX;} +template<> static inline __host__ __device__ Ncv8s _pixMaxVal() {return SCHAR_MAX;} template<> static inline __host__ __device__ Ncv16s _pixMaxVal() {return SHRT_MAX;} template<> static inline __host__ __device__ Ncv32s _pixMaxVal() {return INT_MAX;} template<> static inline __host__ __device__ Ncv32f _pixMaxVal() {return FLT_MAX;} @@ -61,7 +61,7 @@ template inline __host__ __device__ TBase _pixMinVal(); template<> static inline __host__ __device__ Ncv8u _pixMinVal() {return 0;} template<> static inline __host__ 
__device__ Ncv16u _pixMinVal() {return 0;} template<> static inline __host__ __device__ Ncv32u _pixMinVal() {return 0;} -template<> static inline __host__ __device__ Ncv8s _pixMinVal() {return CHAR_MIN;} +template<> static inline __host__ __device__ Ncv8s _pixMinVal() {return SCHAR_MIN;} template<> static inline __host__ __device__ Ncv16s _pixMinVal() {return SHRT_MIN;} template<> static inline __host__ __device__ Ncv32s _pixMinVal() {return INT_MIN;} template<> static inline __host__ __device__ Ncv32f _pixMinVal() {return FLT_MIN;} From 24d84a45b19dd3d2016bacf943a3811c67e804d4 Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Mon, 17 Jun 2013 21:06:15 +0400 Subject: [PATCH 099/121] Made tests record in the XML output which parallel framework was used. --- .../core/include/opencv2/core/internal.hpp | 26 ++++++++++++++++++ modules/core/src/parallel.cpp | 27 +++++-------------- modules/ts/src/precomp.hpp | 2 ++ modules/ts/src/ts_func.cpp | 8 ++++++ 4 files changed, 42 insertions(+), 21 deletions(-) diff --git a/modules/core/include/opencv2/core/internal.hpp b/modules/core/include/opencv2/core/internal.hpp index 5335fa01f..10cd2caf9 100644 --- a/modules/core/include/opencv2/core/internal.hpp +++ b/modules/core/include/opencv2/core/internal.hpp @@ -50,6 +50,8 @@ #include +#include "cvconfig.h" + #if defined WIN32 || defined _WIN32 # ifndef WIN32 # define WIN32 @@ -184,6 +186,30 @@ CV_INLINE IppiSize ippiSize(int width, int height) # include "opencv2/core/eigen.hpp" #endif +#ifdef _OPENMP +# define HAVE_OPENMP +#endif + +#ifdef __APPLE__ +# define HAVE_GCD +#endif + +#if defined _MSC_VER && _MSC_VER >= 1600 +# define HAVE_CONCURRENCY +#endif + +#if defined HAVE_TBB && TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202 +# define CV_PARALLEL_FRAMEWORK "tbb" +#elif defined HAVE_CSTRIPES +# define CV_PARALLEL_FRAMEWORK "cstripes" +#elif defined HAVE_OPENMP +# define CV_PARALLEL_FRAMEWORK "openmp" +#elif defined HAVE_GCD +# define CV_PARALLEL_FRAMEWORK "gcd" +#elif defined 
HAVE_CONCURRENCY +# define CV_PARALLEL_FRAMEWORK "ms-concurrency" +#endif + #ifdef __cplusplus namespace cv diff --git a/modules/core/src/parallel.cpp b/modules/core/src/parallel.cpp index 0b2a845ac..51b165275 100644 --- a/modules/core/src/parallel.cpp +++ b/modules/core/src/parallel.cpp @@ -61,17 +61,6 @@ #endif #endif -#ifdef _OPENMP - #define HAVE_OPENMP -#endif - -#ifdef __APPLE__ - #define HAVE_GCD -#endif - -#if defined _MSC_VER && _MSC_VER >= 1600 - #define HAVE_CONCURRENCY -#endif /* IMPORTANT: always use the same order of defines 1. HAVE_TBB - 3rdparty library, should be explicitly enabled @@ -110,10 +99,6 @@ #endif #endif -#if defined HAVE_TBB || defined HAVE_CSTRIPES || defined HAVE_OPENMP || defined HAVE_GCD || defined HAVE_CONCURRENCY - #define HAVE_PARALLEL_FRAMEWORK -#endif - namespace cv { ParallelLoopBody::~ParallelLoopBody() {} @@ -121,7 +106,7 @@ namespace cv namespace { -#ifdef HAVE_PARALLEL_FRAMEWORK +#ifdef CV_PARALLEL_FRAMEWORK class ParallelLoopBodyWrapper { public: @@ -218,7 +203,7 @@ public: static SchedPtr pplScheduler; #endif -#endif // HAVE_PARALLEL_FRAMEWORK +#endif // CV_PARALLEL_FRAMEWORK } //namespace @@ -226,7 +211,7 @@ static SchedPtr pplScheduler; void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body, double nstripes) { -#ifdef HAVE_PARALLEL_FRAMEWORK +#ifdef CV_PARALLEL_FRAMEWORK if(numThreads != 0) { @@ -281,7 +266,7 @@ void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body, } else -#endif // HAVE_PARALLEL_FRAMEWORK +#endif // CV_PARALLEL_FRAMEWORK { (void)nstripes; body(range); @@ -290,7 +275,7 @@ void cv::parallel_for_(const cv::Range& range, const cv::ParallelLoopBody& body, int cv::getNumThreads(void) { -#ifdef HAVE_PARALLEL_FRAMEWORK +#ifdef CV_PARALLEL_FRAMEWORK if(numThreads == 0) return 1; @@ -333,7 +318,7 @@ int cv::getNumThreads(void) void cv::setNumThreads( int threads ) { (void)threads; -#ifdef HAVE_PARALLEL_FRAMEWORK +#ifdef CV_PARALLEL_FRAMEWORK numThreads = 
threads; #endif diff --git a/modules/ts/src/precomp.hpp b/modules/ts/src/precomp.hpp index 10acd7ad8..0b2adacc4 100644 --- a/modules/ts/src/precomp.hpp +++ b/modules/ts/src/precomp.hpp @@ -1,4 +1,6 @@ +#include "opencv2/core/core.hpp" #include "opencv2/core/core_c.h" +#include "opencv2/core/internal.hpp" #include "opencv2/ts/ts.hpp" #ifdef GTEST_LINKED_AS_SHARED_LIBRARY diff --git a/modules/ts/src/ts_func.cpp b/modules/ts/src/ts_func.cpp index 1d636e674..7a292d71c 100644 --- a/modules/ts/src/ts_func.cpp +++ b/modules/ts/src/ts_func.cpp @@ -2958,6 +2958,14 @@ void printVersionInfo(bool useStdOut) ::testing::Test::RecordProperty("inner_version", ver); if(useStdOut) std::cout << ver << std::endl; } + +#ifdef CV_PARALLEL_FRAMEWORK + ::testing::Test::RecordProperty("cv_parallel_framework", CV_PARALLEL_FRAMEWORK); + if (useStdOut) + { + std::cout << "Parallel framework: " << CV_PARALLEL_FRAMEWORK << std::endl; + } +#endif } } //namespace cvtest From 4af7d65224f23739176c49341d8bcf795a8ab5ea Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Tue, 18 Jun 2013 18:08:38 +0400 Subject: [PATCH 100/121] Made tests record information about CPU features and Tegra optimization status. 
--- modules/ts/src/ts_func.cpp | 42 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/modules/ts/src/ts_func.cpp b/modules/ts/src/ts_func.cpp index 7a292d71c..e2998149d 100644 --- a/modules/ts/src/ts_func.cpp +++ b/modules/ts/src/ts_func.cpp @@ -2,6 +2,10 @@ #include #include +#ifdef HAVE_TEGRA_OPTIMIZATION +#include "tegra.hpp" +#endif + using namespace cv; namespace cvtest @@ -2966,6 +2970,44 @@ void printVersionInfo(bool useStdOut) std::cout << "Parallel framework: " << CV_PARALLEL_FRAMEWORK << std::endl; } #endif + + std::string cpu_features; + +#if CV_SSE + if (checkHardwareSupport(CV_CPU_SSE)) cpu_features += " sse"; +#endif +#if CV_SSE2 + if (checkHardwareSupport(CV_CPU_SSE2)) cpu_features += " sse2"; +#endif +#if CV_SSE3 + if (checkHardwareSupport(CV_CPU_SSE3)) cpu_features += " sse3"; +#endif +#if CV_SSSE3 + if (checkHardwareSupport(CV_CPU_SSSE3)) cpu_features += " ssse3"; +#endif +#if CV_SSE4_1 + if (checkHardwareSupport(CV_CPU_SSE4_1)) cpu_features += " sse4.1"; +#endif +#if CV_SSE4_2 + if (checkHardwareSupport(CV_CPU_SSE4_2)) cpu_features += " sse4.2"; +#endif +#if CV_AVX + if (checkHardwareSupport(CV_CPU_AVX)) cpu_features += " avx"; +#endif +#if CV_NEON + cpu_features += " neon"; // NEON is currently not checked at runtime +#endif + + cpu_features.erase(0, 1); // erase initial space + + ::testing::Test::RecordProperty("cv_cpu_features", cpu_features); + if (useStdOut) std::cout << "CPU features: " << cpu_features << std::endl; + +#ifdef HAVE_TEGRA_OPTIMIZATION + const char * tegra_optimization = tegra::isDeviceSupported() ? 
"enabled" : "disabled"; + ::testing::Test::RecordProperty("cv_tegra_optimization", tegra_optimization); + if (useStdOut) std::cout << "Tegra optimization: " << tegra_optimization << std::endl; +#endif } } //namespace cvtest From 26c246140a31556fd116bb53044575a0f9b02b84 Mon Sep 17 00:00:00 2001 From: yao Date: Wed, 19 Jun 2013 11:20:45 +0800 Subject: [PATCH 101/121] optimize hog --- modules/ocl/src/hog.cpp | 508 +++++++++++++++-------- modules/ocl/src/opencl/objdetect_hog.cl | 528 +++++++++++++++++------- 2 files changed, 711 insertions(+), 325 deletions(-) diff --git a/modules/ocl/src/hog.cpp b/modules/ocl/src/hog.cpp index a3514586f..3533cce69 100644 --- a/modules/ocl/src/hog.cpp +++ b/modules/ocl/src/hog.cpp @@ -15,7 +15,7 @@ // Third party copyrights are property of their respective owners. // // @Authors -// Wenju He, wenju@multicorewareinc.com +// Wenju He, wenju@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -48,13 +48,107 @@ using namespace cv; using namespace cv::ocl; using namespace std; - #define CELL_WIDTH 8 #define CELL_HEIGHT 8 #define CELLS_PER_BLOCK_X 2 #define CELLS_PER_BLOCK_Y 2 #define NTHREADS 256 +static oclMat gauss_w_lut; +static bool hog_device_cpu; +/* pre-compute gaussian and interp_weight lookup tables if sigma is 4.0f */ +static const float gaussian_interp_lut[] = +{ + /* gaussian lut */ + 0.01831564f, 0.02926831f, 0.04393693f, 0.06196101f, 0.08208500f, 0.10215643f, + 0.11943297f, 0.13117145f, 0.13533528f, 0.13117145f, 0.11943297f, 0.10215643f, + 0.08208500f, 0.06196101f, 0.04393693f, 0.02926831f, 0.02926831f, 0.04677062f, + 0.07021102f, 0.09901341f, 0.13117145f, 0.16324551f, 0.19085334f, 0.20961139f, + 0.21626517f, 0.20961139f, 0.19085334f, 0.16324551f, 0.13117145f, 0.09901341f, + 0.07021102f, 0.04677062f, 0.04393693f, 0.07021102f, 0.10539922f, 0.14863673f, + 0.19691168f, 0.24506053f, 0.28650481f, 0.31466395f, 
0.32465246f, 0.31466395f, + 0.28650481f, 0.24506053f, 0.19691168f, 0.14863673f, 0.10539922f, 0.07021102f, + 0.06196101f, 0.09901341f, 0.14863673f, 0.20961139f, 0.27768996f, 0.34559074f, + 0.40403652f, 0.44374731f, 0.45783335f, 0.44374731f, 0.40403652f, 0.34559074f, + 0.27768996f, 0.20961139f, 0.14863673f, 0.09901341f, 0.08208500f, 0.13117145f, + 0.19691168f, 0.27768996f, 0.36787945f, 0.45783335f, 0.53526145f, 0.58786964f, + 0.60653067f, 0.58786964f, 0.53526145f, 0.45783335f, 0.36787945f, 0.27768996f, + 0.19691168f, 0.13117145f, 0.10215643f, 0.16324551f, 0.24506053f, 0.34559074f, + 0.45783335f, 0.56978285f, 0.66614360f, 0.73161560f, 0.75483960f, 0.73161560f, + 0.66614360f, 0.56978285f, 0.45783335f, 0.34559074f, 0.24506053f, 0.16324551f, + 0.11943297f, 0.19085334f, 0.28650481f, 0.40403652f, 0.53526145f, 0.66614360f, + 0.77880079f, 0.85534531f, 0.88249689f, 0.85534531f, 0.77880079f, 0.66614360f, + 0.53526145f, 0.40403652f, 0.28650481f, 0.19085334f, 0.13117145f, 0.20961139f, + 0.31466395f, 0.44374731f, 0.58786964f, 0.73161560f, 0.85534531f, 0.93941307f, + 0.96923321f, 0.93941307f, 0.85534531f, 0.73161560f, 0.58786964f, 0.44374731f, + 0.31466395f, 0.20961139f, 0.13533528f, 0.21626517f, 0.32465246f, 0.45783335f, + 0.60653067f, 0.75483960f, 0.88249689f, 0.96923321f, 1.00000000f, 0.96923321f, + 0.88249689f, 0.75483960f, 0.60653067f, 0.45783335f, 0.32465246f, 0.21626517f, + 0.13117145f, 0.20961139f, 0.31466395f, 0.44374731f, 0.58786964f, 0.73161560f, + 0.85534531f, 0.93941307f, 0.96923321f, 0.93941307f, 0.85534531f, 0.73161560f, + 0.58786964f, 0.44374731f, 0.31466395f, 0.20961139f, 0.11943297f, 0.19085334f, + 0.28650481f, 0.40403652f, 0.53526145f, 0.66614360f, 0.77880079f, 0.85534531f, + 0.88249689f, 0.85534531f, 0.77880079f, 0.66614360f, 0.53526145f, 0.40403652f, + 0.28650481f, 0.19085334f, 0.10215643f, 0.16324551f, 0.24506053f, 0.34559074f, + 0.45783335f, 0.56978285f, 0.66614360f, 0.73161560f, 0.75483960f, 0.73161560f, + 0.66614360f, 0.56978285f, 0.45783335f, 0.34559074f, 
0.24506053f, 0.16324551f, + 0.08208500f, 0.13117145f, 0.19691168f, 0.27768996f, 0.36787945f, 0.45783335f, + 0.53526145f, 0.58786964f, 0.60653067f, 0.58786964f, 0.53526145f, 0.45783335f, + 0.36787945f, 0.27768996f, 0.19691168f, 0.13117145f, 0.06196101f, 0.09901341f, + 0.14863673f, 0.20961139f, 0.27768996f, 0.34559074f, 0.40403652f, 0.44374731f, + 0.45783335f, 0.44374731f, 0.40403652f, 0.34559074f, 0.27768996f, 0.20961139f, + 0.14863673f, 0.09901341f, 0.04393693f, 0.07021102f, 0.10539922f, 0.14863673f, + 0.19691168f, 0.24506053f, 0.28650481f, 0.31466395f, 0.32465246f, 0.31466395f, + 0.28650481f, 0.24506053f, 0.19691168f, 0.14863673f, 0.10539922f, 0.07021102f, + 0.02926831f, 0.04677062f, 0.07021102f, 0.09901341f, 0.13117145f, 0.16324551f, + 0.19085334f, 0.20961139f, 0.21626517f, 0.20961139f, 0.19085334f, 0.16324551f, + 0.13117145f, 0.09901341f, 0.07021102f, 0.04677062f, + /* interp_weight lut */ + 0.00390625f, 0.01171875f, 0.01953125f, 0.02734375f, 0.03515625f, 0.04296875f, + 0.05078125f, 0.05859375f, 0.05859375f, 0.05078125f, 0.04296875f, 0.03515625f, + 0.02734375f, 0.01953125f, 0.01171875f, 0.00390625f, 0.01171875f, 0.03515625f, + 0.05859375f, 0.08203125f, 0.10546875f, 0.12890625f, 0.15234375f, 0.17578125f, + 0.17578125f, 0.15234375f, 0.12890625f, 0.10546875f, 0.08203125f, 0.05859375f, + 0.03515625f, 0.01171875f, 0.01953125f, 0.05859375f, 0.09765625f, 0.13671875f, + 0.17578125f, 0.21484375f, 0.25390625f, 0.29296875f, 0.29296875f, 0.25390625f, + 0.21484375f, 0.17578125f, 0.13671875f, 0.09765625f, 0.05859375f, 0.01953125f, + 0.02734375f, 0.08203125f, 0.13671875f, 0.19140625f, 0.24609375f, 0.30078125f, + 0.35546875f, 0.41015625f, 0.41015625f, 0.35546875f, 0.30078125f, 0.24609375f, + 0.19140625f, 0.13671875f, 0.08203125f, 0.02734375f, 0.03515625f, 0.10546875f, + 0.17578125f, 0.24609375f, 0.31640625f, 0.38671875f, 0.45703125f, 0.52734375f, + 0.52734375f, 0.45703125f, 0.38671875f, 0.31640625f, 0.24609375f, 0.17578125f, + 0.10546875f, 0.03515625f, 0.04296875f, 0.12890625f, 
0.21484375f, 0.30078125f, + 0.38671875f, 0.47265625f, 0.55859375f, 0.64453125f, 0.64453125f, 0.55859375f, + 0.47265625f, 0.38671875f, 0.30078125f, 0.21484375f, 0.12890625f, 0.04296875f, + 0.05078125f, 0.15234375f, 0.25390625f, 0.35546875f, 0.45703125f, 0.55859375f, + 0.66015625f, 0.76171875f, 0.76171875f, 0.66015625f, 0.55859375f, 0.45703125f, + 0.35546875f, 0.25390625f, 0.15234375f, 0.05078125f, 0.05859375f, 0.17578125f, + 0.29296875f, 0.41015625f, 0.52734375f, 0.64453125f, 0.76171875f, 0.87890625f, + 0.87890625f, 0.76171875f, 0.64453125f, 0.52734375f, 0.41015625f, 0.29296875f, + 0.17578125f, 0.05859375f, 0.05859375f, 0.17578125f, 0.29296875f, 0.41015625f, + 0.52734375f, 0.64453125f, 0.76171875f, 0.87890625f, 0.87890625f, 0.76171875f, + 0.64453125f, 0.52734375f, 0.41015625f, 0.29296875f, 0.17578125f, 0.05859375f, + 0.05078125f, 0.15234375f, 0.25390625f, 0.35546875f, 0.45703125f, 0.55859375f, + 0.66015625f, 0.76171875f, 0.76171875f, 0.66015625f, 0.55859375f, 0.45703125f, + 0.35546875f, 0.25390625f, 0.15234375f, 0.05078125f, 0.04296875f, 0.12890625f, + 0.21484375f, 0.30078125f, 0.38671875f, 0.47265625f, 0.55859375f, 0.64453125f, + 0.64453125f, 0.55859375f, 0.47265625f, 0.38671875f, 0.30078125f, 0.21484375f, + 0.12890625f, 0.04296875f, 0.03515625f, 0.10546875f, 0.17578125f, 0.24609375f, + 0.31640625f, 0.38671875f, 0.45703125f, 0.52734375f, 0.52734375f, 0.45703125f, + 0.38671875f, 0.31640625f, 0.24609375f, 0.17578125f, 0.10546875f, 0.03515625f, + 0.02734375f, 0.08203125f, 0.13671875f, 0.19140625f, 0.24609375f, 0.30078125f, + 0.35546875f, 0.41015625f, 0.41015625f, 0.35546875f, 0.30078125f, 0.24609375f, + 0.19140625f, 0.13671875f, 0.08203125f, 0.02734375f, 0.01953125f, 0.05859375f, + 0.09765625f, 0.13671875f, 0.17578125f, 0.21484375f, 0.25390625f, 0.29296875f, + 0.29296875f, 0.25390625f, 0.21484375f, 0.17578125f, 0.13671875f, 0.09765625f, + 0.05859375f, 0.01953125f, 0.01171875f, 0.03515625f, 0.05859375f, 0.08203125f, + 0.10546875f, 0.12890625f, 0.15234375f, 0.17578125f, 
0.17578125f, 0.15234375f, + 0.12890625f, 0.10546875f, 0.08203125f, 0.05859375f, 0.03515625f, 0.01171875f, + 0.00390625f, 0.01171875f, 0.01953125f, 0.02734375f, 0.03515625f, 0.04296875f, + 0.05078125f, 0.05859375f, 0.05859375f, 0.05078125f, 0.04296875f, 0.03515625f, + 0.02734375f, 0.01953125f, 0.01171875f, 0.00390625f +}; + namespace cv { namespace ocl @@ -78,38 +172,43 @@ namespace cv int cnblocks_win_x; int cnblocks_win_y; int cblock_hist_size; - int cblock_hist_size_2up; int cdescr_size; int cdescr_width; + int cdescr_height; void set_up_constants(int nbins, int block_stride_x, int block_stride_y, int nblocks_win_x, int nblocks_win_y); void compute_hists(int nbins, int block_stride_x, int blovck_stride_y, - int height, int width, const cv::ocl::oclMat &grad, - const cv::ocl::oclMat &qangle, float sigma, cv::ocl::oclMat &block_hists); + int height, int width, float sigma, const cv::ocl::oclMat &grad, + const cv::ocl::oclMat &qangle, + const cv::ocl::oclMat &gauss_w_lut, cv::ocl::oclMat &block_hists); void normalize_hists(int nbins, int block_stride_x, int block_stride_y, - int height, int width, cv::ocl::oclMat &block_hists, float threshold); + int height, int width, cv::ocl::oclMat &block_hists, + float threshold); void classify_hists(int win_height, int win_width, int block_stride_y, - int block_stride_x, int win_stride_y, int win_stride_x, int height, - int width, const cv::ocl::oclMat &block_hists, const cv::ocl::oclMat &coefs, float free_coef, + int block_stride_x, int win_stride_y, int win_stride_x, + int height, int width, const cv::ocl::oclMat &block_hists, + const cv::ocl::oclMat &coefs, float free_coef, float threshold, cv::ocl::oclMat &labels); - void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x, - int win_stride_y, int win_stride_x, int height, int width, const cv::ocl::oclMat &block_hists, + void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, + int block_stride_x, int 
win_stride_y, int win_stride_x, + int height, int width, const cv::ocl::oclMat &block_hists, cv::ocl::oclMat &descriptors); - void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x, - int win_stride_y, int win_stride_x, int height, int width, const cv::ocl::oclMat &block_hists, + void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, + int block_stride_x, int win_stride_y, int win_stride_x, + int height, int width, const cv::ocl::oclMat &block_hists, cv::ocl::oclMat &descriptors); void compute_gradients_8UC1(int height, int width, const cv::ocl::oclMat &img, - float angle_scale, cv::ocl::oclMat &grad, cv::ocl::oclMat &qangle, bool correct_gamma); + float angle_scale, cv::ocl::oclMat &grad, + cv::ocl::oclMat &qangle, bool correct_gamma); void compute_gradients_8UC4(int height, int width, const cv::ocl::oclMat &img, - float angle_scale, cv::ocl::oclMat &grad, cv::ocl::oclMat &qangle, bool correct_gamma); - - void resize( const oclMat &src, oclMat &dst, const Size sz); + float angle_scale, cv::ocl::oclMat &grad, + cv::ocl::oclMat &qangle, bool correct_gamma); } } } @@ -117,8 +216,14 @@ namespace cv using namespace ::cv::ocl::device; -cv::ocl::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size block_stride_, Size cell_size_, - int nbins_, double win_sigma_, double threshold_L2hys_, bool gamma_correction_, int nlevels_) +static inline int divUp(int total, int grain) +{ + return (total + grain - 1) / grain; +} + +cv::ocl::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size block_stride_, + Size cell_size_, int nbins_, double win_sigma_, + double threshold_L2hys_, bool gamma_correction_, int nlevels_) : win_size(win_size_), block_size(block_size_), block_stride(block_stride_), @@ -132,19 +237,27 @@ cv::ocl::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size blo CV_Assert((win_size.width - block_size.width ) % block_stride.width == 0 && (win_size.height - 
block_size.height) % block_stride.height == 0); - CV_Assert(block_size.width % cell_size.width == 0 && block_size.height % cell_size.height == 0); + CV_Assert(block_size.width % cell_size.width == 0 && + block_size.height % cell_size.height == 0); CV_Assert(block_stride == cell_size); CV_Assert(cell_size == Size(8, 8)); - Size cells_per_block = Size(block_size.width / cell_size.width, block_size.height / cell_size.height); + Size cells_per_block(block_size.width / cell_size.width, + block_size.height / cell_size.height); CV_Assert(cells_per_block == Size(2, 2)); cv::Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride); - hog::set_up_constants(nbins, block_stride.width, block_stride.height, blocks_per_win.width, blocks_per_win.height); + hog::set_up_constants(nbins, block_stride.width, block_stride.height, + blocks_per_win.width, blocks_per_win.height); effect_size = Size(0, 0); + + if (queryDeviceInfo()) + hog_device_cpu = true; + else + hog_device_cpu = false; } size_t cv::ocl::HOGDescriptor::getDescriptorSize() const @@ -154,7 +267,8 @@ size_t cv::ocl::HOGDescriptor::getDescriptorSize() const size_t cv::ocl::HOGDescriptor::getBlockHistogramSize() const { - Size cells_per_block = Size(block_size.width / cell_size.width, block_size.height / cell_size.height); + Size cells_per_block = Size(block_size.width / cell_size.width, + block_size.height / cell_size.height); return (size_t)(nbins * cells_per_block.area()); } @@ -167,7 +281,8 @@ bool cv::ocl::HOGDescriptor::checkDetectorSize() const { size_t detector_size = detector.rows * detector.cols; size_t descriptor_size = getDescriptorSize(); - return detector_size == 0 || detector_size == descriptor_size || detector_size == descriptor_size + 1; + return detector_size == 0 || detector_size == descriptor_size || + detector_size == descriptor_size + 1; } void cv::ocl::HOGDescriptor::setSVMDetector(const vector &_detector) @@ -207,10 +322,16 @@ void cv::ocl::HOGDescriptor::init_buffer(const oclMat &img, 
Size win_stride) const size_t block_hist_size = getBlockHistogramSize(); const Size blocks_per_img = numPartsWithin(img.size(), block_size, block_stride); - block_hists.create(1, static_cast(block_hist_size * blocks_per_img.area()), CV_32F); + block_hists.create(1, + static_cast(block_hist_size * blocks_per_img.area()) + 256, CV_32F); Size wins_per_img = numPartsWithin(img.size(), win_size, win_stride); labels.create(1, wins_per_img.area(), CV_8U); + + vector v_lut = vector(gaussian_interp_lut, gaussian_interp_lut + + sizeof(gaussian_interp_lut) / sizeof(gaussian_interp_lut[0])); + Mat m_lut(v_lut); + gauss_w_lut.upload(m_lut.reshape(1,1)); } void cv::ocl::HOGDescriptor::computeGradient(const oclMat &img, oclMat &grad, oclMat &qangle) @@ -221,29 +342,34 @@ void cv::ocl::HOGDescriptor::computeGradient(const oclMat &img, oclMat &grad, oc switch (img.type()) { case CV_8UC1: - hog::compute_gradients_8UC1(effect_size.height, effect_size.width, img, angleScale, grad, qangle, gamma_correction); + hog::compute_gradients_8UC1(effect_size.height, effect_size.width, img, + angleScale, grad, qangle, gamma_correction); break; case CV_8UC4: - hog::compute_gradients_8UC4(effect_size.height, effect_size.width, img, angleScale, grad, qangle, gamma_correction); + hog::compute_gradients_8UC4(effect_size.height, effect_size.width, img, + angleScale, grad, qangle, gamma_correction); break; } } + void cv::ocl::HOGDescriptor::computeBlockHistograms(const oclMat &img) { - computeGradient(img, grad, qangle); + computeGradient(img, this->grad, this->qangle); - hog::compute_hists(nbins, block_stride.width, block_stride.height, effect_size.height, effect_size.width, - grad, qangle, (float)getWinSigma(), block_hists); + hog::compute_hists(nbins, block_stride.width, block_stride.height, effect_size.height, + effect_size.width, (float)getWinSigma(), grad, qangle, gauss_w_lut, block_hists); - hog::normalize_hists(nbins, block_stride.width, block_stride.height, effect_size.height, 
effect_size.width, - block_hists, (float)threshold_L2hys); + hog::normalize_hists(nbins, block_stride.width, block_stride.height, effect_size.height, + effect_size.width, block_hists, (float)threshold_L2hys); } -void cv::ocl::HOGDescriptor::getDescriptors(const oclMat &img, Size win_stride, oclMat &descriptors, int descr_format) +void cv::ocl::HOGDescriptor::getDescriptors(const oclMat &img, Size win_stride, + oclMat &descriptors, int descr_format) { - CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0); + CV_Assert(win_stride.width % block_stride.width == 0 && + win_stride.height % block_stride.height == 0); init_buffer(img, win_stride); @@ -253,17 +379,20 @@ void cv::ocl::HOGDescriptor::getDescriptors(const oclMat &img, Size win_stride, Size blocks_per_win = numPartsWithin(win_size, block_size, block_stride); Size wins_per_img = numPartsWithin(effect_size, win_size, win_stride); - descriptors.create(wins_per_img.area(), static_cast(blocks_per_win.area() * block_hist_size), CV_32F); + descriptors.create(wins_per_img.area(), + static_cast(blocks_per_win.area() * block_hist_size), CV_32F); switch (descr_format) { case DESCR_FORMAT_ROW_BY_ROW: - hog::extract_descrs_by_rows(win_size.height, win_size.width, block_stride.height, block_stride.width, - win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists, descriptors); + hog::extract_descrs_by_rows(win_size.height, win_size.width, + block_stride.height, block_stride.width, win_stride.height, win_stride.width, + effect_size.height, effect_size.width, block_hists, descriptors); break; case DESCR_FORMAT_COL_BY_COL: - hog::extract_descrs_by_cols(win_size.height, win_size.width, block_stride.height, block_stride.width, - win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists, descriptors); + hog::extract_descrs_by_cols(win_size.height, win_size.width, + block_stride.height, block_stride.width, 
win_stride.height, win_stride.width, + effect_size.height, effect_size.width, block_hists, descriptors); break; default: CV_Error(CV_StsBadArg, "Unknown descriptor format"); @@ -271,7 +400,8 @@ void cv::ocl::HOGDescriptor::getDescriptors(const oclMat &img, Size win_stride, } -void cv::ocl::HOGDescriptor::detect(const oclMat &img, vector &hits, double hit_threshold, Size win_stride, Size padding) +void cv::ocl::HOGDescriptor::detect(const oclMat &img, vector &hits, + double hit_threshold, Size win_stride, Size padding) { CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4); CV_Assert(padding == Size(0, 0)); @@ -283,14 +413,16 @@ void cv::ocl::HOGDescriptor::detect(const oclMat &img, vector &hits, doub if (win_stride == Size()) win_stride = block_stride; else - CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0); + CV_Assert(win_stride.width % block_stride.width == 0 && + win_stride.height % block_stride.height == 0); init_buffer(img, win_stride); computeBlockHistograms(img); - hog::classify_hists(win_size.height, win_size.width, block_stride.height, block_stride.width, - win_stride.height, win_stride.width, effect_size.height, effect_size.width, block_hists, - detector, (float)free_coef, (float)hit_threshold, labels); + hog::classify_hists(win_size.height, win_size.width, block_stride.height, + block_stride.width, win_stride.height, win_stride.width, + effect_size.height, effect_size.width, block_hists, detector, + (float)free_coef, (float)hit_threshold, labels); labels.download(labels_host); unsigned char *vec = labels_host.ptr(); @@ -306,8 +438,9 @@ void cv::ocl::HOGDescriptor::detect(const oclMat &img, vector &hits, doub -void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat &img, vector &found_locations, double hit_threshold, - Size win_stride, Size padding, double scale0, int group_threshold) +void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat &img, vector &found_locations, + double 
hit_threshold, Size win_stride, Size padding, + double scale0, int group_threshold) { CV_Assert(img.type() == CV_8UC1 || img.type() == CV_8UC4); CV_Assert(scale0 > 1); @@ -333,7 +466,8 @@ void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat &img, vector &f if (win_stride == Size()) win_stride = block_stride; else - CV_Assert(win_stride.width % block_stride.width == 0 && win_stride.height % block_stride.height == 0); + CV_Assert(win_stride.width % block_stride.width == 0 && + win_stride.height % block_stride.height == 0); init_buffer(img, win_stride); image_scale.create(img.size(), img.type()); @@ -347,16 +481,18 @@ void cv::ocl::HOGDescriptor::detectMultiScale(const oclMat &img, vector &f } else { - hog::resize( img, image_scale, effect_size); + resize(img, image_scale, effect_size); detect(image_scale, locations, hit_threshold, win_stride, padding); } - Size scaled_win_size(cvRound(win_size.width * scale), cvRound(win_size.height * scale)); + Size scaled_win_size(cvRound(win_size.width * scale), + cvRound(win_size.height * scale)); for (size_t j = 0; j < locations.size(); j++) - all_candidates.push_back(Rect(Point2d((CvPoint)locations[j]) * scale, scaled_win_size)); + all_candidates.push_back(Rect(Point2d((CvPoint)locations[j]) * scale, + scaled_win_size)); } found_locations.assign(all_candidates.begin(), all_candidates.end()); - groupRectangles(found_locations, group_threshold, 0.2/*magic number copied from CPU version*/); + groupRectangles(found_locations, group_threshold, 0.2); } int cv::ocl::HOGDescriptor::numPartsWithin(int size, int part_size, int stride) @@ -364,9 +500,11 @@ int cv::ocl::HOGDescriptor::numPartsWithin(int size, int part_size, int stride) return (size - part_size + stride) / stride; } -cv::Size cv::ocl::HOGDescriptor::numPartsWithin(cv::Size size, cv::Size part_size, cv::Size stride) +cv::Size cv::ocl::HOGDescriptor::numPartsWithin(cv::Size size, cv::Size part_size, + cv::Size stride) { - return Size(numPartsWithin(size.width, 
part_size.width, stride.width), numPartsWithin(size.height, part_size.height, stride.height)); + return Size(numPartsWithin(size.width, part_size.width, stride.width), + numPartsWithin(size.height, part_size.height, stride.height)); } std::vector cv::ocl::HOGDescriptor::getDefaultPeopleDetector() @@ -1547,8 +1685,9 @@ static int power_2up(unsigned int n) return -1; // Input is too big } -void cv::ocl::device::hog::set_up_constants(int nbins, int block_stride_x, int block_stride_y, - int nblocks_win_x, int nblocks_win_y) +void cv::ocl::device::hog::set_up_constants(int nbins, + int block_stride_x, int block_stride_y, + int nblocks_win_x, int nblocks_win_y) { cnbins = nbins; cblock_stride_x = block_stride_x; @@ -1559,53 +1698,32 @@ void cv::ocl::device::hog::set_up_constants(int nbins, int block_stride_x, int b int block_hist_size = nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y; cblock_hist_size = block_hist_size; - int block_hist_size_2up = power_2up(block_hist_size); - cblock_hist_size_2up = block_hist_size_2up; - int descr_width = nblocks_win_x * block_hist_size; cdescr_width = descr_width; + cdescr_height = nblocks_win_y; int descr_size = descr_width * nblocks_win_y; cdescr_size = descr_size; } -static inline int divUp(int total, int grain) -{ - return (total + grain - 1) / grain; -} - -static void openCLExecuteKernel_hog(Context *clCxt , const char **source, string kernelName, - size_t globalThreads[3], size_t localThreads[3], - vector< pair > &args) -{ - cl_kernel kernel = openCLGetKernelFromSource(clCxt, source, kernelName); - size_t wave_size = queryDeviceInfo(kernel); - openCLSafeCall(clReleaseKernel(kernel)); - if (wave_size <= 16) - { - char build_options[64]; - sprintf(build_options, (wave_size == 16) ? 
"-D WAVE_SIZE_16" : "-D WAVE_SIZE_1"); - openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, -1, -1, build_options); - } - else - openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, -1, -1); -} - -void cv::ocl::device::hog::compute_hists(int nbins, int block_stride_x, int block_stride_y, - int height, int width, const cv::ocl::oclMat &grad, - const cv::ocl::oclMat &qangle, float sigma, cv::ocl::oclMat &block_hists) +void cv::ocl::device::hog::compute_hists(int nbins, + int block_stride_x, int block_stride_y, + int height, int width, float sigma, + const cv::ocl::oclMat &grad, + const cv::ocl::oclMat &qangle, + const cv::ocl::oclMat &gauss_w_lut, + cv::ocl::oclMat &block_hists) { Context *clCxt = Context::getContext(); - string kernelName = "compute_hists_kernel"; vector< pair > args; + string kernelName = (sigma == 4.0f) ? "compute_hists_lut_kernel" : + "compute_hists_kernel"; - int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x; - int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y) / block_stride_y; - + int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) + / block_stride_x; + int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y) + / block_stride_y; int blocks_total = img_block_width * img_block_height; - int blocks_in_group = 4; - size_t localThreads[3] = { blocks_in_group * 24, 2, 1 }; - size_t globalThreads[3] = { divUp(blocks_total, blocks_in_group) * localThreads[0], 2, 1 }; int grad_quadstep = grad.step >> 2; int qangle_step = qangle.step; @@ -1613,6 +1731,11 @@ void cv::ocl::device::hog::compute_hists(int nbins, int block_stride_x, int bloc // Precompute gaussian spatial window parameter float scale = 1.f / (2.f * sigma * sigma); + int blocks_in_group = 4; + size_t localThreads[3] = { blocks_in_group * 24, 2, 1 }; + size_t globalThreads[3] = { + divUp(img_block_width 
* img_block_height, blocks_in_group) * localThreads[0], 2, 1 }; + int hists_size = (nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y * 12) * sizeof(float); int final_hists_size = (nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y) * sizeof(float); int smem = (hists_size + final_hists_size) * blocks_in_group; @@ -1628,62 +1751,120 @@ void cv::ocl::device::hog::compute_hists(int nbins, int block_stride_x, int bloc args.push_back( make_pair( sizeof(cl_int), (void *)&qangle_step)); args.push_back( make_pair( sizeof(cl_mem), (void *)&grad.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&qangle.data)); - args.push_back( make_pair( sizeof(cl_float), (void *)&scale)); + if (kernelName.compare("compute_hists_lut_kernel") == 0) + args.push_back( make_pair( sizeof(cl_mem), (void *)&gauss_w_lut.data)); + else + args.push_back( make_pair( sizeof(cl_float), (void *)&scale)); args.push_back( make_pair( sizeof(cl_mem), (void *)&block_hists.data)); args.push_back( make_pair( smem, (void *)NULL)); - openCLExecuteKernel_hog(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args); + openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, + localThreads, args, -1, -1); } -void cv::ocl::device::hog::normalize_hists(int nbins, int block_stride_x, int block_stride_y, - int height, int width, cv::ocl::oclMat &block_hists, float threshold) +void cv::ocl::device::hog::normalize_hists(int nbins, + int block_stride_x, int block_stride_y, + int height, int width, + cv::ocl::oclMat &block_hists, + float threshold) { Context *clCxt = Context::getContext(); - string kernelName = "normalize_hists_kernel"; vector< pair > args; + string kernelName; int block_hist_size = nbins * CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y; - int nthreads = power_2up(block_hist_size); + int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) + / block_stride_x; + int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y) + / block_stride_y; + int 
nthreads; + size_t globalThreads[3] = { 1, 1, 1 }; + size_t localThreads[3] = { 1, 1, 1 }; + + if ( nbins == 9 ) + { + /* optimized for the case of 9 bins */ + kernelName = "normalize_hists_36_kernel"; + int blocks_in_group = NTHREADS / block_hist_size; + nthreads = blocks_in_group * block_hist_size; + int num_groups = divUp( img_block_width * img_block_height, blocks_in_group); + globalThreads[0] = nthreads * num_groups; + localThreads[0] = nthreads; + } + else + { + kernelName = "normalize_hists_kernel"; + nthreads = power_2up(block_hist_size); + globalThreads[0] = img_block_width * nthreads; + globalThreads[1] = img_block_height; + localThreads[0] = nthreads; - int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x; - int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y) / block_stride_y; - size_t globalThreads[3] = { img_block_width * nthreads, img_block_height, 1 }; - size_t localThreads[3] = { nthreads, 1, 1 }; + if ((nthreads < 32) || (nthreads > 512) ) + cv::ocl::error("normalize_hists: histogram's size is too small or too big", + __FILE__, __LINE__, "normalize_hists"); - if ((nthreads < 32) || (nthreads > 512) ) - cv::ocl::error("normalize_hists: histogram's size is too small or too big", __FILE__, __LINE__, "normalize_hists"); + args.push_back( make_pair( sizeof(cl_int), (void *)&nthreads)); + args.push_back( make_pair( sizeof(cl_int), (void *)&block_hist_size)); + args.push_back( make_pair( sizeof(cl_int), (void *)&img_block_width)); + } - args.push_back( make_pair( sizeof(cl_int), (void *)&nthreads)); - args.push_back( make_pair( sizeof(cl_int), (void *)&block_hist_size)); - args.push_back( make_pair( sizeof(cl_int), (void *)&img_block_width)); args.push_back( make_pair( sizeof(cl_mem), (void *)&block_hists.data)); args.push_back( make_pair( sizeof(cl_float), (void *)&threshold)); args.push_back( make_pair( nthreads * sizeof(float), (void *)NULL)); - openCLExecuteKernel_hog(clCxt, 
&objdetect_hog, kernelName, globalThreads, localThreads, args); + if(hog_device_cpu) + openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, + localThreads, args, -1, -1, "-D CPU"); + else + openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, + localThreads, args, -1, -1); } -void cv::ocl::device::hog::classify_hists(int win_height, int win_width, int block_stride_y, - int block_stride_x, int win_stride_y, int win_stride_x, int height, - int width, const cv::ocl::oclMat &block_hists, const cv::ocl::oclMat &coefs, float free_coef, - float threshold, cv::ocl::oclMat &labels) +void cv::ocl::device::hog::classify_hists(int win_height, int win_width, + int block_stride_y, int block_stride_x, + int win_stride_y, int win_stride_x, + int height, int width, + const cv::ocl::oclMat &block_hists, + const cv::ocl::oclMat &coefs, + float free_coef, float threshold, + cv::ocl::oclMat &labels) { Context *clCxt = Context::getContext(); - string kernelName = "classify_hists_kernel"; vector< pair > args; + int nthreads; + string kernelName; + switch (cdescr_width) + { + case 180: + nthreads = 180; + kernelName = "classify_hists_180_kernel"; + args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_width)); + args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_height)); + break; + case 252: + nthreads = 256; + kernelName = "classify_hists_252_kernel"; + args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_width)); + args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_height)); + break; + default: + nthreads = 256; + kernelName = "classify_hists_kernel"; + args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_size)); + args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_width)); + } + int win_block_stride_x = win_stride_x / block_stride_x; int win_block_stride_y = win_stride_y / block_stride_y; int img_win_width = (width - win_width + win_stride_x) / win_stride_x; int img_win_height = (height - win_height + 
win_stride_y) / win_stride_y; - int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x; - - size_t globalThreads[3] = { img_win_width * NTHREADS, img_win_height, 1 }; - size_t localThreads[3] = { NTHREADS, 1, 1 }; + int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / + block_stride_x; + size_t globalThreads[3] = { img_win_width * nthreads, img_win_height, 1 }; + size_t localThreads[3] = { nthreads, 1, 1 }; args.push_back( make_pair( sizeof(cl_int), (void *)&cblock_hist_size)); - args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_size)); - args.push_back( make_pair( sizeof(cl_int), (void *)&cdescr_width)); args.push_back( make_pair( sizeof(cl_int), (void *)&img_win_width)); args.push_back( make_pair( sizeof(cl_int), (void *)&img_block_width)); args.push_back( make_pair( sizeof(cl_int), (void *)&win_block_stride_x)); @@ -1694,12 +1875,20 @@ void cv::ocl::device::hog::classify_hists(int win_height, int win_width, int blo args.push_back( make_pair( sizeof(cl_float), (void *)&threshold)); args.push_back( make_pair( sizeof(cl_mem), (void *)&labels.data)); - openCLExecuteKernel_hog(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args); + if(hog_device_cpu) + openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, + localThreads, args, -1, -1, "-D CPU"); + else + openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, + localThreads, args, -1, -1); } -void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x, - int win_stride_y, int win_stride_x, int height, int width, - const cv::ocl::oclMat &block_hists, cv::ocl::oclMat &descriptors) +void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width, + int block_stride_y, int block_stride_x, + int win_stride_y, int win_stride_x, + int height, int width, + const cv::ocl::oclMat &block_hists, + cv::ocl::oclMat &descriptors) { 
Context *clCxt = Context::getContext(); string kernelName = "extract_descrs_by_rows_kernel"; @@ -1709,7 +1898,8 @@ void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width, int win_block_stride_y = win_stride_y / block_stride_y; int img_win_width = (width - win_width + win_stride_x) / win_stride_x; int img_win_height = (height - win_height + win_stride_y) / win_stride_y; - int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x; + int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / + block_stride_x; int descriptors_quadstep = descriptors.step >> 2; size_t globalThreads[3] = { img_win_width * NTHREADS, img_win_height, 1 }; @@ -1725,12 +1915,16 @@ void cv::ocl::device::hog::extract_descrs_by_rows(int win_height, int win_width, args.push_back( make_pair( sizeof(cl_mem), (void *)&block_hists.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data)); - openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); + openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, + localThreads, args, -1, -1); } -void cv::ocl::device::hog::extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x, - int win_stride_y, int win_stride_x, int height, int width, - const cv::ocl::oclMat &block_hists, cv::ocl::oclMat &descriptors) +void cv::ocl::device::hog::extract_descrs_by_cols(int win_height, int win_width, + int block_stride_y, int block_stride_x, + int win_stride_y, int win_stride_x, + int height, int width, + const cv::ocl::oclMat &block_hists, + cv::ocl::oclMat &descriptors) { Context *clCxt = Context::getContext(); string kernelName = "extract_descrs_by_cols_kernel"; @@ -1740,7 +1934,8 @@ void cv::ocl::device::hog::extract_descrs_by_cols(int win_height, int win_width, int win_block_stride_y = win_stride_y / block_stride_y; int img_win_width = (width - win_width + win_stride_x) / 
win_stride_x; int img_win_height = (height - win_height + win_stride_y) / win_stride_y; - int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x; + int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / + block_stride_x; int descriptors_quadstep = descriptors.step >> 2; size_t globalThreads[3] = { img_win_width * NTHREADS, img_win_height, 1 }; @@ -1757,11 +1952,16 @@ void cv::ocl::device::hog::extract_descrs_by_cols(int win_height, int win_width, args.push_back( make_pair( sizeof(cl_mem), (void *)&block_hists.data)); args.push_back( make_pair( sizeof(cl_mem), (void *)&descriptors.data)); - openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); + openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, + localThreads, args, -1, -1); } -void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width, const cv::ocl::oclMat &img, - float angle_scale, cv::ocl::oclMat &grad, cv::ocl::oclMat &qangle, bool correct_gamma) +void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width, + const cv::ocl::oclMat &img, + float angle_scale, + cv::ocl::oclMat &grad, + cv::ocl::oclMat &qangle, + bool correct_gamma) { Context *clCxt = Context::getContext(); string kernelName = "compute_gradients_8UC1_kernel"; @@ -1786,11 +1986,16 @@ void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width, const c args.push_back( make_pair( sizeof(cl_char), (void *)&correctGamma)); args.push_back( make_pair( sizeof(cl_int), (void *)&cnbins)); - openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); + openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, + localThreads, args, -1, -1); } -void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width, const cv::ocl::oclMat &img, - float angle_scale, cv::ocl::oclMat &grad, cv::ocl::oclMat &qangle, bool correct_gamma) +void 
cv::ocl::device::hog::compute_gradients_8UC4(int height, int width, + const cv::ocl::oclMat &img, + float angle_scale, + cv::ocl::oclMat &grad, + cv::ocl::oclMat &qangle, + bool correct_gamma) { Context *clCxt = Context::getContext(); string kernelName = "compute_gradients_8UC4_kernel"; @@ -1816,39 +2021,6 @@ void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width, const c args.push_back( make_pair( sizeof(cl_char), (void *)&correctGamma)); args.push_back( make_pair( sizeof(cl_int), (void *)&cnbins)); - openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); -} - -void cv::ocl::device::hog::resize( const oclMat &src, oclMat &dst, const Size sz) -{ - CV_Assert( (src.channels() == dst.channels()) ); - Context *clCxt = Context::getContext(); - - string kernelName = (src.type() == CV_8UC1) ? "resize_8UC1_kernel" : "resize_8UC4_kernel"; - size_t blkSizeX = 16, blkSizeY = 16; - size_t glbSizeX = sz.width % blkSizeX == 0 ? sz.width : (sz.width / blkSizeX + 1) * blkSizeX; - size_t glbSizeY = sz.height % blkSizeY == 0 ? 
sz.height : (sz.height / blkSizeY + 1) * blkSizeY; - size_t globalThreads[3] = {glbSizeX, glbSizeY, 1}; - size_t localThreads[3] = {blkSizeX, blkSizeY, 1}; - - float ifx = (float)src.cols / sz.width; - float ify = (float)src.rows / sz.height; - int src_step = static_cast(src.step); - int dst_step = static_cast(dst.step); - - vector< pair > args; - args.push_back( make_pair(sizeof(cl_mem), (void *)&dst.data)); - args.push_back( make_pair(sizeof(cl_mem), (void *)&src.data)); - args.push_back( make_pair(sizeof(cl_int), (void *)&dst.offset)); - args.push_back( make_pair(sizeof(cl_int), (void *)&src.offset)); - args.push_back( make_pair(sizeof(cl_int), (void *)&dst_step)); - args.push_back( make_pair(sizeof(cl_int), (void *)&src_step)); - args.push_back( make_pair(sizeof(cl_int), (void *)&src.cols)); - args.push_back( make_pair(sizeof(cl_int), (void *)&src.rows)); - args.push_back( make_pair(sizeof(cl_int), (void *)&sz.width)); - args.push_back( make_pair(sizeof(cl_int), (void *)&sz.height)); - args.push_back( make_pair(sizeof(cl_float), (void *)&ifx)); - args.push_back( make_pair(sizeof(cl_float), (void *)&ify)); - - openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, localThreads, args, -1, -1); -} + openCLExecuteKernel(clCxt, &objdetect_hog, kernelName, globalThreads, + localThreads, args, -1, -1); +} \ No newline at end of file diff --git a/modules/ocl/src/opencl/objdetect_hog.cl b/modules/ocl/src/opencl/objdetect_hog.cl index 8852facae..05d538330 100644 --- a/modules/ocl/src/opencl/objdetect_hog.cl +++ b/modules/ocl/src/opencl/objdetect_hog.cl @@ -43,7 +43,6 @@ // //M*/ - #define CELL_WIDTH 8 #define CELL_HEIGHT 8 #define CELLS_PER_BLOCK_X 2 @@ -51,6 +50,100 @@ #define NTHREADS 256 #define CV_PI_F 3.1415926535897932384626433832795f +//---------------------------------------------------------------------------- +// Histogram computation +// 12 threads for a cell, 12x4 threads per block +// Use pre-computed gaussian and interp_weight lookup tables 
if sigma is 4.0f +__kernel void compute_hists_lut_kernel( + const int cblock_stride_x, const int cblock_stride_y, + const int cnbins, const int cblock_hist_size, const int img_block_width, + const int blocks_in_group, const int blocks_total, + const int grad_quadstep, const int qangle_step, + __global const float* grad, __global const uchar* qangle, + __global const float* gauss_w_lut, + __global float* block_hists, __local float* smem) +{ + const int lx = get_local_id(0); + const int lp = lx / 24; /* local group id */ + const int gid = get_group_id(0) * blocks_in_group + lp;/* global group id */ + const int gidY = gid / img_block_width; + const int gidX = gid - gidY * img_block_width; + + const int lidX = lx - lp * 24; + const int lidY = get_local_id(1); + + const int cell_x = lidX / 12; + const int cell_y = lidY; + const int cell_thread_x = lidX - cell_x * 12; + + __local float* hists = smem + lp * cnbins * (CELLS_PER_BLOCK_X * + CELLS_PER_BLOCK_Y * 12 + CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y); + __local float* final_hist = hists + cnbins * + (CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y * 12); + + const int offset_x = gidX * cblock_stride_x + (cell_x << 2) + cell_thread_x; + const int offset_y = gidY * cblock_stride_y + (cell_y << 2); + + __global const float* grad_ptr = (gid < blocks_total) ? + grad + offset_y * grad_quadstep + (offset_x << 1) : grad; + __global const uchar* qangle_ptr = (gid < blocks_total) ? 
+ qangle + offset_y * qangle_step + (offset_x << 1) : qangle; + + __local float* hist = hists + 12 * (cell_y * CELLS_PER_BLOCK_Y + cell_x) + + cell_thread_x; + for (int bin_id = 0; bin_id < cnbins; ++bin_id) + hist[bin_id * 48] = 0.f; + + const int dist_x = -4 + cell_thread_x - 4 * cell_x; + const int dist_center_x = dist_x - 4 * (1 - 2 * cell_x); + + const int dist_y_begin = -4 - 4 * lidY; + for (int dist_y = dist_y_begin; dist_y < dist_y_begin + 12; ++dist_y) + { + float2 vote = (float2) (grad_ptr[0], grad_ptr[1]); + uchar2 bin = (uchar2) (qangle_ptr[0], qangle_ptr[1]); + + grad_ptr += grad_quadstep; + qangle_ptr += qangle_step; + + int dist_center_y = dist_y - 4 * (1 - 2 * cell_y); + + int idx = (dist_center_y + 8) * 16 + (dist_center_x + 8); + float gaussian = gauss_w_lut[idx]; + idx = (dist_y + 8) * 16 + (dist_x + 8); + float interp_weight = gauss_w_lut[256+idx]; + + hist[bin.x * 48] += gaussian * interp_weight * vote.x; + hist[bin.y * 48] += gaussian * interp_weight * vote.y; + } + barrier(CLK_LOCAL_MEM_FENCE); + + volatile __local float* hist_ = hist; + for (int bin_id = 0; bin_id < cnbins; ++bin_id, hist_ += 48) + { + if (cell_thread_x < 6) + hist_[0] += hist_[6]; + barrier(CLK_LOCAL_MEM_FENCE); + if (cell_thread_x < 3) + hist_[0] += hist_[3]; +#ifdef CPU + barrier(CLK_LOCAL_MEM_FENCE); +#endif + if (cell_thread_x == 0) + final_hist[(cell_x * 2 + cell_y) * cnbins + bin_id] = + hist_[0] + hist_[1] + hist_[2]; + } + barrier(CLK_LOCAL_MEM_FENCE); + + int tid = (cell_y * CELLS_PER_BLOCK_Y + cell_x) * 12 + cell_thread_x; + if ((tid < cblock_hist_size) && (gid < blocks_total)) + { + __global float* block_hist = block_hists + + (gidY * img_block_width + gidX) * cblock_hist_size; + block_hist[tid] = final_hist[tid]; + } +} + //---------------------------------------------------------------------------- // Histogram computation // 12 threads for a cell, 12x4 threads per block @@ -125,16 +218,14 @@ __kernel void compute_hists_kernel( barrier(CLK_LOCAL_MEM_FENCE); if 
(cell_thread_x < 3) hist_[0] += hist_[3]; -#ifdef WAVE_SIZE_1 +#ifdef CPU barrier(CLK_LOCAL_MEM_FENCE); #endif if (cell_thread_x == 0) final_hist[(cell_x * 2 + cell_y) * cnbins + bin_id] = hist_[0] + hist_[1] + hist_[2]; } -#ifdef WAVE_SIZE_1 barrier(CLK_LOCAL_MEM_FENCE); -#endif int tid = (cell_y * CELLS_PER_BLOCK_Y + cell_x) * 12 + cell_thread_x; if ((tid < cblock_hist_size) && (gid < blocks_total)) @@ -145,6 +236,57 @@ __kernel void compute_hists_kernel( } } +//------------------------------------------------------------- +// Normalization of histograms via L2Hys_norm +// optimized for the case of 9 bins +__kernel void normalize_hists_36_kernel(__global float* block_hists, + const float threshold, __local float *squares) +{ + const int tid = get_local_id(0); + const int gid = get_global_id(0); + const int bid = tid / 36; /* block-hist id, (0 - 6) */ + const int boffset = bid * 36; /* block-hist offset in the work-group */ + const int hid = tid - boffset; /* histogram bin id, (0 - 35) */ + + float elem = block_hists[gid]; + squares[tid] = elem * elem; + barrier(CLK_LOCAL_MEM_FENCE); + + __local float* smem = squares + boffset; + float sum = smem[hid]; + if (hid < 18) + smem[hid] = sum = sum + smem[hid + 18]; + barrier(CLK_LOCAL_MEM_FENCE); + if (hid < 9) + smem[hid] = sum = sum + smem[hid + 9]; + barrier(CLK_LOCAL_MEM_FENCE); + if (hid < 4) + smem[hid] = sum + smem[hid + 4]; + barrier(CLK_LOCAL_MEM_FENCE); + sum = smem[0] + smem[1] + smem[2] + smem[3] + smem[8]; + + elem = elem / (sqrt(sum) + 3.6f); + elem = min(elem, threshold); + + barrier(CLK_LOCAL_MEM_FENCE); + squares[tid] = elem * elem; + barrier(CLK_LOCAL_MEM_FENCE); + + sum = smem[hid]; + if (hid < 18) + smem[hid] = sum = sum + smem[hid + 18]; + barrier(CLK_LOCAL_MEM_FENCE); + if (hid < 9) + smem[hid] = sum = sum + smem[hid + 9]; + barrier(CLK_LOCAL_MEM_FENCE); + if (hid < 4) + smem[hid] = sum + smem[hid + 4]; + barrier(CLK_LOCAL_MEM_FENCE); + sum = smem[0] + smem[1] + smem[2] + smem[3] + smem[8]; + + 
block_hists[gid] = elem / (sqrt(sum) + 1e-3f); +} + //------------------------------------------------------------- // Normalization of histograms via L2Hys_norm // @@ -153,76 +295,50 @@ float reduce_smem(volatile __local float* smem, int size) unsigned int tid = get_local_id(0); float sum = smem[tid]; - if (size >= 512) - { - if (tid < 256) smem[tid] = sum = sum + smem[tid + 256]; - barrier(CLK_LOCAL_MEM_FENCE); - } - if (size >= 256) - { - if (tid < 128) smem[tid] = sum = sum + smem[tid + 128]; - barrier(CLK_LOCAL_MEM_FENCE); - } - if (size >= 128) - { - if (tid < 64) smem[tid] = sum = sum + smem[tid + 64]; - barrier(CLK_LOCAL_MEM_FENCE); - } - + if (size >= 512) { if (tid < 256) smem[tid] = sum = sum + smem[tid + 256]; + barrier(CLK_LOCAL_MEM_FENCE); } + if (size >= 256) { if (tid < 128) smem[tid] = sum = sum + smem[tid + 128]; + barrier(CLK_LOCAL_MEM_FENCE); } + if (size >= 128) { if (tid < 64) smem[tid] = sum = sum + smem[tid + 64]; + barrier(CLK_LOCAL_MEM_FENCE); } +#ifdef CPU + if (size >= 64) { if (tid < 32) smem[tid] = sum = sum + smem[tid + 32]; + barrier(CLK_LOCAL_MEM_FENCE); } + if (size >= 32) { if (tid < 16) smem[tid] = sum = sum + smem[tid + 16]; + barrier(CLK_LOCAL_MEM_FENCE); } + if (size >= 16) { if (tid < 8) smem[tid] = sum = sum + smem[tid + 8]; + barrier(CLK_LOCAL_MEM_FENCE); } + if (size >= 8) { if (tid < 4) smem[tid] = sum = sum + smem[tid + 4]; + barrier(CLK_LOCAL_MEM_FENCE); } + if (size >= 4) { if (tid < 2) smem[tid] = sum = sum + smem[tid + 2]; + barrier(CLK_LOCAL_MEM_FENCE); } + if (size >= 2) { if (tid < 1) smem[tid] = sum = sum + smem[tid + 1]; + barrier(CLK_LOCAL_MEM_FENCE); } +#else if (tid < 32) { if (size >= 64) smem[tid] = sum = sum + smem[tid + 32]; -#if defined(WAVE_SIZE_16) || defined(WAVE_SIZE_1) - } - barrier(CLK_LOCAL_MEM_FENCE); - if (tid < 16) - { -#endif if (size >= 32) smem[tid] = sum = sum + smem[tid + 16]; -#ifdef WAVE_SIZE_1 - } - barrier(CLK_LOCAL_MEM_FENCE); - if (tid < 8) - { -#endif if (size >= 16) smem[tid] = sum 
= sum + smem[tid + 8]; -#ifdef WAVE_SIZE_1 - } - barrier(CLK_LOCAL_MEM_FENCE); - if (tid < 4) - { -#endif if (size >= 8) smem[tid] = sum = sum + smem[tid + 4]; -#ifdef WAVE_SIZE_1 - } - barrier(CLK_LOCAL_MEM_FENCE); - if (tid < 2) - { -#endif if (size >= 4) smem[tid] = sum = sum + smem[tid + 2]; -#ifdef WAVE_SIZE_1 - } - barrier(CLK_LOCAL_MEM_FENCE); - if (tid < 1) - { -#endif if (size >= 2) smem[tid] = sum = sum + smem[tid + 1]; } - - barrier(CLK_LOCAL_MEM_FENCE); - sum = smem[0]; +#endif return sum; } -__kernel void normalize_hists_kernel(const int nthreads, const int block_hist_size, const int img_block_width, - __global float* block_hists, const float threshold, __local float *squares) +__kernel void normalize_hists_kernel( + const int nthreads, const int block_hist_size, const int img_block_width, + __global float* block_hists, const float threshold, __local float *squares) { const int tid = get_local_id(0); const int gidX = get_group_id(0); const int gidY = get_group_id(1); - __global float* hist = block_hists + (gidY * img_block_width + gidX) * block_hist_size + tid; + __global float* hist = block_hists + (gidY * img_block_width + gidX) * + block_hist_size + tid; float elem = 0.f; if (tid < block_hist_size) @@ -249,25 +365,98 @@ __kernel void normalize_hists_kernel(const int nthreads, const int block_hist_si //--------------------------------------------------------------------- // Linear SVM based classification -// -__kernel void classify_hists_kernel(const int cblock_hist_size, const int cdescr_size, const int cdescr_width, - const int img_win_width, const int img_block_width, - const int win_block_stride_x, const int win_block_stride_y, - __global const float * block_hists, __global const float* coefs, - float free_coef, float threshold, __global uchar* labels) +// 48x96 window, 9 bins and default parameters +// 180 threads, each thread corresponds to a bin in a row +__kernel void classify_hists_180_kernel( + const int cdescr_width, const int 
cdescr_height, const int cblock_hist_size, + const int img_win_width, const int img_block_width, + const int win_block_stride_x, const int win_block_stride_y, + __global const float * block_hists, __global const float* coefs, + float free_coef, float threshold, __global uchar* labels) { const int tid = get_local_id(0); const int gidX = get_group_id(0); const int gidY = get_group_id(1); - __global const float* hist = block_hists + (gidY * win_block_stride_y * img_block_width + gidX * win_block_stride_x) * cblock_hist_size; + __global const float* hist = block_hists + (gidY * win_block_stride_y * + img_block_width + gidX * win_block_stride_x) * cblock_hist_size; float product = 0.f; - for (int i = tid; i < cdescr_size; i += NTHREADS) + + for (int i = 0; i < cdescr_height; i++) { - int offset_y = i / cdescr_width; - int offset_x = i - offset_y * cdescr_width; - product += coefs[i] * hist[offset_y * img_block_width * cblock_hist_size + offset_x]; + product += coefs[i * cdescr_width + tid] * + hist[i * img_block_width * cblock_hist_size + tid]; + } + + __local float products[180]; + + products[tid] = product; + + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 90) products[tid] = product = product + products[tid + 90]; + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 45) products[tid] = product = product + products[tid + 45]; + barrier(CLK_LOCAL_MEM_FENCE); + + volatile __local float* smem = products; +#ifdef CPU + if (tid < 13) smem[tid] = product = product + smem[tid + 32]; + barrier(CLK_LOCAL_MEM_FENCE); + if (tid < 16) smem[tid] = product = product + smem[tid + 16]; + barrier(CLK_LOCAL_MEM_FENCE); + if(tid<8) smem[tid] = product = product + smem[tid + 8]; + barrier(CLK_LOCAL_MEM_FENCE); + if(tid<4) smem[tid] = product = product + smem[tid + 4]; + barrier(CLK_LOCAL_MEM_FENCE); + if(tid<2) smem[tid] = product = product + smem[tid + 2]; + barrier(CLK_LOCAL_MEM_FENCE); +#else + if (tid < 13) + { + smem[tid] = product = product + smem[tid + 32]; + } + if (tid < 16) + { + 
smem[tid] = product = product + smem[tid + 16]; + smem[tid] = product = product + smem[tid + 8]; + smem[tid] = product = product + smem[tid + 4]; + smem[tid] = product = product + smem[tid + 2]; + } +#endif + + if (tid == 0){ + product = product + smem[tid + 1]; + labels[gidY * img_win_width + gidX] = (product + free_coef >= threshold); + } +} + +//--------------------------------------------------------------------- +// Linear SVM based classification +// 64x128 window, 9 bins and default parameters +// 256 threads, 252 of them are used +__kernel void classify_hists_252_kernel( + const int cdescr_width, const int cdescr_height, const int cblock_hist_size, + const int img_win_width, const int img_block_width, + const int win_block_stride_x, const int win_block_stride_y, + __global const float * block_hists, __global const float* coefs, + float free_coef, float threshold, __global uchar* labels) +{ + const int tid = get_local_id(0); + const int gidX = get_group_id(0); + const int gidY = get_group_id(1); + + __global const float* hist = block_hists + (gidY * win_block_stride_y * + img_block_width + gidX * win_block_stride_x) * cblock_hist_size; + + float product = 0.f; + if (tid < cdescr_width) + { + for (int i = 0; i < cdescr_height; i++) + product += coefs[i * cdescr_width + tid] * + hist[i * img_block_width * cblock_hist_size + tid]; } __local float products[NTHREADS]; @@ -282,67 +471,120 @@ __kernel void classify_hists_kernel(const int cblock_hist_size, const int cdescr if (tid < 64) products[tid] = product = product + products[tid + 64]; barrier(CLK_LOCAL_MEM_FENCE); - volatile __local float* smem = products; + volatile __local float* smem = products; +#ifdef CPU + if(tid<32) smem[tid] = product = product + smem[tid + 32]; + barrier(CLK_LOCAL_MEM_FENCE); + if(tid<16) smem[tid] = product = product + smem[tid + 16]; + barrier(CLK_LOCAL_MEM_FENCE); + if(tid<8) smem[tid] = product = product + smem[tid + 8]; + barrier(CLK_LOCAL_MEM_FENCE); + if(tid<4) smem[tid] = 
product = product + smem[tid + 4]; + barrier(CLK_LOCAL_MEM_FENCE); + if(tid<2) smem[tid] = product = product + smem[tid + 2]; + barrier(CLK_LOCAL_MEM_FENCE); +#else if (tid < 32) - { + { smem[tid] = product = product + smem[tid + 32]; -#if defined(WAVE_SIZE_16) || defined(WAVE_SIZE_1) - } - barrier(CLK_LOCAL_MEM_FENCE); - if (tid < 16) - { -#endif smem[tid] = product = product + smem[tid + 16]; -#ifdef WAVE_SIZE_1 - } - barrier(CLK_LOCAL_MEM_FENCE); - if (tid < 8) - { -#endif smem[tid] = product = product + smem[tid + 8]; -#ifdef WAVE_SIZE_1 - } - barrier(CLK_LOCAL_MEM_FENCE); - if (tid < 4) - { -#endif smem[tid] = product = product + smem[tid + 4]; -#ifdef WAVE_SIZE_1 - } - barrier(CLK_LOCAL_MEM_FENCE); - if (tid < 2) - { -#endif smem[tid] = product = product + smem[tid + 2]; -#ifdef WAVE_SIZE_1 } - barrier(CLK_LOCAL_MEM_FENCE); - if (tid < 1) - { #endif - smem[tid] = product = product + smem[tid + 1]; + if (tid == 0){ + product = product + smem[tid + 1]; + labels[gidY * img_win_width + gidX] = (product + free_coef >= threshold); + } +} + +//--------------------------------------------------------------------- +// Linear SVM based classification +// 256 threads +__kernel void classify_hists_kernel( + const int cdescr_size, const int cdescr_width, const int cblock_hist_size, + const int img_win_width, const int img_block_width, + const int win_block_stride_x, const int win_block_stride_y, + __global const float * block_hists, __global const float* coefs, + float free_coef, float threshold, __global uchar* labels) +{ + const int tid = get_local_id(0); + const int gidX = get_group_id(0); + const int gidY = get_group_id(1); + + __global const float* hist = block_hists + (gidY * win_block_stride_y * + img_block_width + gidX * win_block_stride_x) * cblock_hist_size; + + float product = 0.f; + for (int i = tid; i < cdescr_size; i += NTHREADS) + { + int offset_y = i / cdescr_width; + int offset_x = i - offset_y * cdescr_width; + product += coefs[i] * + hist[offset_y * 
img_block_width * cblock_hist_size + offset_x]; } - if (tid == 0) + __local float products[NTHREADS]; + + products[tid] = product; + + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 128) products[tid] = product = product + products[tid + 128]; + barrier(CLK_LOCAL_MEM_FENCE); + + if (tid < 64) products[tid] = product = product + products[tid + 64]; + barrier(CLK_LOCAL_MEM_FENCE); + + volatile __local float* smem = products; +#ifdef CPU + if(tid<32) smem[tid] = product = product + smem[tid + 32]; + barrier(CLK_LOCAL_MEM_FENCE); + if(tid<16) smem[tid] = product = product + smem[tid + 16]; + barrier(CLK_LOCAL_MEM_FENCE); + if(tid<8) smem[tid] = product = product + smem[tid + 8]; + barrier(CLK_LOCAL_MEM_FENCE); + if(tid<4) smem[tid] = product = product + smem[tid + 4]; + barrier(CLK_LOCAL_MEM_FENCE); + if(tid<2) smem[tid] = product = product + smem[tid + 2]; + barrier(CLK_LOCAL_MEM_FENCE); +#else + if (tid < 32) + { + smem[tid] = product = product + smem[tid + 32]; + smem[tid] = product = product + smem[tid + 16]; + smem[tid] = product = product + smem[tid + 8]; + smem[tid] = product = product + smem[tid + 4]; + smem[tid] = product = product + smem[tid + 2]; + } +#endif + if (tid == 0){ + smem[tid] = product = product + smem[tid + 1]; labels[gidY * img_win_width + gidX] = (product + free_coef >= threshold); + } } //---------------------------------------------------------------------------- // Extract descriptors -__kernel void extract_descrs_by_rows_kernel(const int cblock_hist_size, const int descriptors_quadstep, const int cdescr_size, const int cdescr_width, - const int img_block_width, const int win_block_stride_x, const int win_block_stride_y, - __global const float* block_hists, __global float* descriptors) +__kernel void extract_descrs_by_rows_kernel( + const int cblock_hist_size, const int descriptors_quadstep, + const int cdescr_size, const int cdescr_width, const int img_block_width, + const int win_block_stride_x, const int win_block_stride_y, + __global const 
float* block_hists, __global float* descriptors) { int tid = get_local_id(0); int gidX = get_group_id(0); int gidY = get_group_id(1); // Get left top corner of the window in src - __global const float* hist = block_hists + (gidY * win_block_stride_y * img_block_width + gidX * win_block_stride_x) * cblock_hist_size; + __global const float* hist = block_hists + (gidY * win_block_stride_y * + img_block_width + gidX * win_block_stride_x) * cblock_hist_size; // Get left top corner of the window in dst - __global float* descriptor = descriptors + (gidY * get_num_groups(0) + gidX) * descriptors_quadstep; + __global float* descriptor = descriptors + + (gidY * get_num_groups(0) + gidX) * descriptors_quadstep; // Copy elements from src to dst for (int i = tid; i < cdescr_size; i += NTHREADS) @@ -353,19 +595,23 @@ __kernel void extract_descrs_by_rows_kernel(const int cblock_hist_size, const in } } -__kernel void extract_descrs_by_cols_kernel(const int cblock_hist_size, const int descriptors_quadstep, const int cdescr_size, - const int cnblocks_win_x, const int cnblocks_win_y, const int img_block_width, const int win_block_stride_x, - const int win_block_stride_y, __global const float* block_hists, __global float* descriptors) +__kernel void extract_descrs_by_cols_kernel( + const int cblock_hist_size, const int descriptors_quadstep, const int cdescr_size, + const int cnblocks_win_x, const int cnblocks_win_y, const int img_block_width, + const int win_block_stride_x, const int win_block_stride_y, + __global const float* block_hists, __global float* descriptors) { int tid = get_local_id(0); int gidX = get_group_id(0); int gidY = get_group_id(1); // Get left top corner of the window in src - __global const float* hist = block_hists + (gidY * win_block_stride_y * img_block_width + gidX * win_block_stride_x) * cblock_hist_size; + __global const float* hist = block_hists + (gidY * win_block_stride_y * + img_block_width + gidX * win_block_stride_x) * cblock_hist_size; // Get left top 
corner of the window in dst - __global float* descriptor = descriptors + (gidY * get_num_groups(0) + gidX) * descriptors_quadstep; + __global float* descriptor = descriptors + + (gidY * get_num_groups(0) + gidX) * descriptors_quadstep; // Copy elements from src to dst for (int i = tid; i < cdescr_size; i += NTHREADS) @@ -376,16 +622,19 @@ __kernel void extract_descrs_by_cols_kernel(const int cblock_hist_size, const in int y = block_idx / cnblocks_win_x; int x = block_idx - y * cnblocks_win_x; - descriptor[(x * cnblocks_win_y + y) * cblock_hist_size + idx_in_block] = hist[(y * img_block_width + x) * cblock_hist_size + idx_in_block]; + descriptor[(x * cnblocks_win_y + y) * cblock_hist_size + idx_in_block] = + hist[(y * img_block_width + x) * cblock_hist_size + idx_in_block]; } } //---------------------------------------------------------------------------- // Gradients computation -__kernel void compute_gradients_8UC4_kernel(const int height, const int width, const int img_step, const int grad_quadstep, const int qangle_step, - const __global uchar4 * img, __global float * grad, __global uchar * qangle, - const float angle_scale, const char correct_gamma, const int cnbins) +__kernel void compute_gradients_8UC4_kernel( + const int height, const int width, + const int img_step, const int grad_quadstep, const int qangle_step, + const __global uchar4 * img, __global float * grad, __global uchar * qangle, + const float angle_scale, const char correct_gamma, const int cnbins) { const int x = get_global_id(0); const int tid = get_local_id(0); @@ -426,8 +675,10 @@ __kernel void compute_gradients_8UC4_kernel(const int height, const int width, c barrier(CLK_LOCAL_MEM_FENCE); if (x < width) { - float3 a = (float3) (sh_row[tid], sh_row[tid + (NTHREADS + 2)], sh_row[tid + 2 * (NTHREADS + 2)]); - float3 b = (float3) (sh_row[tid + 2], sh_row[tid + 2 + (NTHREADS + 2)], sh_row[tid + 2 + 2 * (NTHREADS + 2)]); + float3 a = (float3) (sh_row[tid], sh_row[tid + (NTHREADS + 2)], + 
sh_row[tid + 2 * (NTHREADS + 2)]); + float3 b = (float3) (sh_row[tid + 2], sh_row[tid + 2 + (NTHREADS + 2)], + sh_row[tid + 2 + 2 * (NTHREADS + 2)]); float3 dx; if (correct_gamma == 1) @@ -482,9 +733,11 @@ __kernel void compute_gradients_8UC4_kernel(const int height, const int width, c } } -__kernel void compute_gradients_8UC1_kernel(const int height, const int width, const int img_step, const int grad_quadstep, const int qangle_step, - __global const uchar * img, __global float * grad, __global uchar * qangle, - const float angle_scale, const char correct_gamma, const int cnbins) +__kernel void compute_gradients_8UC1_kernel( + const int height, const int width, + const int img_step, const int grad_quadstep, const int qangle_step, + __global const uchar * img, __global float * grad, __global uchar * qangle, + const float angle_scale, const char correct_gamma, const int cnbins) { const int x = get_global_id(0); const int tid = get_local_id(0); @@ -539,43 +792,4 @@ __kernel void compute_gradients_8UC1_kernel(const int height, const int width, c grad[ (gidY * grad_quadstep + x) << 1 ] = mag * (1.f - ang); grad[ ((gidY * grad_quadstep + x) << 1) + 1 ] = mag * ang; } -} - -//---------------------------------------------------------------------------- -// Resize - -__kernel void resize_8UC4_kernel(__global uchar4 * dst, __global const uchar4 * src, - int dst_offset, int src_offset, int dst_step, int src_step, - int src_cols, int src_rows, int dst_cols, int dst_rows, float ifx, float ify ) -{ - int dx = get_global_id(0); - int dy = get_global_id(1); - - int sx = (int)floor(dx*ifx+0.5f); - int sy = (int)floor(dy*ify+0.5f); - sx = min(sx, src_cols-1); - sy = min(sy, src_rows-1); - int dpos = (dst_offset>>2) + dy * (dst_step>>2) + dx; - int spos = (src_offset>>2) + sy * (src_step>>2) + sx; - - if(dx Date: Wed, 19 Jun 2013 11:31:42 +0800 Subject: [PATCH 102/121] Fix cmake path finding for amd libs. There is no WIN64 defined in the environment. 
--- cmake/OpenCVDetectOpenCL.cmake | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmake/OpenCVDetectOpenCL.cmake b/cmake/OpenCVDetectOpenCL.cmake index a1e8bbac7..2c96274a8 100644 --- a/cmake/OpenCVDetectOpenCL.cmake +++ b/cmake/OpenCVDetectOpenCL.cmake @@ -44,7 +44,7 @@ if(OPENCL_FOUND) set(OPENCL_INCLUDE_DIRS ${OPENCL_INCLUDE_DIR}) set(OPENCL_LIBRARIES ${OPENCL_LIBRARY}) - if(WIN64) + if(WIN32 AND X86_64) set(CLAMD_POSSIBLE_LIB_SUFFIXES lib64/import) elseif(WIN32) set(CLAMD_POSSIBLE_LIB_SUFFIXES lib32/import) From 2c198f6cd6802ebfc8d7216f2b06b7c7fb42f6b9 Mon Sep 17 00:00:00 2001 From: yao Date: Wed, 19 Jun 2013 13:03:35 +0800 Subject: [PATCH 103/121] revise accuracy and perf tests --- modules/ocl/perf/main.cpp | 2 + .../perf_calib3d.cpp} | 85 ++++--- modules/ocl/perf/perf_filters.cpp | 16 +- modules/ocl/perf/perf_hog.cpp | 76 +----- modules/ocl/perf/perf_imgproc.cpp | 46 +++- .../{perf_columnsum.cpp => perf_moments.cpp} | 62 ++--- modules/ocl/perf/precomp.cpp | 14 -- modules/ocl/test/test_haar.cpp | 180 -------------- modules/ocl/test/test_imgproc.cpp | 46 +++- .../test/{test_hog.cpp => test_objdetect.cpp} | 231 ++++++++++-------- .../{test_pyrdown.cpp => test_pyramids.cpp} | 44 +++- modules/ocl/test/test_pyrup.cpp | 91 ------- modules/ocl/test/utility.cpp | 102 ++++---- modules/ocl/test/utility.hpp | 11 +- 14 files changed, 392 insertions(+), 614 deletions(-) rename modules/ocl/{test/test_columnsum.cpp => perf/perf_calib3d.cpp} (65%) rename modules/ocl/perf/{perf_columnsum.cpp => perf_moments.cpp} (68%) delete mode 100644 modules/ocl/test/test_haar.cpp rename modules/ocl/test/{test_hog.cpp => test_objdetect.cpp} (51%) rename modules/ocl/test/{test_pyrdown.cpp => test_pyramids.cpp} (75%) delete mode 100644 modules/ocl/test/test_pyrup.cpp diff --git a/modules/ocl/perf/main.cpp b/modules/ocl/perf/main.cpp index 2da17755e..dfcac20bc 100644 --- a/modules/ocl/perf/main.cpp +++ b/modules/ocl/perf/main.cpp @@ -52,6 +52,8 @@ int main(int argc, const 
char *argv[]) cerr << "no device found\n"; return -1; } + // set this to overwrite binary cache every time the test starts + ocl::setBinaryDiskCache(ocl::CACHE_UPDATE); int devidx = 0; diff --git a/modules/ocl/test/test_columnsum.cpp b/modules/ocl/perf/perf_calib3d.cpp similarity index 65% rename from modules/ocl/test/test_columnsum.cpp rename to modules/ocl/perf/perf_calib3d.cpp index 231f0657b..f998ddf0f 100644 --- a/modules/ocl/test/test_columnsum.cpp +++ b/modules/ocl/perf/perf_calib3d.cpp @@ -15,8 +15,8 @@ // Third party copyrights are property of their respective owners. // // @Authors -// Chunpeng Zhang chunpeng@multicorewareinc.com -// +// Fangfang Bai, fangfang@multicorewareinc.com +// Jin Ma, jin@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -31,7 +31,7 @@ // * The name of the copyright holders may not be used to endorse or promote products // derived from this software without specific prior written permission. // -// This software is provided by the copyright holders and contributors "as is" and +// This software is provided by the copyright holders and contributors as is and // any express or implied warranties, including, but not limited to, the implied // warranties of merchantability and fitness for a particular purpose are disclaimed. 
// In no event shall the Intel Corporation or contributors be liable for any direct, @@ -45,50 +45,57 @@ //M*/ #include "precomp.hpp" -#include - -#ifdef HAVE_OPENCL - -PARAM_TEST_CASE(ColumnSum, cv::Size) +///////////// StereoMatchBM //////////////////////// +PERFTEST(StereoMatchBM) { - cv::Size size; - cv::Mat src; + Mat left_image = imread(abspath("aloeL.jpg"), cv::IMREAD_GRAYSCALE); + Mat right_image = imread(abspath("aloeR.jpg"), cv::IMREAD_GRAYSCALE); + Mat disp,dst; + ocl::oclMat d_left, d_right,d_disp; + int n_disp= 128; + int winSize =19; - virtual void SetUp() - { - size = GET_PARAM(0); - } -}; + SUBTEST << left_image.cols << 'x' << left_image.rows << "; aloeL.jpg ;"<< right_image.cols << 'x' << right_image.rows << "; aloeR.jpg "; -TEST_P(ColumnSum, Accuracy) -{ - cv::Mat src = randomMat(size, CV_32FC1); - cv::ocl::oclMat d_dst; - cv::ocl::oclMat d_src(src); + StereoBM bm(0, n_disp, winSize); + bm(left_image, right_image, dst); - cv::ocl::columnSum(d_src, d_dst); + CPU_ON; + bm(left_image, right_image, dst); + CPU_OFF; - cv::Mat dst(d_dst); + d_left.upload(left_image); + d_right.upload(right_image); - for (int j = 0; j < src.cols; ++j) - { - float gold = src.at(0, j); - float res = dst.at(0, j); - ASSERT_NEAR(res, gold, 1e-5); - } + ocl::StereoBM_OCL d_bm(0, n_disp, winSize); - for (int i = 1; i < src.rows; ++i) - { - for (int j = 0; j < src.cols; ++j) - { - float gold = src.at(i, j) += src.at(i - 1, j); - float res = dst.at(i, j); - ASSERT_NEAR(res, gold, 1e-5); - } - } + WARMUP_ON; + d_bm(d_left, d_right, d_disp); + WARMUP_OFF; + + cv::Mat ocl_mat; + d_disp.download(ocl_mat); + ocl_mat.convertTo(ocl_mat, dst.type()); + + GPU_ON; + d_bm(d_left, d_right, d_disp); + GPU_OFF; + + GPU_FULL_ON; + d_left.upload(left_image); + d_right.upload(right_image); + d_bm(d_left, d_right, d_disp); + d_disp.download(disp); + GPU_FULL_OFF; + + TestSystem::instance().setAccurate(-1, 0.); } -INSTANTIATE_TEST_CASE_P(OCL_ImgProc, ColumnSum, DIFFERENT_SIZES); -#endif + + + + + 
+ \ No newline at end of file diff --git a/modules/ocl/perf/perf_filters.cpp b/modules/ocl/perf/perf_filters.cpp index a05301b34..e988ce09d 100644 --- a/modules/ocl/perf/perf_filters.cpp +++ b/modules/ocl/perf/perf_filters.cpp @@ -284,6 +284,7 @@ PERFTEST(GaussianBlur) Mat src, dst, ocl_dst; int all_type[] = {CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4}; std::string type_name[] = {"CV_8UC1", "CV_8UC4", "CV_32FC1", "CV_32FC4"}; + const int ksize = 7; for (int size = Min_Size; size <= Max_Size; size *= Multiple) { @@ -291,29 +292,28 @@ PERFTEST(GaussianBlur) { SUBTEST << size << 'x' << size << "; " << type_name[j] ; - gen(src, size, size, all_type[j], 5, 16); + gen(src, size, size, all_type[j], 0, 256); - GaussianBlur(src, dst, Size(9, 9), 0); + GaussianBlur(src, dst, Size(ksize, ksize), 0); CPU_ON; - GaussianBlur(src, dst, Size(9, 9), 0); + GaussianBlur(src, dst, Size(ksize, ksize), 0); CPU_OFF; ocl::oclMat d_src(src); - ocl::oclMat d_dst(src.size(), src.type()); - ocl::oclMat d_buf; + ocl::oclMat d_dst; WARMUP_ON; - ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0); + ocl::GaussianBlur(d_src, d_dst, Size(ksize, ksize), 0); WARMUP_OFF; GPU_ON; - ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0); + ocl::GaussianBlur(d_src, d_dst, Size(ksize, ksize), 0); GPU_OFF; GPU_FULL_ON; d_src.upload(src); - ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0); + ocl::GaussianBlur(d_src, d_dst, Size(ksize, ksize), 0); d_dst.download(ocl_dst); GPU_FULL_OFF; diff --git a/modules/ocl/perf/perf_hog.cpp b/modules/ocl/perf/perf_hog.cpp index 05093811f..7daa61396 100644 --- a/modules/ocl/perf/perf_hog.cpp +++ b/modules/ocl/perf/perf_hog.cpp @@ -46,11 +46,6 @@ #include "precomp.hpp" ///////////// HOG//////////////////////// -bool match_rect(cv::Rect r1, cv::Rect r2, int threshold) -{ - return ((abs(r1.x - r2.x) < threshold) && (abs(r1.y - r2.y) < threshold) && - (abs(r1.width - r2.width) < threshold) && (abs(r1.height - r2.height) < threshold)); -} PERFTEST(HOG) { @@ -61,13 +56,12 @@ PERFTEST(HOG) throw 
runtime_error("can't open road.png"); } - cv::HOGDescriptor hog; hog.setSVMDetector(hog.getDefaultPeopleDetector()); std::vector found_locations; std::vector d_found_locations; - SUBTEST << 768 << 'x' << 576 << "; road.png"; + SUBTEST << src.cols << 'x' << src.rows << "; road.png"; hog.detectMultiScale(src, found_locations); @@ -84,70 +78,10 @@ PERFTEST(HOG) ocl_hog.detectMultiScale(d_src, d_found_locations); WARMUP_OFF; - // Ground-truth rectangular people window - cv::Rect win1_64x128(231, 190, 72, 144); - cv::Rect win2_64x128(621, 156, 97, 194); - cv::Rect win1_48x96(238, 198, 63, 126); - cv::Rect win2_48x96(619, 161, 92, 185); - cv::Rect win3_48x96(488, 136, 56, 112); - - // Compare whether ground-truth windows are detected and compare the number of windows detected. - std::vector d_comp(4); - std::vector comp(4); - for(int i = 0; i < (int)d_comp.size(); i++) - { - d_comp[i] = 0; - comp[i] = 0; - } - - int threshold = 10; - int val = 32; - d_comp[0] = (int)d_found_locations.size(); - comp[0] = (int)found_locations.size(); - - cv::Size winSize = hog.winSize; - - if (winSize == cv::Size(48, 96)) - { - for(int i = 0; i < (int)d_found_locations.size(); i++) - { - if (match_rect(d_found_locations[i], win1_48x96, threshold)) - d_comp[1] = val; - if (match_rect(d_found_locations[i], win2_48x96, threshold)) - d_comp[2] = val; - if (match_rect(d_found_locations[i], win3_48x96, threshold)) - d_comp[3] = val; - } - for(int i = 0; i < (int)found_locations.size(); i++) - { - if (match_rect(found_locations[i], win1_48x96, threshold)) - comp[1] = val; - if (match_rect(found_locations[i], win2_48x96, threshold)) - comp[2] = val; - if (match_rect(found_locations[i], win3_48x96, threshold)) - comp[3] = val; - } - } - else if (winSize == cv::Size(64, 128)) - { - for(int i = 0; i < (int)d_found_locations.size(); i++) - { - if (match_rect(d_found_locations[i], win1_64x128, threshold)) - d_comp[1] = val; - if (match_rect(d_found_locations[i], win2_64x128, threshold)) - d_comp[2] = 
val; - } - for(int i = 0; i < (int)found_locations.size(); i++) - { - if (match_rect(found_locations[i], win1_64x128, threshold)) - comp[1] = val; - if (match_rect(found_locations[i], win2_64x128, threshold)) - comp[2] = val; - } - } - - cv::Mat gpu_rst(d_comp), cpu_rst(comp); - TestSystem::instance().ExpectedMatNear(gpu_rst, cpu_rst, 3); + if(d_found_locations.size() == found_locations.size()) + TestSystem::instance().setAccurate(1, 0); + else + TestSystem::instance().setAccurate(0, abs((int)found_locations.size() - (int)d_found_locations.size())); GPU_ON; ocl_hog.detectMultiScale(d_src, found_locations); diff --git a/modules/ocl/perf/perf_imgproc.cpp b/modules/ocl/perf/perf_imgproc.cpp index e87e8213d..b330c5ffa 100644 --- a/modules/ocl/perf/perf_imgproc.cpp +++ b/modules/ocl/perf/perf_imgproc.cpp @@ -743,12 +743,12 @@ PERFTEST(meanShiftFiltering) WARMUP_OFF; GPU_ON; - ocl::meanShiftFiltering(d_src, d_dst, sp, sr); + ocl::meanShiftFiltering(d_src, d_dst, sp, sr, crit); GPU_OFF; GPU_FULL_ON; d_src.upload(src); - ocl::meanShiftFiltering(d_src, d_dst, sp, sr); + ocl::meanShiftFiltering(d_src, d_dst, sp, sr, crit); d_dst.download(ocl_dst); GPU_FULL_OFF; @@ -969,3 +969,45 @@ PERFTEST(CLAHE) } } } + +///////////// columnSum//////////////////////// +PERFTEST(columnSum) +{ + Mat src, dst, ocl_dst; + ocl::oclMat d_src, d_dst; + + for (int size = Min_Size; size <= Max_Size; size *= Multiple) + { + SUBTEST << size << 'x' << size << "; CV_32FC1"; + + gen(src, size, size, CV_32FC1, 0, 256); + + CPU_ON; + dst.create(src.size(), src.type()); + for (int j = 0; j < src.cols; j++) + dst.at(0, j) = src.at(0, j); + + for (int i = 1; i < src.rows; ++i) + for (int j = 0; j < src.cols; ++j) + dst.at(i, j) = dst.at(i - 1 , j) + src.at(i , j); + CPU_OFF; + + d_src.upload(src); + + WARMUP_ON; + ocl::columnSum(d_src, d_dst); + WARMUP_OFF; + + GPU_ON; + ocl::columnSum(d_src, d_dst); + GPU_OFF; + + GPU_FULL_ON; + d_src.upload(src); + ocl::columnSum(d_src, d_dst); + d_dst.download(ocl_dst); + 
GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 5e-1); + } +} diff --git a/modules/ocl/perf/perf_columnsum.cpp b/modules/ocl/perf/perf_moments.cpp similarity index 68% rename from modules/ocl/perf/perf_columnsum.cpp rename to modules/ocl/perf/perf_moments.cpp index ff7ebcd1d..7fa3948de 100644 --- a/modules/ocl/perf/perf_columnsum.cpp +++ b/modules/ocl/perf/perf_moments.cpp @@ -44,45 +44,49 @@ // //M*/ #include "precomp.hpp" - -///////////// columnSum//////////////////////// -PERFTEST(columnSum) +///////////// Moments //////////////////////// +PERFTEST(Moments) { - Mat src, dst, ocl_dst; - ocl::oclMat d_src, d_dst; + Mat src; + bool binaryImage = 0; + + int all_type[] = {CV_8UC1, CV_16SC1, CV_32FC1, CV_64FC1}; + std::string type_name[] = {"CV_8UC1", "CV_16SC1", "CV_32FC1", "CV_64FC1"}; for (int size = Min_Size; size <= Max_Size; size *= Multiple) { - SUBTEST << size << 'x' << size << "; CV_32FC1"; + for (size_t j = 0; j < sizeof(all_type) / sizeof(int); j++) + { + SUBTEST << size << 'x' << size << "; " << type_name[j]; - gen(src, size, size, CV_32FC1, 0, 256); + gen(src, size, size, all_type[j], 0, 256); - CPU_ON; - dst.create(src.size(), src.type()); - for (int j = 0; j < src.cols; j++) - dst.at(0, j) = src.at(0, j); + cv::Moments CvMom = moments(src, binaryImage); - for (int i = 1; i < src.rows; ++i) - for (int j = 0; j < src.cols; ++j) - dst.at(i, j) = dst.at(i - 1 , j) + src.at(i , j); - CPU_OFF; + CPU_ON; + moments(src, binaryImage); + CPU_OFF; - d_src.upload(src); + cv::Moments oclMom; + WARMUP_ON; + oclMom = ocl::ocl_moments(src, binaryImage); + WARMUP_OFF; - WARMUP_ON; - ocl::columnSum(d_src, d_dst); - WARMUP_OFF; + Mat gpu_dst, cpu_dst; + HuMoments(CvMom, cpu_dst); + HuMoments(oclMom, gpu_dst); - GPU_ON; - ocl::columnSum(d_src, d_dst); - GPU_OFF; + GPU_ON; + ocl::ocl_moments(src, binaryImage); + GPU_OFF; - GPU_FULL_ON; - d_src.upload(src); - ocl::columnSum(d_src, d_dst); - d_dst.download(ocl_dst); - GPU_FULL_OFF; + GPU_FULL_ON; + 
ocl::ocl_moments(src, binaryImage); + GPU_FULL_OFF; + + TestSystem::instance().ExpectedMatNear(gpu_dst, cpu_dst, .5); + + } - TestSystem::instance().ExpectedMatNear(dst, ocl_dst, 5e-1); } -} \ No newline at end of file +} diff --git a/modules/ocl/perf/precomp.cpp b/modules/ocl/perf/precomp.cpp index 71a13a1ee..9fc634290 100644 --- a/modules/ocl/perf/precomp.cpp +++ b/modules/ocl/perf/precomp.cpp @@ -331,20 +331,6 @@ void TestSystem::printMetrics(int is_accurate, double cpu_time, double gpu_time, cout << setiosflags(ios_base::left); stringstream stream; -#if 0 - if(is_accurate == 1) - stream << "Pass"; - else if(is_accurate_ == 0) - stream << "Fail"; - else if(is_accurate == -1) - stream << " "; - else - { - std::cout<<"is_accurate errer: "< faces, oclfaces; - - Mat gray, smallImg(cvRound (img.rows / scale), cvRound(img.cols / scale), CV_8UC1 ); - MemStorage storage(cvCreateMemStorage(0)); - cvtColor( img, gray, CV_BGR2GRAY ); - resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR ); - equalizeHist( smallImg, smallImg ); - - cv::ocl::oclMat image; - CvSeq *_objects; - image.upload(smallImg); - _objects = cascade.oclHaarDetectObjects( image, storage, 1.1, - 3, flags, Size(30, 30), Size(0, 0) ); - vector vecAvgComp; - Seq(_objects).copyTo(vecAvgComp); - oclfaces.resize(vecAvgComp.size()); - std::transform(vecAvgComp.begin(), vecAvgComp.end(), oclfaces.begin(), getRect()); - - cpucascade.detectMultiScale( smallImg, faces, 1.1, 3, - flags, - Size(30, 30), Size(0, 0) ); - EXPECT_EQ(faces.size(), oclfaces.size()); -} - -TEST_P(Haar, FaceDetectUseBuf) -{ - string imgName = workdir + "lena.jpg"; - Mat img = imread( imgName, 1 ); - - if(img.empty()) - { - std::cout << "Couldn't read " << imgName << std::endl; - return ; - } - - vector faces, oclfaces; - - Mat gray, smallImg(cvRound (img.rows / scale), cvRound(img.cols / scale), CV_8UC1 ); - cvtColor( img, gray, CV_BGR2GRAY ); - resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR ); - equalizeHist( smallImg, 
smallImg ); - - cv::ocl::oclMat image; - image.upload(smallImg); - - cv::ocl::OclCascadeClassifierBuf cascadebuf; - if( !cascadebuf.load( cascadeName ) ) - { - cout << "ERROR: Could not load classifier cascade for FaceDetectUseBuf!" << endl; - return; - } - cascadebuf.detectMultiScale( image, oclfaces, 1.1, 3, - flags, - Size(30, 30), Size(0, 0) ); - - cpucascade.detectMultiScale( smallImg, faces, 1.1, 3, - flags, - Size(30, 30), Size(0, 0) ); - EXPECT_EQ(faces.size(), oclfaces.size()); - - // intentionally run ocl facedetect again and check if it still works after the first run - cascadebuf.detectMultiScale( image, oclfaces, 1.1, 3, - flags, - Size(30, 30)); - cascadebuf.release(); - EXPECT_EQ(faces.size(), oclfaces.size()); -} - -INSTANTIATE_TEST_CASE_P(FaceDetect, Haar, - Combine(Values(1.0), - Values(CV_HAAR_SCALE_IMAGE, 0), Values(cascade_frontalface_alt, cascade_frontalface_alt2))); - -#endif // HAVE_OPENCL diff --git a/modules/ocl/test/test_imgproc.cpp b/modules/ocl/test/test_imgproc.cpp index b9f4740b1..3a98671d5 100644 --- a/modules/ocl/test/test_imgproc.cpp +++ b/modules/ocl/test/test_imgproc.cpp @@ -1573,6 +1573,47 @@ TEST_P(Convolve, Mat) } } +//////////////////////////////// ColumnSum ////////////////////////////////////// +PARAM_TEST_CASE(ColumnSum, cv::Size) +{ + cv::Size size; + cv::Mat src; + + virtual void SetUp() + { + size = GET_PARAM(0); + } +}; + +TEST_P(ColumnSum, Accuracy) +{ + cv::Mat src = randomMat(size, CV_32FC1); + cv::ocl::oclMat d_dst; + cv::ocl::oclMat d_src(src); + + cv::ocl::columnSum(d_src, d_dst); + + cv::Mat dst(d_dst); + + for (int j = 0; j < src.cols; ++j) + { + float gold = src.at(0, j); + float res = dst.at(0, j); + ASSERT_NEAR(res, gold, 1e-5); + } + + for (int i = 1; i < src.rows; ++i) + { + for (int j = 0; j < src.cols; ++j) + { + float gold = src.at(i, j) += src.at(i - 1, j); + float res = dst.at(i, j); + ASSERT_NEAR(res, gold, 1e-5); + } + } +} 
+///////////////////////////////////////////////////////////////////////////////////// + INSTANTIATE_TEST_CASE_P(ImgprocTestBase, equalizeHist, Combine( ONE_TYPE(CV_8UC1), NULL_TYPE, @@ -1688,7 +1729,6 @@ INSTANTIATE_TEST_CASE_P(ImgProc, CLAHE, Combine( Values(cv::Size(128, 128), cv::Size(113, 113), cv::Size(1300, 1300)), Values(0.0, 40.0))); -//INSTANTIATE_TEST_CASE_P(ConvolveTestBase, Convolve, Combine( -// Values(CV_32FC1, CV_32FC1), -// Values(false))); // Values(false) is the reserved parameter +INSTANTIATE_TEST_CASE_P(OCL_ImgProc, ColumnSum, DIFFERENT_SIZES); + #endif // HAVE_OPENCL diff --git a/modules/ocl/test/test_hog.cpp b/modules/ocl/test/test_objdetect.cpp similarity index 51% rename from modules/ocl/test/test_hog.cpp rename to modules/ocl/test/test_objdetect.cpp index cfc4e3963..86590f798 100644 --- a/modules/ocl/test/test_hog.cpp +++ b/modules/ocl/test/test_objdetect.cpp @@ -15,7 +15,7 @@ // Third party copyrights are property of their respective owners. // // @Authors -// Wenju He, wenju@multicorewareinc.com +// Yao Wang, bitwangyaoyao@gmail.com // // Redistribution and use in source and binary forms, with or without modification, // are permitted provided that the following conditions are met: @@ -45,51 +45,58 @@ #include "precomp.hpp" #include "opencv2/core/core.hpp" -using namespace std; +#include "opencv2/objdetect/objdetect.hpp" + +using namespace cv; +using namespace testing; #ifdef HAVE_OPENCL extern string workdir; -PARAM_TEST_CASE(HOG, cv::Size, int) + +///////////////////// HOG ///////////////////////////// +PARAM_TEST_CASE(HOG, Size, int) { - cv::Size winSize; + Size winSize; int type; + Mat img_rgb; virtual void SetUp() { winSize = GET_PARAM(0); type = GET_PARAM(1); + img_rgb = readImage(workdir + "../gpu/road.png"); + if(img_rgb.empty()) + { + std::cout << "Couldn't read road.png" << std::endl; + } } }; TEST_P(HOG, GetDescriptors) { - // Load image - cv::Mat img_rgb = readImage(workdir + "lena.jpg"); - ASSERT_FALSE(img_rgb.empty()); - // 
Convert image - cv::Mat img; + Mat img; switch (type) { case CV_8UC1: - cv::cvtColor(img_rgb, img, CV_BGR2GRAY); + cvtColor(img_rgb, img, CV_BGR2GRAY); break; case CV_8UC4: default: - cv::cvtColor(img_rgb, img, CV_BGR2BGRA); + cvtColor(img_rgb, img, CV_BGR2BGRA); break; } - cv::ocl::oclMat d_img(img); + ocl::oclMat d_img(img); // HOGs - cv::ocl::HOGDescriptor ocl_hog; + ocl::HOGDescriptor ocl_hog; ocl_hog.gamma_correction = true; - cv::HOGDescriptor hog; + HOGDescriptor hog; hog.gammaCorrection = true; // Compute descriptor - cv::ocl::oclMat d_descriptors; + ocl::oclMat d_descriptors; ocl_hog.getDescriptors(d_img, ocl_hog.win_size, d_descriptors, ocl_hog.DESCR_FORMAT_COL_BY_COL); - cv::Mat down_descriptors; + Mat down_descriptors; d_descriptors.download(down_descriptors); down_descriptors = down_descriptors.reshape(0, down_descriptors.cols * down_descriptors.rows); @@ -105,45 +112,34 @@ TEST_P(HOG, GetDescriptors) hog.compute(img_rgb, descriptors, ocl_hog.win_size); break; } - cv::Mat cpu_descriptors(descriptors); + Mat cpu_descriptors(descriptors); EXPECT_MAT_SIMILAR(down_descriptors, cpu_descriptors, 1e-2); } - -bool match_rect(cv::Rect r1, cv::Rect r2, int threshold) -{ - return ((abs(r1.x - r2.x) < threshold) && (abs(r1.y - r2.y) < threshold) && - (abs(r1.width - r2.width) < threshold) && (abs(r1.height - r2.height) < threshold)); -} - TEST_P(HOG, Detect) { - // Load image - cv::Mat img_rgb = readImage(workdir + "lena.jpg"); - ASSERT_FALSE(img_rgb.empty()); - // Convert image - cv::Mat img; + Mat img; switch (type) { case CV_8UC1: - cv::cvtColor(img_rgb, img, CV_BGR2GRAY); + cvtColor(img_rgb, img, CV_BGR2GRAY); break; case CV_8UC4: default: - cv::cvtColor(img_rgb, img, CV_BGR2BGRA); + cvtColor(img_rgb, img, CV_BGR2BGRA); break; } - cv::ocl::oclMat d_img(img); + ocl::oclMat d_img(img); // HOGs - if ((winSize != cv::Size(48, 96)) && (winSize != cv::Size(64, 128))) - winSize = cv::Size(64, 128); - cv::ocl::HOGDescriptor ocl_hog(winSize); + if ((winSize != Size(48, 
96)) && (winSize != Size(64, 128))) + winSize = Size(64, 128); + ocl::HOGDescriptor ocl_hog(winSize); ocl_hog.gamma_correction = true; - cv::HOGDescriptor hog; + HOGDescriptor hog; hog.winSize = winSize; hog.gammaCorrection = true; @@ -165,88 +161,117 @@ TEST_P(HOG, Detect) } // OpenCL detection - std::vector d_found; - ocl_hog.detectMultiScale(d_img, d_found, 0, cv::Size(8, 8), cv::Size(0, 0), 1.05, 2); + std::vector d_found; + ocl_hog.detectMultiScale(d_img, d_found, 0, Size(8, 8), Size(0, 0), 1.05, 6); // CPU detection - std::vector found; + std::vector found; switch (type) { case CV_8UC1: - hog.detectMultiScale(img, found, 0, cv::Size(8, 8), cv::Size(0, 0), 1.05, 2); + hog.detectMultiScale(img, found, 0, Size(8, 8), Size(0, 0), 1.05, 6); break; case CV_8UC4: default: - hog.detectMultiScale(img_rgb, found, 0, cv::Size(8, 8), cv::Size(0, 0), 1.05, 2); + hog.detectMultiScale(img_rgb, found, 0, Size(8, 8), Size(0, 0), 1.05, 6); break; } - // Ground-truth rectangular people window - cv::Rect win1_64x128(231, 190, 72, 144); - cv::Rect win2_64x128(621, 156, 97, 194); - cv::Rect win1_48x96(238, 198, 63, 126); - cv::Rect win2_48x96(619, 161, 92, 185); - cv::Rect win3_48x96(488, 136, 56, 112); - - // Compare whether ground-truth windows are detected and compare the number of windows detected. 
- std::vector d_comp(4); - std::vector comp(4); - for(int i = 0; i < (int)d_comp.size(); i++) - { - d_comp[i] = 0; - comp[i] = 0; - } - - int threshold = 10; - int val = 32; - d_comp[0] = (int)d_found.size(); - comp[0] = (int)found.size(); - if (winSize == cv::Size(48, 96)) - { - for(int i = 0; i < (int)d_found.size(); i++) - { - if (match_rect(d_found[i], win1_48x96, threshold)) - d_comp[1] = val; - if (match_rect(d_found[i], win2_48x96, threshold)) - d_comp[2] = val; - if (match_rect(d_found[i], win3_48x96, threshold)) - d_comp[3] = val; - } - for(int i = 0; i < (int)found.size(); i++) - { - if (match_rect(found[i], win1_48x96, threshold)) - comp[1] = val; - if (match_rect(found[i], win2_48x96, threshold)) - comp[2] = val; - if (match_rect(found[i], win3_48x96, threshold)) - comp[3] = val; - } - } - else if (winSize == cv::Size(64, 128)) - { - for(int i = 0; i < (int)d_found.size(); i++) - { - if (match_rect(d_found[i], win1_64x128, threshold)) - d_comp[1] = val; - if (match_rect(d_found[i], win2_64x128, threshold)) - d_comp[2] = val; - } - for(int i = 0; i < (int)found.size(); i++) - { - if (match_rect(found[i], win1_64x128, threshold)) - comp[1] = val; - if (match_rect(found[i], win2_64x128, threshold)) - comp[2] = val; - } - } - - EXPECT_MAT_NEAR(cv::Mat(d_comp), cv::Mat(comp), 3); + EXPECT_LT(checkRectSimilarity(img.size(), found, d_found), 1.0); } INSTANTIATE_TEST_CASE_P(OCL_ObjDetect, HOG, testing::Combine( - testing::Values(cv::Size(64, 128), cv::Size(48, 96)), + testing::Values(Size(64, 128), Size(48, 96)), testing::Values(MatType(CV_8UC1), MatType(CV_8UC4)))); +///////////////////////////// Haar ////////////////////////////// +IMPLEMENT_PARAM_CLASS(CascadeName, std::string); +CascadeName cascade_frontalface_alt(std::string("haarcascade_frontalface_alt.xml")); +CascadeName cascade_frontalface_alt2(std::string("haarcascade_frontalface_alt2.xml")); +struct getRect +{ + Rect operator ()(const CvAvgComp &e) const + { + return e.rect; + } +}; -#endif 
//HAVE_OPENCL +PARAM_TEST_CASE(Haar, int, CascadeName) +{ + ocl::OclCascadeClassifier cascade, nestedCascade; + CascadeClassifier cpucascade, cpunestedCascade; + + int flags; + std::string cascadeName; + vector faces, oclfaces; + Mat img; + ocl::oclMat d_img; + + virtual void SetUp() + { + flags = GET_PARAM(0); + cascadeName = (workdir + "../../data/haarcascades/").append(GET_PARAM(1)); + if( (!cascade.load( cascadeName )) || (!cpucascade.load(cascadeName)) ) + { + std::cout << "ERROR: Could not load classifier cascade" << std::endl; + return; + } + img = readImage(workdir + "lena.jpg", IMREAD_GRAYSCALE); + if(img.empty()) + { + std::cout << "Couldn't read lena.jpg" << std::endl; + return ; + } + equalizeHist(img, img); + d_img.upload(img); + } +}; + +TEST_P(Haar, FaceDetect) +{ + MemStorage storage(cvCreateMemStorage(0)); + CvSeq *_objects; + _objects = cascade.oclHaarDetectObjects(d_img, storage, 1.1, 3, + flags, Size(30, 30), Size(0, 0)); + vector vecAvgComp; + Seq(_objects).copyTo(vecAvgComp); + oclfaces.resize(vecAvgComp.size()); + std::transform(vecAvgComp.begin(), vecAvgComp.end(), oclfaces.begin(), getRect()); + + cpucascade.detectMultiScale(img, faces, 1.1, 3, + flags, + Size(30, 30), Size(0, 0)); + + EXPECT_LT(checkRectSimilarity(img.size(), faces, oclfaces), 1.0); +} + +TEST_P(Haar, FaceDetectUseBuf) +{ + ocl::OclCascadeClassifierBuf cascadebuf; + if(!cascadebuf.load(cascadeName)) + { + std::cout << "ERROR: Could not load classifier cascade for FaceDetectUseBuf!" 
<< std::endl; + return; + } + cascadebuf.detectMultiScale(d_img, oclfaces, 1.1, 3, + flags, + Size(30, 30), Size(0, 0)); + cpucascade.detectMultiScale(img, faces, 1.1, 3, + flags, + Size(30, 30), Size(0, 0)); + + // intentionally run ocl facedetect again and check if it still works after the first run + cascadebuf.detectMultiScale(d_img, oclfaces, 1.1, 3, + flags, + Size(30, 30)); + cascadebuf.release(); + + EXPECT_LT(checkRectSimilarity(img.size(), faces, oclfaces), 1.0); +} + +INSTANTIATE_TEST_CASE_P(OCL_ObjDetect, Haar, + Combine(Values(CV_HAAR_SCALE_IMAGE, 0), + Values(cascade_frontalface_alt/*, cascade_frontalface_alt2*/))); + +#endif //HAVE_OPENCL \ No newline at end of file diff --git a/modules/ocl/test/test_pyrdown.cpp b/modules/ocl/test/test_pyramids.cpp similarity index 75% rename from modules/ocl/test/test_pyrdown.cpp rename to modules/ocl/test/test_pyramids.cpp index 6d00fb5e4..1bd188dea 100644 --- a/modules/ocl/test/test_pyrdown.cpp +++ b/modules/ocl/test/test_pyramids.cpp @@ -15,7 +15,6 @@ // Third party copyrights are property of their respective owners. 
// // @Authors -// Dachuan Zhao, dachuan@multicorewareinc.com // Yao Wang yao@multicorewareinc.com // // Redistribution and use in source and binary forms, with or without modification, @@ -56,11 +55,12 @@ using namespace cvtest; using namespace testing; using namespace std; -PARAM_TEST_CASE(PyrDown, MatType, int) +PARAM_TEST_CASE(PyrBase, MatType, int) { int type; int channels; - + Mat dst_cpu; + oclMat gdst; virtual void SetUp() { type = GET_PARAM(0); @@ -69,19 +69,19 @@ PARAM_TEST_CASE(PyrDown, MatType, int) }; +/////////////////////// PyrDown ////////////////////////// +struct PyrDown : PyrBase {}; TEST_P(PyrDown, Mat) { for(int j = 0; j < LOOP_TIMES; j++) { - cv::Size size(MWIDTH, MHEIGHT); - cv::RNG &rng = TS::ptr()->get_rng(); - cv::Mat src = randomMat(rng, size, CV_MAKETYPE(type, channels), 0, 100, false); - - cv::ocl::oclMat gsrc(src), gdst; - cv::Mat dst_cpu; - cv::pyrDown(src, dst_cpu); - cv::ocl::pyrDown(gsrc, gdst); + Size size(MWIDTH, MHEIGHT); + Mat src = randomMat(size, CV_MAKETYPE(type, channels)); + oclMat gsrc(src); + + pyrDown(src, dst_cpu); + pyrDown(gsrc, gdst); EXPECT_MAT_NEAR(dst_cpu, Mat(gdst), type == CV_32F ? 1e-4f : 1.0f); } @@ -90,5 +90,27 @@ TEST_P(PyrDown, Mat) INSTANTIATE_TEST_CASE_P(OCL_ImgProc, PyrDown, Combine( Values(CV_8U, CV_32F), Values(1, 3, 4))); +/////////////////////// PyrUp ////////////////////////// +struct PyrUp : PyrBase {}; + +TEST_P(PyrUp, Accuracy) +{ + for(int j = 0; j < LOOP_TIMES; j++) + { + Size size(MWIDTH, MHEIGHT); + Mat src = randomMat(size, CV_MAKETYPE(type, channels)); + oclMat gsrc(src); + + pyrUp(src, dst_cpu); + pyrUp(gsrc, gdst); + + EXPECT_MAT_NEAR(dst_cpu, Mat(gdst), (type == CV_32F ? 
1e-4f : 1.0)); + } + +} + + +INSTANTIATE_TEST_CASE_P(OCL_ImgProc, PyrUp, testing::Combine( + Values(CV_8U, CV_32F), Values(1, 3, 4))); #endif // HAVE_OPENCL diff --git a/modules/ocl/test/test_pyrup.cpp b/modules/ocl/test/test_pyrup.cpp deleted file mode 100644 index afd3e8b1b..000000000 --- a/modules/ocl/test/test_pyrup.cpp +++ /dev/null @@ -1,91 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. -// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// @Authors -// Zhang Chunpeng chunpeng@multicorewareinc.com -// Yao Wang yao@multicorewareinc.com -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other oclMaterials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. 
-// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#include "precomp.hpp" -#include "opencv2/core/core.hpp" - -#ifdef HAVE_OPENCL - -using namespace cv; -using namespace cvtest; -using namespace testing; -using namespace std; - -PARAM_TEST_CASE(PyrUp, MatType, int) -{ - int type; - int channels; - - virtual void SetUp() - { - type = GET_PARAM(0); - channels = GET_PARAM(1); - } -}; - -TEST_P(PyrUp, Accuracy) -{ - for(int j = 0; j < LOOP_TIMES; j++) - { - Size size(MWIDTH, MHEIGHT); - Mat src = randomMat(size, CV_MAKETYPE(type, channels)); - Mat dst_gold; - pyrUp(src, dst_gold); - ocl::oclMat dst; - ocl::oclMat srcMat(src); - ocl::pyrUp(srcMat, dst); - - EXPECT_MAT_NEAR(dst_gold, Mat(dst), (type == CV_32F ? 
1e-4f : 1.0)); - } - -} - - -INSTANTIATE_TEST_CASE_P(OCL_ImgProc, PyrUp, testing::Combine( - Values(CV_8U, CV_32F), Values(1, 3, 4))); - - -#endif // HAVE_OPENCL \ No newline at end of file diff --git a/modules/ocl/test/utility.cpp b/modules/ocl/test/utility.cpp index 4b21081a8..27f9cec07 100644 --- a/modules/ocl/test/utility.cpp +++ b/modules/ocl/test/utility.cpp @@ -100,12 +100,6 @@ Mat randomMat(Size size, int type, double minVal, double maxVal) return randomMat(TS::ptr()->get_rng(), size, type, minVal, maxVal, false); } - - - - - - /* void showDiff(InputArray gold_, InputArray actual_, double eps) { @@ -137,58 +131,7 @@ void showDiff(InputArray gold_, InputArray actual_, double eps) } */ -/* -bool supportFeature(const DeviceInfo& info, FeatureSet feature) -{ - return TargetArchs::builtWith(feature) && info.supports(feature); -} -const vector& devices() -{ - static vector devs; - static bool first = true; - - if (first) - { - int deviceCount = getCudaEnabledDeviceCount(); - - devs.reserve(deviceCount); - - for (int i = 0; i < deviceCount; ++i) - { - DeviceInfo info(i); - if (info.isCompatible()) - devs.push_back(info); - } - - first = false; - } - - return devs; -} - -vector devices(FeatureSet feature) -{ - const vector& d = devices(); - - vector devs_filtered; - - if (TargetArchs::builtWith(feature)) - { - devs_filtered.reserve(d.size()); - - for (size_t i = 0, size = d.size(); i < size; ++i) - { - const DeviceInfo& info = d[i]; - - if (info.supports(feature)) - devs_filtered.push_back(info); - } - } - - return devs_filtered; -} -*/ vector types(int depth_start, int depth_end, int cn_start, int cn_end) { @@ -264,3 +207,48 @@ void PrintTo(const Inverse &inverse, std::ostream *os) (*os) << "direct"; } +double checkRectSimilarity(Size sz, std::vector& ob1, std::vector& ob2) +{ + double final_test_result = 0.0; + size_t sz1 = ob1.size(); + size_t sz2 = ob2.size(); + + if(sz1 != sz2) + { + return sz1 > sz2 ? 
(double)(sz1 - sz2) : (double)(sz2 - sz1); + } + else + { + if(sz1==0 && sz2==0) + return 0; + cv::Mat cpu_result(sz, CV_8UC1); + cpu_result.setTo(0); + + for(vector::const_iterator r = ob1.begin(); r != ob1.end(); r++) + { + cv::Mat cpu_result_roi(cpu_result, *r); + cpu_result_roi.setTo(1); + cpu_result.copyTo(cpu_result); + } + int cpu_area = cv::countNonZero(cpu_result > 0); + + cv::Mat gpu_result(sz, CV_8UC1); + gpu_result.setTo(0); + for(vector::const_iterator r2 = ob2.begin(); r2 != ob2.end(); r2++) + { + cv::Mat gpu_result_roi(gpu_result, *r2); + gpu_result_roi.setTo(1); + gpu_result.copyTo(gpu_result); + } + + cv::Mat result_; + multiply(cpu_result, gpu_result, result_); + int result = cv::countNonZero(result_ > 0); + if(cpu_area!=0 && result!=0) + final_test_result = 1.0 - (double)result/(double)cpu_area; + else if(cpu_area==0 && result!=0) + final_test_result = -1; + } + return final_test_result; +} + diff --git a/modules/ocl/test/utility.hpp b/modules/ocl/test/utility.hpp index 42fa69384..0b101ec50 100644 --- a/modules/ocl/test/utility.hpp +++ b/modules/ocl/test/utility.hpp @@ -55,13 +55,12 @@ cv::Mat randomMat(cv::Size size, int type, double minVal = 0.0, double maxVal = void showDiff(cv::InputArray gold, cv::InputArray actual, double eps); -//! return true if device supports specified feature and gpu module was built with support the feature. -//bool supportFeature(const cv::gpu::DeviceInfo& info, cv::gpu::FeatureSet feature); +// This function test if gpu_rst matches cpu_rst. +// If the two vectors are not equal, it will return the difference in vector size +// Else it will return (total diff of each cpu and gpu rects covered pixels)/(total cpu rects covered pixels) +// The smaller, the better matched +double checkRectSimilarity(cv::Size sz, std::vector& ob1, std::vector& ob2); -//! return all devices compatible with current gpu module build. -//const std::vector& devices(); -//! 
return all devices compatible with current gpu module build which support specified feature. -//std::vector devices(cv::gpu::FeatureSet feature); //! read image from testdata folder. cv::Mat readImage(const std::string &fileName, int flags = cv::IMREAD_COLOR); From 4b983679a56212aa20b886fed8c73de701904c6e Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Wed, 19 Jun 2013 10:09:19 +0400 Subject: [PATCH 104/121] fix gpuarithm module compilation (Bug #3103) --- modules/gpuarithm/src/arithm.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/gpuarithm/src/arithm.cpp b/modules/gpuarithm/src/arithm.cpp index 6045cf5ba..eb7d710e6 100644 --- a/modules/gpuarithm/src/arithm.cpp +++ b/modules/gpuarithm/src/arithm.cpp @@ -173,7 +173,7 @@ void cv::gpu::gemm(InputArray _src1, InputArray _src2, double alpha, InputArray (void) _dst; (void) flags; (void) stream; - CV_Error(:Error::StsNotImplemented, "The library was build without CUBLAS"); + CV_Error(Error::StsNotImplemented, "The library was build without CUBLAS"); #else // CUBLAS works with column-major matrices @@ -624,10 +624,10 @@ namespace Ptr cv::gpu::createConvolution(Size user_block_size) { -#ifndef HAVE_CUBLAS +#ifndef HAVE_CUFFT (void) user_block_size; - CV_Error(cv::Error::StsNotImplemented, "The library was build without CUFFT"); - return Ptr(); + CV_Error(Error::StsNotImplemented, "The library was build without CUFFT"); + return Ptr(); #else return new ConvolutionImpl(user_block_size); #endif From f1c549fabf2d916df306a889137de49f3ef338d5 Mon Sep 17 00:00:00 2001 From: yao Date: Wed, 19 Jun 2013 16:36:23 +0800 Subject: [PATCH 105/121] revise ocl samples, add tvl1 sample --- samples/ocl/facedetect.cpp | 159 ++++++++------ samples/ocl/hog.cpp | 335 +++++++++++------------------ samples/ocl/pyrlk_optical_flow.cpp | 59 +++-- samples/ocl/squares.cpp | 240 +++++++++++++++++---- samples/ocl/stereo_match.cpp | 306 ++++++++++++-------------- samples/ocl/surf_matcher.cpp | 205 
+++++++----------- samples/ocl/tvl1_optical_flow.cpp | 265 +++++++++++++++++++++++ 7 files changed, 924 insertions(+), 645 deletions(-) create mode 100644 samples/ocl/tvl1_optical_flow.cpp diff --git a/samples/ocl/facedetect.cpp b/samples/ocl/facedetect.cpp index 684c2d923..a49610aeb 100644 --- a/samples/ocl/facedetect.cpp +++ b/samples/ocl/facedetect.cpp @@ -7,55 +7,67 @@ using namespace std; using namespace cv; -#define LOOP_NUM 10 +#define LOOP_NUM 10 const static Scalar colors[] = { CV_RGB(0,0,255), - CV_RGB(0,128,255), - CV_RGB(0,255,255), - CV_RGB(0,255,0), - CV_RGB(255,128,0), - CV_RGB(255,255,0), - CV_RGB(255,0,0), - CV_RGB(255,0,255)} ; + CV_RGB(0,128,255), + CV_RGB(0,255,255), + CV_RGB(0,255,0), + CV_RGB(255,128,0), + CV_RGB(255,255,0), + CV_RGB(255,0,0), + CV_RGB(255,0,255) + } ; + int64 work_begin = 0; int64 work_end = 0; +string outputName; -static void workBegin() -{ +static void workBegin() +{ work_begin = getTickCount(); } static void workEnd() { work_end += (getTickCount() - work_begin); } -static double getTime(){ +static double getTime() +{ return work_end /((double)cvGetTickFrequency() * 1000.); } -void detect( Mat& img, vector& faces, - cv::ocl::OclCascadeClassifierBuf& cascade, - double scale, bool calTime); -void detectCPU( Mat& img, vector& faces, - CascadeClassifier& cascade, - double scale, bool calTime); +void detect( Mat& img, vector& faces, + ocl::OclCascadeClassifierBuf& cascade, + double scale, bool calTime); + + +void detectCPU( Mat& img, vector& faces, + CascadeClassifier& cascade, + double scale, bool calTime); + void Draw(Mat& img, vector& faces, double scale); + // This function test if gpu_rst matches cpu_rst. 
// If the two vectors are not equal, it will return the difference in vector size // Else if will return (total diff of each cpu and gpu rects covered pixels)/(total cpu rects covered pixels) -double checkRectSimilarity(Size sz, std::vector& cpu_rst, std::vector& gpu_rst); +double checkRectSimilarity(Size sz, vector& cpu_rst, vector& gpu_rst); + int main( int argc, const char** argv ) { const char* keys = "{ h | help | false | print help message }" "{ i | input | | specify input image }" - "{ t | template | ../../../data/haarcascades/haarcascade_frontalface_alt.xml | specify template file }" + "{ t | template | haarcascade_frontalface_alt.xml |" + " specify template file path }" "{ c | scale | 1.0 | scale image }" - "{ s | use_cpu | false | use cpu or gpu to process the image }"; + "{ s | use_cpu | false | use cpu or gpu to process the image }" + "{ o | output | facedetect_output.jpg |" + " specify output image save path(only works when input is images) }"; CommandLineParser cmd(argc, argv, keys); if (cmd.get("help")) @@ -69,9 +81,10 @@ int main( int argc, const char** argv ) bool useCPU = cmd.get("s"); string inputName = cmd.get("i"); + outputName = cmd.get("o"); string cascadeName = cmd.get("t"); double scale = cmd.get("c"); - cv::ocl::OclCascadeClassifierBuf cascade; + ocl::OclCascadeClassifierBuf cascade; CascadeClassifier cpu_cascade; if( !cascade.load( cascadeName ) || !cpu_cascade.load(cascadeName) ) @@ -83,7 +96,7 @@ int main( int argc, const char** argv ) if( inputName.empty() ) { capture = cvCaptureFromCAM(0); - if(!capture) + if(!capture) cout << "Capture from CAM 0 didn't work" << endl; } else if( inputName.size() ) @@ -92,7 +105,7 @@ int main( int argc, const char** argv ) if( image.empty() ) { capture = cvCaptureFromAVI( inputName.c_str() ); - if(!capture) + if(!capture) cout << "Capture from AVI didn't work" << endl; return -1; } @@ -100,14 +113,15 @@ int main( int argc, const char** argv ) else { image = imread( "lena.jpg", 1 ); - if(image.empty()) 
+ if(image.empty()) cout << "Couldn't read lena.jpg" << endl; return -1; } + cvNamedWindow( "result", 1 ); - std::vector oclinfo; - int devnums = cv::ocl::getDevice(oclinfo); + vector oclinfo; + int devnums = ocl::getDevice(oclinfo); if( devnums < 1 ) { std::cout << "no device found\n"; @@ -130,19 +144,23 @@ int main( int argc, const char** argv ) frame.copyTo( frameCopy ); else flip( frame, frameCopy, 0 ); - if(useCPU){ + if(useCPU) + { detectCPU(frameCopy, faces, cpu_cascade, scale, false); } - else{ - detect(frameCopy, faces, cascade, scale, false); + else + { + detect(frameCopy, faces, cascade, scale, false); } Draw(frameCopy, faces, scale); if( waitKey( 10 ) >= 0 ) goto _cleanup_; } + waitKey(0); + _cleanup_: cvReleaseCapture( &capture ); } @@ -152,18 +170,21 @@ _cleanup_: vector faces; vector ref_rst; double accuracy = 0.; - for(int i = 0; i <= LOOP_NUM;i ++) + for(int i = 0; i <= LOOP_NUM; i ++) { cout << "loop" << i << endl; - if(useCPU){ - detectCPU(image, faces, cpu_cascade, scale, i==0?false:true); + if(useCPU) + { + detectCPU(image, faces, cpu_cascade, scale, i==0?false:true); } - else{ + else + { detect(image, faces, cascade, scale, i==0?false:true); - if(i == 0){ + if(i == 0) + { detectCPU(image, ref_rst, cpu_cascade, scale, false); accuracy = checkRectSimilarity(image.size(), ref_rst, faces); - } + } } if (i == LOOP_NUM) { @@ -180,31 +201,31 @@ _cleanup_: } cvDestroyWindow("result"); - return 0; } -void detect( Mat& img, vector& faces, - cv::ocl::OclCascadeClassifierBuf& cascade, - double scale, bool calTime) +void detect( Mat& img, vector& faces, + ocl::OclCascadeClassifierBuf& cascade, + double scale, bool calTime) { - cv::ocl::oclMat image(img); - cv::ocl::oclMat gray, smallImg( cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 ); + ocl::oclMat image(img); + ocl::oclMat gray, smallImg( cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 ); if(calTime) workBegin(); - cv::ocl::cvtColor( image, gray, CV_BGR2GRAY ); - cv::ocl::resize( 
gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR ); - cv::ocl::equalizeHist( smallImg, smallImg ); + ocl::cvtColor( image, gray, CV_BGR2GRAY ); + ocl::resize( gray, smallImg, smallImg.size(), 0, 0, INTER_LINEAR ); + ocl::equalizeHist( smallImg, smallImg ); cascade.detectMultiScale( smallImg, faces, 1.1, - 3, 0 - |CV_HAAR_SCALE_IMAGE - , Size(30,30), Size(0, 0) ); + 3, 0 + |CV_HAAR_SCALE_IMAGE + , Size(30,30), Size(0, 0) ); if(calTime) workEnd(); } -void detectCPU( Mat& img, vector& faces, - CascadeClassifier& cascade, - double scale, bool calTime) + +void detectCPU( Mat& img, vector& faces, + CascadeClassifier& cascade, + double scale, bool calTime) { if(calTime) workBegin(); Mat cpu_gray, cpu_smallImg( cvRound (img.rows/scale), cvRound(img.cols/scale), CV_8UC1 ); @@ -212,11 +233,12 @@ void detectCPU( Mat& img, vector& faces, resize(cpu_gray, cpu_smallImg, cpu_smallImg.size(), 0, 0, INTER_LINEAR); equalizeHist(cpu_smallImg, cpu_smallImg); cascade.detectMultiScale(cpu_smallImg, faces, 1.1, - 3, 0 | CV_HAAR_SCALE_IMAGE, - Size(30, 30), Size(0, 0)); - if(calTime) workEnd(); + 3, 0 | CV_HAAR_SCALE_IMAGE, + Size(30, 30), Size(0, 0)); + if(calTime) workEnd(); } + void Draw(Mat& img, vector& faces, double scale) { int i = 0; @@ -230,31 +252,38 @@ void Draw(Mat& img, vector& faces, double scale) radius = cvRound((r->width + r->height)*0.25*scale); circle( img, center, radius, color, 3, 8, 0 ); } - cv::imshow( "result", img ); + imshow( "result", img ); + imwrite( outputName, img ); } -double checkRectSimilarity(Size sz, std::vector& ob1, std::vector& ob2) + +double checkRectSimilarity(Size sz, vector& ob1, vector& ob2) { double final_test_result = 0.0; size_t sz1 = ob1.size(); size_t sz2 = ob2.size(); if(sz1 != sz2) + { return sz1 > sz2 ? 
(double)(sz1 - sz2) : (double)(sz2 - sz1); + } else { - cv::Mat cpu_result(sz, CV_8UC1); + if(sz1==0 && sz2==0) + return 0; + Mat cpu_result(sz, CV_8UC1); cpu_result.setTo(0); for(vector::const_iterator r = ob1.begin(); r != ob1.end(); r++) - { - cv::Mat cpu_result_roi(cpu_result, *r); + { + Mat cpu_result_roi(cpu_result, *r); cpu_result_roi.setTo(1); cpu_result.copyTo(cpu_result); } - int cpu_area = cv::countNonZero(cpu_result > 0); + int cpu_area = countNonZero(cpu_result > 0); - cv::Mat gpu_result(sz, CV_8UC1); + + Mat gpu_result(sz, CV_8UC1); gpu_result.setTo(0); for(vector::const_iterator r2 = ob2.begin(); r2 != ob2.end(); r2++) { @@ -263,11 +292,13 @@ double checkRectSimilarity(Size sz, std::vector& ob1, std::vector& o gpu_result.copyTo(gpu_result); } - cv::Mat result_; + Mat result_; multiply(cpu_result, gpu_result, result_); - int result = cv::countNonZero(result_ > 0); - - final_test_result = 1.0 - (double)result/(double)cpu_area; + int result = countNonZero(result_ > 0); + if(cpu_area!=0 && result!=0) + final_test_result = 1.0 - (double)result/(double)cpu_area; + else if(cpu_area==0 && result!=0) + final_test_result = -1; } return final_test_result; } diff --git a/samples/ocl/hog.cpp b/samples/ocl/hog.cpp index 28be6fa9a..ff53e010c 100644 --- a/samples/ocl/hog.cpp +++ b/samples/ocl/hog.cpp @@ -10,75 +10,39 @@ using namespace std; using namespace cv; -bool help_showed = false; - -class Args -{ -public: - Args(); - static Args read(int argc, char** argv); - - string src; - bool src_is_video; - bool src_is_camera; - int camera_id; - - bool write_video; - string dst_video; - double dst_video_fps; - - bool make_gray; - - bool resize_src; - int width, height; - - double scale; - int nlevels; - int gr_threshold; - - double hit_threshold; - bool hit_threshold_auto; - - int win_width; - int win_stride_width, win_stride_height; - - bool gamma_corr; -}; - class App { public: - App(const Args& s); + App(CommandLineParser& cmd); void run(); - void handleKey(char key); 
- void hogWorkBegin(); void hogWorkEnd(); string hogWorkFps() const; - void workBegin(); void workEnd(); string workFps() const; - string message() const; + // This function test if gpu_rst matches cpu_rst. // If the two vectors are not equal, it will return the difference in vector size -// Else if will return +// Else if will return // (total diff of each cpu and gpu rects covered pixels)/(total cpu rects covered pixels) - double checkRectSimilarity(Size sz, - std::vector& cpu_rst, + double checkRectSimilarity(Size sz, + std::vector& cpu_rst, std::vector& gpu_rst); private: App operator=(App&); - Args args; + //Args args; bool running; - bool use_gpu; bool make_gray; double scale; + double resize_scale; + int win_width; + int win_stride_width, win_stride_height; int gr_threshold; int nlevels; double hit_threshold; @@ -86,119 +50,49 @@ private: int64 hog_work_begin; double hog_work_fps; - int64 work_begin; double work_fps; -}; -static void printHelp() -{ - cout << "Histogram of Oriented Gradients descriptor and detector sample.\n" - << "\nUsage: hog_gpu\n" - << " (|--video |--camera ) # frames source\n" - << " [--make_gray ] # convert image to gray one or not\n" - << " [--resize_src ] # do resize of the source image or not\n" - << " [--width ] # resized image width\n" - << " [--height ] # resized image height\n" - << " [--hit_threshold ] # classifying plane distance threshold (0.0 usually)\n" - << " [--scale ] # HOG window scale factor\n" - << " [--nlevels ] # max number of HOG window scales\n" - << " [--win_width ] # width of the window (48 or 64)\n" - << " [--win_stride_width ] # distance by OX axis between neighbour wins\n" - << " [--win_stride_height ] # distance by OY axis between neighbour wins\n" - << " [--gr_threshold ] # merging similar rects constant\n" - << " [--gamma_correct ] # do gamma correction or not\n" - << " [--write_video ] # write video or not\n" - << " [--dst_video ] # output video path\n" - << " [--dst_video_fps ] # output video fps\n"; - 
help_showed = true; -} + string img_source; + string vdo_source; + string output; + int camera_id; +}; int main(int argc, char** argv) { + const char* keys = + "{ h | help | false | print help message }" + "{ i | input | | specify input image}" + "{ c | camera | -1 | enable camera capturing }" + "{ v | video | | use video as input }" + "{ g | gray | false | convert image to gray one or not}" + "{ s | scale | 1.0 | resize the image before detect}" + "{ l |larger_win| false | use 64x128 window}" + "{ o | output | | specify output path when input is images}"; + CommandLineParser cmd(argc, argv, keys); + App app(cmd); try { - if (argc < 2) - printHelp(); - Args args = Args::read(argc, argv); - if (help_showed) - return -1; - App app(args); app.run(); } - catch (const Exception& e) { return cout << "error: " << e.what() << endl, 1; } - catch (const exception& e) { return cout << "error: " << e.what() << endl, 1; } - catch(...) { return cout << "unknown exception" << endl, 1; } + catch (const Exception& e) + { + return cout << "error: " << e.what() << endl, 1; + } + catch (const exception& e) + { + return cout << "error: " << e.what() << endl, 1; + } + catch(...) 
+ { + return cout << "unknown exception" << endl, 1; + } return 0; } - -Args::Args() +App::App(CommandLineParser& cmd) { - src_is_video = false; - src_is_camera = false; - camera_id = 0; - - write_video = false; - dst_video_fps = 24.; - - make_gray = false; - - resize_src = false; - width = 640; - height = 480; - - scale = 1.05; - nlevels = 13; - gr_threshold = 8; - hit_threshold = 1.4; - hit_threshold_auto = true; - - win_width = 48; - win_stride_width = 8; - win_stride_height = 8; - - gamma_corr = true; -} - - -Args Args::read(int argc, char** argv) -{ - Args args; - for (int i = 1; i < argc; i++) - { - if (string(argv[i]) == "--make_gray") args.make_gray = (string(argv[++i]) == "true"); - else if (string(argv[i]) == "--resize_src") args.resize_src = (string(argv[++i]) == "true"); - else if (string(argv[i]) == "--width") args.width = atoi(argv[++i]); - else if (string(argv[i]) == "--height") args.height = atoi(argv[++i]); - else if (string(argv[i]) == "--hit_threshold") - { - args.hit_threshold = atof(argv[++i]); - args.hit_threshold_auto = false; - } - else if (string(argv[i]) == "--scale") args.scale = atof(argv[++i]); - else if (string(argv[i]) == "--nlevels") args.nlevels = atoi(argv[++i]); - else if (string(argv[i]) == "--win_width") args.win_width = atoi(argv[++i]); - else if (string(argv[i]) == "--win_stride_width") args.win_stride_width = atoi(argv[++i]); - else if (string(argv[i]) == "--win_stride_height") args.win_stride_height = atoi(argv[++i]); - else if (string(argv[i]) == "--gr_threshold") args.gr_threshold = atoi(argv[++i]); - else if (string(argv[i]) == "--gamma_correct") args.gamma_corr = (string(argv[++i]) == "true"); - else if (string(argv[i]) == "--write_video") args.write_video = (string(argv[++i]) == "true"); - else if (string(argv[i]) == "--dst_video") args.dst_video = argv[++i]; - else if (string(argv[i]) == "--dst_video_fps") args.dst_video_fps = atof(argv[++i]); - else if (string(argv[i]) == "--help") printHelp(); - else if 
(string(argv[i]) == "--video") { args.src = argv[++i]; args.src_is_video = true; } - else if (string(argv[i]) == "--camera") { args.camera_id = atoi(argv[++i]); args.src_is_camera = true; } - else if (args.src.empty()) args.src = argv[i]; - else throw runtime_error((string("unknown key: ") + argv[i])); - } - return args; -} - - -App::App(const Args& s) -{ - args = s; cout << "\nControls:\n" << "\tESC - exit\n" << "\tm - change mode GPU <-> CPU\n" @@ -209,56 +103,56 @@ App::App(const Args& s) << "\t4/r - increase/decrease hit threshold\n" << endl; + use_gpu = true; - make_gray = args.make_gray; - scale = args.scale; - gr_threshold = args.gr_threshold; - nlevels = args.nlevels; + make_gray = cmd.get("g"); + resize_scale = cmd.get("s"); + win_width = cmd.get("l") == true ? 64 : 48; + vdo_source = cmd.get("v"); + img_source = cmd.get("i"); + output = cmd.get("o"); + camera_id = cmd.get("c"); - if (args.hit_threshold_auto) - args.hit_threshold = args.win_width == 48 ? 1.4 : 0.; - hit_threshold = args.hit_threshold; + win_stride_width = 8; + win_stride_height = 8; + gr_threshold = 8; + nlevels = 13; + hit_threshold = win_width == 48 ? 
1.4 : 0.; + scale = 1.05; + gamma_corr = true; - gamma_corr = args.gamma_corr; - - if (args.win_width != 64 && args.win_width != 48) - args.win_width = 64; - - cout << "Scale: " << scale << endl; - if (args.resize_src) - cout << "Resized source: (" << args.width << ", " << args.height << ")\n"; cout << "Group threshold: " << gr_threshold << endl; cout << "Levels number: " << nlevels << endl; - cout << "Win width: " << args.win_width << endl; - cout << "Win stride: (" << args.win_stride_width << ", " << args.win_stride_height << ")\n"; + cout << "Win width: " << win_width << endl; + cout << "Win stride: (" << win_stride_width << ", " << win_stride_height << ")\n"; cout << "Hit threshold: " << hit_threshold << endl; cout << "Gamma correction: " << gamma_corr << endl; cout << endl; } - void App::run() { - std::vector oclinfo; + vector oclinfo; ocl::getDevice(oclinfo); running = true; - cv::VideoWriter video_writer; + VideoWriter video_writer; - Size win_size(args.win_width, args.win_width * 2); //(64, 128) or (48, 96) - Size win_stride(args.win_stride_width, args.win_stride_height); + Size win_size(win_width, win_width * 2); + Size win_stride(win_stride_width, win_stride_height); // Create HOG descriptors and detectors here vector detector; if (win_size == Size(64, 128)) - detector = cv::ocl::HOGDescriptor::getPeopleDetector64x128(); + detector = ocl::HOGDescriptor::getPeopleDetector64x128(); else - detector = cv::ocl::HOGDescriptor::getPeopleDetector48x96(); + detector = ocl::HOGDescriptor::getPeopleDetector48x96(); - cv::ocl::HOGDescriptor gpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, - cv::ocl::HOGDescriptor::DEFAULT_WIN_SIGMA, 0.2, gamma_corr, - cv::ocl::HOGDescriptor::DEFAULT_NLEVELS); - cv::HOGDescriptor cpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, 1, -1, - HOGDescriptor::L2Hys, 0.2, gamma_corr, cv::HOGDescriptor::DEFAULT_NLEVELS); + + ocl::HOGDescriptor gpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, + 
ocl::HOGDescriptor::DEFAULT_WIN_SIGMA, 0.2, gamma_corr, + ocl::HOGDescriptor::DEFAULT_NLEVELS); + HOGDescriptor cpu_hog(win_size, Size(16, 16), Size(8, 8), Size(8, 8), 9, 1, -1, + HOGDescriptor::L2Hys, 0.2, gamma_corr, cv::HOGDescriptor::DEFAULT_NLEVELS); gpu_hog.setSVMDetector(detector); cpu_hog.setSVMDetector(detector); @@ -267,29 +161,29 @@ void App::run() VideoCapture vc; Mat frame; - if (args.src_is_video) + if (vdo_source!="") { - vc.open(args.src.c_str()); + vc.open(vdo_source.c_str()); if (!vc.isOpened()) - throw runtime_error(string("can't open video file: " + args.src)); + throw runtime_error(string("can't open video file: " + vdo_source)); vc >> frame; } - else if (args.src_is_camera) + else if (camera_id != -1) { - vc.open(args.camera_id); + vc.open(camera_id); if (!vc.isOpened()) { stringstream msg; - msg << "can't open camera: " << args.camera_id; + msg << "can't open camera: " << camera_id; throw runtime_error(msg.str()); } vc >> frame; } else { - frame = imread(args.src); + frame = imread(img_source); if (frame.empty()) - throw runtime_error(string("can't open image file: " + args.src)); + throw runtime_error(string("can't open image file: " + img_source)); } Mat img_aux, img, img_to_show; @@ -307,13 +201,15 @@ void App::run() else frame.copyTo(img_aux); // Resize image - if (args.resize_src) resize(img_aux, img, Size(args.width, args.height)); + if (abs(scale-1.0)>0.001) + { + Size sz((int)((double)img_aux.cols/resize_scale), (int)((double)img_aux.rows/resize_scale)); + resize(img_aux, img, sz); + } else img = img_aux; img_to_show = img; - gpu_hog.nlevels = nlevels; cpu_hog.nlevels = nlevels; - vector found; // Perform HOG classification @@ -330,15 +226,16 @@ void App::run() vector ref_rst; cvtColor(img, img, CV_BGRA2BGR); cpu_hog.detectMultiScale(img, ref_rst, hit_threshold, win_stride, - Size(0, 0), scale, gr_threshold-2); + Size(0, 0), scale, gr_threshold-2); double accuracy = checkRectSimilarity(img.size(), ref_rst, found); - cout << 
"\naccuracy value: " << accuracy << endl; - } - } + cout << "\naccuracy value: " << accuracy << endl; + } + } else cpu_hog.detectMultiScale(img, found, hit_threshold, win_stride, - Size(0, 0), scale, gr_threshold); + Size(0, 0), scale, gr_threshold); hogWorkEnd(); + // Draw positive classified windows for (size_t i = 0; i < found.size(); i++) { @@ -353,25 +250,31 @@ void App::run() putText(img_to_show, "FPS (HOG only): " + hogWorkFps(), Point(5, 65), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2); putText(img_to_show, "FPS (total): " + workFps(), Point(5, 105), FONT_HERSHEY_SIMPLEX, 1., Scalar(255, 100, 0), 2); imshow("opencv_gpu_hog", img_to_show); - - if (args.src_is_video || args.src_is_camera) vc >> frame; + if (vdo_source!="" || camera_id!=-1) vc >> frame; workEnd(); - if (args.write_video) + if (output!="") { - if (!video_writer.isOpened()) + if (img_source!="") // wirte image { - video_writer.open(args.dst_video, CV_FOURCC('x','v','i','d'), args.dst_video_fps, - img_to_show.size(), true); - if (!video_writer.isOpened()) - throw std::runtime_error("can't create video writer"); + imwrite(output, img_to_show); } + else //write video + { + if (!video_writer.isOpened()) + { + video_writer.open(output, CV_FOURCC('x','v','i','d'), 24, + img_to_show.size(), true); + if (!video_writer.isOpened()) + throw std::runtime_error("can't create video writer"); + } - if (make_gray) cvtColor(img_to_show, img, CV_GRAY2BGR); - else cvtColor(img_to_show, img, CV_BGRA2BGR); + if (make_gray) cvtColor(img_to_show, img, CV_GRAY2BGR); + else cvtColor(img_to_show, img, CV_BGRA2BGR); - video_writer << img; + video_writer << img; + } } handleKey((char)waitKey(3)); @@ -379,7 +282,6 @@ void App::run() } } - void App::handleKey(char key) { switch (key) @@ -442,7 +344,10 @@ void App::handleKey(char key) } -inline void App::hogWorkBegin() { hog_work_begin = getTickCount(); } +inline void App::hogWorkBegin() +{ + hog_work_begin = getTickCount(); +} inline void App::hogWorkEnd() { @@ -458,8 
+363,10 @@ inline string App::hogWorkFps() const return ss.str(); } - -inline void App::workBegin() { work_begin = getTickCount(); } +inline void App::workBegin() +{ + work_begin = getTickCount(); +} inline void App::workEnd() { @@ -475,8 +382,9 @@ inline string App::workFps() const return ss.str(); } -double App::checkRectSimilarity(Size sz, - std::vector& ob1, + +double App::checkRectSimilarity(Size sz, + std::vector& ob1, std::vector& ob2) { double final_test_result = 0.0; @@ -484,20 +392,26 @@ double App::checkRectSimilarity(Size sz, size_t sz2 = ob2.size(); if(sz1 != sz2) + { return sz1 > sz2 ? (double)(sz1 - sz2) : (double)(sz2 - sz1); + } else { + if(sz1==0 && sz2==0) + return 0; cv::Mat cpu_result(sz, CV_8UC1); cpu_result.setTo(0); + for(vector::const_iterator r = ob1.begin(); r != ob1.end(); r++) - { + { cv::Mat cpu_result_roi(cpu_result, *r); cpu_result_roi.setTo(1); cpu_result.copyTo(cpu_result); } int cpu_area = cv::countNonZero(cpu_result > 0); + cv::Mat gpu_result(sz, CV_8UC1); gpu_result.setTo(0); for(vector::const_iterator r2 = ob2.begin(); r2 != ob2.end(); r2++) @@ -510,10 +424,11 @@ double App::checkRectSimilarity(Size sz, cv::Mat result_; multiply(cpu_result, gpu_result, result_); int result = cv::countNonZero(result_ > 0); - - final_test_result = 1.0 - (double)result/(double)cpu_area; + if(cpu_area!=0 && result!=0) + final_test_result = 1.0 - (double)result/(double)cpu_area; + else if(cpu_area==0 && result!=0) + final_test_result = -1; } return final_test_result; - } diff --git a/samples/ocl/pyrlk_optical_flow.cpp b/samples/ocl/pyrlk_optical_flow.cpp index cc8d886f7..cefa92867 100644 --- a/samples/ocl/pyrlk_optical_flow.cpp +++ b/samples/ocl/pyrlk_optical_flow.cpp @@ -11,19 +11,20 @@ using namespace cv; using namespace cv::ocl; typedef unsigned char uchar; -#define LOOP_NUM 10 +#define LOOP_NUM 10 int64 work_begin = 0; int64 work_end = 0; -static void workBegin() -{ +static void workBegin() +{ work_begin = getTickCount(); } static void workEnd() 
{ work_end += (getTickCount() - work_begin); } -static double getTime(){ +static double getTime() +{ return work_end * 1000. / getTickFrequency(); } @@ -93,14 +94,15 @@ int main(int argc, const char* argv[]) //set this to save kernel compile time from second time you run ocl::setBinpath("./"); const char* keys = - "{ h | help | false | print help message }" - "{ l | left | | specify left image }" - "{ r | right | | specify right image }" - "{ c | camera | 0 | enable camera capturing }" - "{ s | use_cpu | false | use cpu or gpu to process the image }" - "{ v | video | | use video as input }" - "{ points | points | 1000 | specify points count [GoodFeatureToTrack] }" - "{ min_dist | min_dist | 0 | specify minimal distance between points [GoodFeatureToTrack] }"; + "{ h | help | false | print help message }" + "{ l | left | | specify left image }" + "{ r | right | | specify right image }" + "{ c | camera | 0 | specify camera id }" + "{ s | use_cpu | false | use cpu or gpu to process the image }" + "{ v | video | | use video as input }" + "{ o | output | pyrlk_output.jpg| specify output save path when input is images }" + "{ p | points | 1000 | specify points count [GoodFeatureToTrack] }" + "{ m | min_dist | 0 | specify minimal distance between points [GoodFeatureToTrack] }"; CommandLineParser cmd(argc, argv, keys); @@ -113,13 +115,13 @@ int main(int argc, const char* argv[]) } bool defaultPicturesFail = false; - string fname0 = cmd.get("left"); - string fname1 = cmd.get("right"); - string vdofile = cmd.get("video"); - int points = cmd.get("points"); - double minDist = cmd.get("min_dist"); + string fname0 = cmd.get("l"); + string fname1 = cmd.get("r"); + string vdofile = cmd.get("v"); + string outfile = cmd.get("o"); + int points = cmd.get("p"); + double minDist = cmd.get("m"); bool useCPU = cmd.get("s"); - bool useCamera = cmd.get("c"); int inputName = cmd.get("c"); oclMat d_nextPts, d_status; @@ -132,22 +134,9 @@ int main(int argc, const char* argv[]) vector 
status(points); vector err; - if (frame0.empty() || frame1.empty()) - { - useCamera = true; - defaultPicturesFail = true; - CvCapture* capture = 0; - capture = cvCaptureFromCAM( inputName ); - if (!capture) - { - cout << "Can't load input images" << endl; - return -1; - } - } - cout << "Points count : " << points << endl << endl; - if (useCamera) + if (frame0.empty() || frame1.empty()) { CvCapture* capture = 0; Mat frame, frameCopy; @@ -241,10 +230,10 @@ _cleanup_: else { nocamera: - for(int i = 0; i <= LOOP_NUM;i ++) + for(int i = 0; i <= LOOP_NUM; i ++) { cout << "loop" << i << endl; - if (i > 0) workBegin(); + if (i > 0) workBegin(); if (useCPU) { @@ -274,8 +263,8 @@ nocamera: cout << getTime() / LOOP_NUM << " ms" << endl; drawArrows(frame0, pts, nextPts, status, Scalar(255, 0, 0)); - imshow("PyrLK [Sparse]", frame0); + imwrite(outfile, frame0); } } } diff --git a/samples/ocl/squares.cpp b/samples/ocl/squares.cpp index 6b184161f..48964ffb2 100644 --- a/samples/ocl/squares.cpp +++ b/samples/ocl/squares.cpp @@ -6,7 +6,6 @@ #include "opencv2/imgproc/imgproc.hpp" #include "opencv2/highgui/highgui.hpp" #include "opencv2/ocl/ocl.hpp" - #include #include #include @@ -14,23 +13,50 @@ using namespace cv; using namespace std; -static void help() -{ - cout << - "\nA program using OCL module pyramid scaling, Canny, dilate functions, threshold, split; cpu contours, contour simpification and\n" - "memory storage (it's got it all folks) to find\n" - "squares in a list of images pic1-6.png\n" - "Returns sequence of squares detected on the image.\n" - "the sequence is stored in the specified memory storage\n" - "Call:\n" - "./squares\n" - "Using OpenCV version %s\n" << CV_VERSION << "\n" << endl; -} +#define ACCURACY_CHECK 1 +#if ACCURACY_CHECK +// check if two vectors of vector of points are near or not +// prior assumption is that they are in correct order +static bool checkPoints( + vector< vector > set1, + vector< vector > set2, + int maxDiff = 5) +{ + if(set1.size() != 
set2.size()) + { + return false; + } + + for(vector< vector >::iterator it1 = set1.begin(), it2 = set2.begin(); + it1 < set1.end() && it2 < set2.end(); it1 ++, it2 ++) + { + vector pts1 = *it1; + vector pts2 = *it2; + + + if(pts1.size() != pts2.size()) + { + return false; + } + for(size_t i = 0; i < pts1.size(); i ++) + { + Point pt1 = pts1[i], pt2 = pts2[i]; + if(std::abs(pt1.x - pt2.x) > maxDiff || + std::abs(pt1.y - pt2.y) > maxDiff) + { + return false; + } + } + } + return true; +} +#endif int thresh = 50, N = 11; const char* wndname = "OpenCL Square Detection Demo"; + // helper function: // finds a cosine of angle between vectors // from pt0->pt1 and from pt0->pt2 @@ -43,9 +69,92 @@ static double angle( Point pt1, Point pt2, Point pt0 ) return (dx1*dx2 + dy1*dy2)/sqrt((dx1*dx1 + dy1*dy1)*(dx2*dx2 + dy2*dy2) + 1e-10); } + // returns sequence of squares detected on the image. // the sequence is stored in the specified memory storage static void findSquares( const Mat& image, vector >& squares ) +{ + squares.clear(); + Mat pyr, timg, gray0(image.size(), CV_8U), gray; + + // down-scale and upscale the image to filter out the noise + pyrDown(image, pyr, Size(image.cols/2, image.rows/2)); + pyrUp(pyr, timg, image.size()); + vector > contours; + + // find squares in every color plane of the image + for( int c = 0; c < 3; c++ ) + { + int ch[] = {c, 0}; + mixChannels(&timg, 1, &gray0, 1, ch, 1); + + // try several threshold levels + for( int l = 0; l < N; l++ ) + { + // hack: use Canny instead of zero threshold level. + // Canny helps to catch squares with gradient shading + if( l == 0 ) + { + // apply Canny. Take the upper threshold from slider + // and set the lower to 0 (which forces edges merging) + Canny(gray0, gray, 0, thresh, 5); + // dilate canny output to remove potential + // holes between edge segments + dilate(gray, gray, Mat(), Point(-1,-1)); + } + else + { + // apply threshold if l!=0: + // tgray(x,y) = gray(x,y) < (l+1)*255/N ? 
255 : 0 + cv::threshold(gray0, gray, (l+1)*255/N, 255, THRESH_BINARY); + } + + // find contours and store them all as a list + findContours(gray, contours, CV_RETR_LIST, CV_CHAIN_APPROX_SIMPLE); + + vector approx; + + // test each contour + for( size_t i = 0; i < contours.size(); i++ ) + { + // approximate contour with accuracy proportional + // to the contour perimeter + approxPolyDP(Mat(contours[i]), approx, arcLength(Mat(contours[i]), true)*0.02, true); + + // square contours should have 4 vertices after approximation + // relatively large area (to filter out noisy contours) + // and be convex. + // Note: absolute value of an area is used because + // area may be positive or negative - in accordance with the + // contour orientation + if( approx.size() == 4 && + fabs(contourArea(Mat(approx))) > 1000 && + isContourConvex(Mat(approx)) ) + { + double maxCosine = 0; + + for( int j = 2; j < 5; j++ ) + { + // find the maximum cosine of the angle between joint edges + double cosine = fabs(angle(approx[j%4], approx[j-2], approx[j-1])); + maxCosine = MAX(maxCosine, cosine); + } + + // if cosines of all angles are small + // (all angles are ~90 degree) then write quandrange + // vertices to resultant sequence + if( maxCosine < 0.3 ) + squares.push_back(approx); + } + } + } + } +} + + +// returns sequence of squares detected on the image. 
+// the sequence is stored in the specified memory storage +static void findSquares_ocl( const Mat& image, vector >& squares ) { squares.clear(); @@ -91,7 +200,6 @@ static void findSquares( const Mat& image, vector >& squares ) findContours(gray, contours, CV_RETR_LIST, CV_CHAIN_APPROX_SIMPLE); vector approx; - // test each contour for( size_t i = 0; i < contours.size(); i++ ) { @@ -106,11 +214,10 @@ static void findSquares( const Mat& image, vector >& squares ) // area may be positive or negative - in accordance with the // contour orientation if( approx.size() == 4 && - fabs(contourArea(Mat(approx))) > 1000 && - isContourConvex(Mat(approx)) ) + fabs(contourArea(Mat(approx))) > 1000 && + isContourConvex(Mat(approx)) ) { double maxCosine = 0; - for( int j = 2; j < 5; j++ ) { // find the maximum cosine of the angle between joint edges @@ -139,40 +246,93 @@ static void drawSquares( Mat& image, const vector >& squares ) int n = (int)squares[i].size(); polylines(image, &p, &n, 1, true, Scalar(0,255,0), 3, CV_AA); } - - imshow(wndname, image); } -int main(int /*argc*/, char** /*argv*/) +// draw both pure-C++ and ocl square results onto a single image +static Mat drawSquaresBoth( const Mat& image, + const vector >& sqsCPP, + const vector >& sqsOCL +) { + Mat imgToShow(Size(image.cols * 2, image.rows), image.type()); + Mat lImg = imgToShow(Rect(Point(0, 0), image.size())); + Mat rImg = imgToShow(Rect(Point(image.cols, 0), image.size())); + image.copyTo(lImg); + image.copyTo(rImg); + drawSquares(lImg, sqsCPP); + drawSquares(rImg, sqsOCL); + float fontScale = 0.8f; + Scalar white = Scalar::all(255), black = Scalar::all(0); + + putText(lImg, "C++", Point(10, 20), FONT_HERSHEY_COMPLEX_SMALL, fontScale, black, 2); + putText(rImg, "OCL", Point(10, 20), FONT_HERSHEY_COMPLEX_SMALL, fontScale, black, 2); + putText(lImg, "C++", Point(10, 20), FONT_HERSHEY_COMPLEX_SMALL, fontScale, white, 1); + putText(rImg, "OCL", Point(10, 20), FONT_HERSHEY_COMPLEX_SMALL, fontScale, white, 1); + + 
return imgToShow; +} + + +int main(int argc, char** argv) +{ + const char* keys = + "{ i | input | | specify input image }" + "{ o | output | squares_output.jpg | specify output save path}"; + CommandLineParser cmd(argc, argv, keys); + string inputName = cmd.get("i"); + string outfile = cmd.get("o"); + if(inputName.empty()) + { + cout << "Avaible options:" << endl; + cmd.printParams(); + return 0; + } - //ocl::setBinpath("F:/kernel_bin"); vector info; CV_Assert(ocl::getDevice(info)); - - static const char* names[] = { "pic1.png", "pic2.png", "pic3.png", - "pic4.png", "pic5.png", "pic6.png", 0 }; - help(); + int iterations = 10; namedWindow( wndname, 1 ); - vector > squares; + vector > squares_cpu, squares_ocl; - for( int i = 0; names[i] != 0; i++ ) + Mat image = imread(inputName, 1); + if( image.empty() ) { - Mat image = imread(names[i], 1); - if( image.empty() ) - { - cout << "Couldn't load " << names[i] << endl; - continue; - } - - findSquares(image, squares); - drawSquares(image, squares); - - int c = waitKey(); - if( (char)c == 27 ) - break; + cout << "Couldn't load " << inputName << endl; + return -1; } + int j = iterations; + int64 t_ocl = 0, t_cpp = 0; + //warm-ups + cout << "warming up ..." << endl; + findSquares(image, squares_cpu); + findSquares_ocl(image, squares_ocl); + + +#if ACCURACY_CHECK + cout << "Checking ocl accuracy ... " << endl; + cout << (checkPoints(squares_cpu, squares_ocl) ? 
"Pass" : "Failed") << endl; +#endif + do + { + int64 t_start = cv::getTickCount(); + findSquares(image, squares_cpu); + t_cpp += cv::getTickCount() - t_start; + + + t_start = cv::getTickCount(); + findSquares_ocl(image, squares_ocl); + t_ocl += cv::getTickCount() - t_start; + cout << "run loop: " << j << endl; + } + while(--j); + cout << "cpp average time: " << 1000.0f * (double)t_cpp / getTickFrequency() / iterations << "ms" << endl; + cout << "ocl average time: " << 1000.0f * (double)t_ocl / getTickFrequency() / iterations << "ms" << endl; + + Mat result = drawSquaresBoth(image, squares_cpu, squares_ocl); + imshow(wndname, result); + imwrite(outfile, result); + cvWaitKey(0); return 0; } diff --git a/samples/ocl/stereo_match.cpp b/samples/ocl/stereo_match.cpp index 7ac2c9a6f..565744baa 100644 --- a/samples/ocl/stereo_match.cpp +++ b/samples/ocl/stereo_match.cpp @@ -10,56 +10,45 @@ using namespace cv; using namespace std; using namespace ocl; -bool help_showed = false; - -struct Params -{ - Params(); - static Params read(int argc, char** argv); - - string left; - string right; - - string method_str() const - { - switch (method) - { - case BM: return "BM"; - case BP: return "BP"; - case CSBP: return "CSBP"; - } - return ""; - } - enum {BM, BP, CSBP} method; - int ndisp; // Max disparity + 1 - enum {GPU, CPU} type; -}; - struct App { - App(const Params& p); + App(CommandLineParser& cmd); void run(); void handleKey(char key); void printParams() const; - void workBegin() { work_begin = getTickCount(); } + void workBegin() + { + work_begin = getTickCount(); + } void workEnd() { int64 d = getTickCount() - work_begin; double f = getTickFrequency(); work_fps = f / d; } - + string method_str() const + { + switch (method) + { + case BM: + return "BM"; + case BP: + return "BP"; + case CSBP: + return "CSBP"; + } + return ""; + } string text() const { stringstream ss; - ss << "(" << p.method_str() << ") FPS: " << setiosflags(ios::left) - << setprecision(4) << work_fps; + ss << 
"(" << method_str() << ") FPS: " << setiosflags(ios::left) + << setprecision(4) << work_fps; return ss.str(); } private: - Params p; bool running; Mat left_src, right_src; @@ -72,42 +61,45 @@ private: int64 work_begin; double work_fps; -}; -static void printHelp() -{ - cout << "Usage: stereo_match_gpu\n" - << "\t--left --right # must be rectified\n" - << "\t--method # BM | BP | CSBP\n" - << "\t--ndisp # number of disparity levels\n" - << "\t--type # cpu | CPU | gpu | GPU\n"; - help_showed = true; -} + string l_img, r_img; + string out_img; + enum {BM, BP, CSBP} method; + int ndisp; // Max disparity + 1 + enum {GPU, CPU} type; +}; int main(int argc, char** argv) { + const char* keys = + "{ h | help | false | print help message }" + "{ l | left | | specify left image }" + "{ r | right | | specify right image }" + "{ m | method | BM | specify match method(BM/BP/CSBP) }" + "{ n | ndisp | 64 | specify number of disparity levels }" + "{ s | cpu_ocl | false | use cpu or gpu as ocl device to process the image }" + "{ o | output | stereo_match_output.jpg | specify output path when input is images}"; + CommandLineParser cmd(argc, argv, keys); + if (cmd.get("help")) + { + cout << "Avaible options:" << endl; + cmd.printParams(); + return 0; + } try { - if (argc < 2) - { - printHelp(); - return 1; - } + App app(cmd); + int flag = CVCL_DEVICE_TYPE_GPU; + if(cmd.get("s") == true) + flag = CVCL_DEVICE_TYPE_CPU; - Params args = Params::read(argc, argv); - if (help_showed) - return -1; - - int flags[2] = { CVCL_DEVICE_TYPE_GPU, CVCL_DEVICE_TYPE_CPU }; vector info; - - if(getDevice(info, flags[args.type]) == 0) + if(getDevice(info, flag) == 0) { throw runtime_error("Error: Did not find a valid OpenCL device!"); } cout << "Device name:" << info[0].DeviceName[0] << endl; - App app(args); app.run(); } catch (const exception& e) @@ -117,77 +109,39 @@ int main(int argc, char** argv) return 0; } - -Params::Params() -{ - method = BM; - ndisp = 64; - type = GPU; -} - - -Params 
Params::read(int argc, char** argv) -{ - Params p; - - for (int i = 1; i < argc; i++) - { - if (string(argv[i]) == "--left") p.left = argv[++i]; - else if (string(argv[i]) == "--right") p.right = argv[++i]; - else if (string(argv[i]) == "--method") - { - if (string(argv[i + 1]) == "BM") p.method = BM; - else if (string(argv[i + 1]) == "BP") p.method = BP; - else if (string(argv[i + 1]) == "CSBP") p.method = CSBP; - else throw runtime_error("unknown stereo match method: " + string(argv[i + 1])); - i++; - } - else if (string(argv[i]) == "--ndisp") p.ndisp = atoi(argv[++i]); - else if (string(argv[i]) == "--type") - { - string t(argv[++i]); - if (t == "cpu" || t == "CPU") - { - p.type = CPU; - } - else if (t == "gpu" || t == "GPU") - { - p.type = GPU; - } - else throw runtime_error("unknown device type: " + t); - } - else if (string(argv[i]) == "--help") printHelp(); - else throw runtime_error("unknown key: " + string(argv[i])); - } - - return p; -} - - -App::App(const Params& params) - : p(params), running(false) +App::App(CommandLineParser& cmd) + : running(false),method(BM) { cout << "stereo_match_ocl sample\n"; cout << "\nControls:\n" - << "\tesc - exit\n" - << "\tp - print current parameters\n" - << "\tg - convert source images into gray\n" - << "\tm - change stereo match method\n" - << "\ts - change Sobel prefiltering flag (for BM only)\n" - << "\t1/q - increase/decrease maximum disparity\n" - << "\t2/w - increase/decrease window size (for BM only)\n" - << "\t3/e - increase/decrease iteration count (for BP and CSBP only)\n" - << "\t4/r - increase/decrease level count (for BP and CSBP only)\n"; + << "\tesc - exit\n" + << "\tp - print current parameters\n" + << "\tg - convert source images into gray\n" + << "\tm - change stereo match method\n" + << "\ts - change Sobel prefiltering flag (for BM only)\n" + << "\t1/q - increase/decrease maximum disparity\n" + << "\t2/w - increase/decrease window size (for BM only)\n" + << "\t3/e - increase/decrease iteration count 
(for BP and CSBP only)\n" + << "\t4/r - increase/decrease level count (for BP and CSBP only)\n"; + l_img = cmd.get("l"); + r_img = cmd.get("r"); + string mstr = cmd.get("m"); + if(mstr == "BM") method = BM; + else if(mstr == "BP") method = BP; + else if(mstr == "CSBP") method = CSBP; + else cout << "unknown method!\n"; + ndisp = cmd.get("n"); + out_img = cmd.get("o"); } void App::run() { // Load images - left_src = imread(p.left); - right_src = imread(p.right); - if (left_src.empty()) throw runtime_error("can't open file \"" + p.left + "\""); - if (right_src.empty()) throw runtime_error("can't open file \"" + p.right + "\""); + left_src = imread(l_img); + right_src = imread(r_img); + if (left_src.empty()) throw runtime_error("can't open file \"" + l_img + "\""); + if (right_src.empty()) throw runtime_error("can't open file \"" + r_img + "\""); cvtColor(left_src, left, CV_BGR2GRAY); cvtColor(right_src, right, CV_BGR2GRAY); @@ -199,14 +153,15 @@ void App::run() imshow("right", right); // Set common parameters - bm.ndisp = p.ndisp; - bp.ndisp = p.ndisp; - csbp.ndisp = p.ndisp; + bm.ndisp = ndisp; + bp.ndisp = ndisp; + csbp.ndisp = ndisp; cout << endl; printParams(); running = true; + bool written = false; while (running) { @@ -214,9 +169,9 @@ void App::run() Mat disp; oclMat d_disp; workBegin(); - switch (p.method) + switch (method) { - case Params::BM: + case BM: if (d_left.channels() > 1 || d_right.channels() > 1) { cout << "BM doesn't support color images\n"; @@ -230,25 +185,28 @@ void App::run() } bm(d_left, d_right, d_disp); break; - case Params::BP: + case BP: bp(d_left, d_right, d_disp); break; - case Params::CSBP: + case CSBP: csbp(d_left, d_right, d_disp); break; } - ocl::finish(); workEnd(); // Show results d_disp.download(disp); - if (p.method != Params::BM) + if (method != BM) { disp.convertTo(disp, 0); } putText(disp, text(), Point(5, 25), FONT_HERSHEY_SIMPLEX, 1.0, Scalar::all(255)); imshow("disparity", disp); - + if(!written) + { + imwrite(out_img, 
disp); + written = true; + } handleKey((char)waitKey(3)); } } @@ -259,19 +217,19 @@ void App::printParams() const cout << "--- Parameters ---\n"; cout << "image_size: (" << left.cols << ", " << left.rows << ")\n"; cout << "image_channels: " << left.channels() << endl; - cout << "method: " << p.method_str() << endl - << "ndisp: " << p.ndisp << endl; - switch (p.method) + cout << "method: " << method_str() << endl + << "ndisp: " << ndisp << endl; + switch (method) { - case Params::BM: + case BM: cout << "win_size: " << bm.winSize << endl; cout << "prefilter_sobel: " << bm.preset << endl; break; - case Params::BP: + case BP: cout << "iter_count: " << bp.iters << endl; cout << "level_count: " << bp.levels << endl; break; - case Params::CSBP: + case CSBP: cout << "iter_count: " << csbp.iters << endl; cout << "level_count: " << csbp.levels << endl; break; @@ -287,11 +245,13 @@ void App::handleKey(char key) case 27: running = false; break; - case 'p': case 'P': + case 'p': + case 'P': printParams(); break; - case 'g': case 'G': - if (left.channels() == 1 && p.method != Params::BM) + case 'g': + case 'G': + if (left.channels() == 1 && method != BM) { left = left_src; right = right_src; @@ -307,23 +267,25 @@ void App::handleKey(char key) imshow("left", left); imshow("right", right); break; - case 'm': case 'M': - switch (p.method) + case 'm': + case 'M': + switch (method) { - case Params::BM: - p.method = Params::BP; + case BM: + method = BP; break; - case Params::BP: - p.method = Params::CSBP; + case BP: + method = CSBP; break; - case Params::CSBP: - p.method = Params::BM; + case CSBP: + method = BM; break; } - cout << "method: " << p.method_str() << endl; + cout << "method: " << method_str() << endl; break; - case 's': case 'S': - if (p.method == Params::BM) + case 's': + case 'S': + if (method == BM) { switch (bm.preset) { @@ -338,76 +300,80 @@ void App::handleKey(char key) } break; case '1': - p.ndisp = p.ndisp == 1 ? 
8 : p.ndisp + 8; - cout << "ndisp: " << p.ndisp << endl; - bm.ndisp = p.ndisp; - bp.ndisp = p.ndisp; - csbp.ndisp = p.ndisp; + ndisp == 1 ? ndisp = 8 : ndisp += 8; + cout << "ndisp: " << ndisp << endl; + bm.ndisp = ndisp; + bp.ndisp = ndisp; + csbp.ndisp = ndisp; break; - case 'q': case 'Q': - p.ndisp = max(p.ndisp - 8, 1); - cout << "ndisp: " << p.ndisp << endl; - bm.ndisp = p.ndisp; - bp.ndisp = p.ndisp; - csbp.ndisp = p.ndisp; + case 'q': + case 'Q': + ndisp = max(ndisp - 8, 1); + cout << "ndisp: " << ndisp << endl; + bm.ndisp = ndisp; + bp.ndisp = ndisp; + csbp.ndisp = ndisp; break; case '2': - if (p.method == Params::BM) + if (method == BM) { bm.winSize = min(bm.winSize + 1, 51); cout << "win_size: " << bm.winSize << endl; } break; - case 'w': case 'W': - if (p.method == Params::BM) + case 'w': + case 'W': + if (method == BM) { bm.winSize = max(bm.winSize - 1, 2); cout << "win_size: " << bm.winSize << endl; } break; case '3': - if (p.method == Params::BP) + if (method == BP) { bp.iters += 1; cout << "iter_count: " << bp.iters << endl; } - else if (p.method == Params::CSBP) + else if (method == CSBP) { csbp.iters += 1; cout << "iter_count: " << csbp.iters << endl; } break; - case 'e': case 'E': - if (p.method == Params::BP) + case 'e': + case 'E': + if (method == BP) { bp.iters = max(bp.iters - 1, 1); cout << "iter_count: " << bp.iters << endl; } - else if (p.method == Params::CSBP) + else if (method == CSBP) { csbp.iters = max(csbp.iters - 1, 1); cout << "iter_count: " << csbp.iters << endl; } break; case '4': - if (p.method == Params::BP) + if (method == BP) { bp.levels += 1; cout << "level_count: " << bp.levels << endl; } - else if (p.method == Params::CSBP) + else if (method == CSBP) { csbp.levels += 1; cout << "level_count: " << csbp.levels << endl; } break; - case 'r': case 'R': - if (p.method == Params::BP) + case 'r': + case 'R': + if (method == BP) { bp.levels = max(bp.levels - 1, 1); cout << "level_count: " << bp.levels << endl; } - else if (p.method 
== Params::CSBP) + else if (method == CSBP) { csbp.levels = max(csbp.levels - 1, 1); cout << "level_count: " << csbp.levels << endl; diff --git a/samples/ocl/surf_matcher.cpp b/samples/ocl/surf_matcher.cpp index 038a8dc5c..bee517fbc 100644 --- a/samples/ocl/surf_matcher.cpp +++ b/samples/ocl/surf_matcher.cpp @@ -1,48 +1,3 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. -// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// @Authors -// Peng Xiao, pengxiao@multicorewareinc.com -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other oclMaterials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors as is and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. 
-// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - #include #include #include "opencv2/core/core.hpp" @@ -61,27 +16,20 @@ const float GOOD_PORTION = 0.15f; namespace { -void help(); - -void help() -{ - std::cout << "\nThis program demonstrates using SURF_OCL features detector and descriptor extractor" << std::endl; - std::cout << "\nUsage:\n\tsurf_matcher --left --right [-c]" << std::endl; - std::cout << "\nExample:\n\tsurf_matcher --left box.png --right box_in_scene.png" << std::endl; -} int64 work_begin = 0; int64 work_end = 0; -void workBegin() -{ +void workBegin() +{ work_begin = getTickCount(); } void workEnd() { work_end = getTickCount() - work_begin; } -double getTime(){ +double getTime() +{ return work_end /((double)cvGetTickFrequency() * 1000.); } @@ -114,17 +62,17 @@ struct SURFMatcher Mat drawGoodMatches( const Mat& cpu_img1, const Mat& cpu_img2, - const vector& keypoints1, - const vector& keypoints2, + const vector& keypoints1, + const vector& keypoints2, vector& matches, vector& scene_corners_ - ) +) { - //-- Sort matches and preserve top 10% matches + //-- Sort matches and preserve top 10% matches std::sort(matches.begin(), matches.end()); std::vector< DMatch > good_matches; double minDist = matches.front().distance, - maxDist = matches.back().distance; + maxDist = matches.back().distance; const int ptsPairs = std::min(GOOD_PTS_MAX, (int)(matches.size() * GOOD_PORTION)); for( int i = 0; i < ptsPairs; i++ ) @@ -139,8 +87,8 @@ Mat drawGoodMatches( // 
drawing the results Mat img_matches; drawMatches( cpu_img1, keypoints1, cpu_img2, keypoints2, - good_matches, img_matches, Scalar::all(-1), Scalar::all(-1), - vector(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS ); + good_matches, img_matches, Scalar::all(-1), Scalar::all(-1), + vector(), DrawMatchesFlags::NOT_DRAW_SINGLE_POINTS ); //-- Localize the object std::vector obj; @@ -154,28 +102,30 @@ Mat drawGoodMatches( } //-- Get the corners from the image_1 ( the object to be "detected" ) std::vector obj_corners(4); - obj_corners[0] = cvPoint(0,0); obj_corners[1] = cvPoint( cpu_img1.cols, 0 ); - obj_corners[2] = cvPoint( cpu_img1.cols, cpu_img1.rows ); obj_corners[3] = cvPoint( 0, cpu_img1.rows ); + obj_corners[0] = cvPoint(0,0); + obj_corners[1] = cvPoint( cpu_img1.cols, 0 ); + obj_corners[2] = cvPoint( cpu_img1.cols, cpu_img1.rows ); + obj_corners[3] = cvPoint( 0, cpu_img1.rows ); std::vector scene_corners(4); - + Mat H = findHomography( obj, scene, CV_RANSAC ); perspectiveTransform( obj_corners, scene_corners, H); scene_corners_ = scene_corners; - + //-- Draw lines between the corners (the mapped object in the scene - image_2 ) - line( img_matches, - scene_corners[0] + Point2f( (float)cpu_img1.cols, 0), scene_corners[1] + Point2f( (float)cpu_img1.cols, 0), - Scalar( 0, 255, 0), 2, CV_AA ); - line( img_matches, - scene_corners[1] + Point2f( (float)cpu_img1.cols, 0), scene_corners[2] + Point2f( (float)cpu_img1.cols, 0), - Scalar( 0, 255, 0), 2, CV_AA ); - line( img_matches, - scene_corners[2] + Point2f( (float)cpu_img1.cols, 0), scene_corners[3] + Point2f( (float)cpu_img1.cols, 0), - Scalar( 0, 255, 0), 2, CV_AA ); - line( img_matches, - scene_corners[3] + Point2f( (float)cpu_img1.cols, 0), scene_corners[0] + Point2f( (float)cpu_img1.cols, 0), - Scalar( 0, 255, 0), 2, CV_AA ); + line( img_matches, + scene_corners[0] + Point2f( (float)cpu_img1.cols, 0), scene_corners[1] + Point2f( (float)cpu_img1.cols, 0), + Scalar( 0, 255, 0), 2, CV_AA ); + line( img_matches, + 
scene_corners[1] + Point2f( (float)cpu_img1.cols, 0), scene_corners[2] + Point2f( (float)cpu_img1.cols, 0), + Scalar( 0, 255, 0), 2, CV_AA ); + line( img_matches, + scene_corners[2] + Point2f( (float)cpu_img1.cols, 0), scene_corners[3] + Point2f( (float)cpu_img1.cols, 0), + Scalar( 0, 255, 0), 2, CV_AA ); + line( img_matches, + scene_corners[3] + Point2f( (float)cpu_img1.cols, 0), scene_corners[0] + Point2f( (float)cpu_img1.cols, 0), + Scalar( 0, 255, 0), 2, CV_AA ); return img_matches; } @@ -185,6 +135,21 @@ Mat drawGoodMatches( // use cpu findHomography interface to calculate the transformation matrix int main(int argc, char* argv[]) { + const char* keys = + "{ h | help | false | print help message }" + "{ l | left | | specify left image }" + "{ r | right | | specify right image }" + "{ o | output | SURF_output.jpg | specify output save path (only works in CPU or GPU only mode) }" + "{ c | use_cpu | false | use CPU algorithms }" + "{ a | use_all | false | use both CPU and GPU algorithms}"; + CommandLineParser cmd(argc, argv, keys); + if (cmd.get("help")) + { + std::cout << "Avaible options:" << std::endl; + cmd.printParams(); + return 0; + } + vector info; if(cv::ocl::getDevice(info) == 0) { @@ -195,54 +160,38 @@ int main(int argc, char* argv[]) Mat cpu_img1, cpu_img2, cpu_img1_grey, cpu_img2_grey; oclMat img1, img2; - bool useCPU = false; + bool useCPU = cmd.get("c"); bool useGPU = false; - bool useALL = false; + bool useALL = cmd.get("a"); - for (int i = 1; i < argc; ++i) + string outpath = cmd.get("o"); + + cpu_img1 = imread(cmd.get("l")); + CV_Assert(!cpu_img1.empty()); + cvtColor(cpu_img1, cpu_img1_grey, CV_BGR2GRAY); + img1 = cpu_img1_grey; + + cpu_img2 = imread(cmd.get("r")); + CV_Assert(!cpu_img2.empty()); + cvtColor(cpu_img2, cpu_img2_grey, CV_BGR2GRAY); + img2 = cpu_img2_grey; + + if(useALL) { - if (string(argv[i]) == "--left") - { - cpu_img1 = imread(argv[++i]); - CV_Assert(!cpu_img1.empty()); - cvtColor(cpu_img1, cpu_img1_grey, CV_BGR2GRAY); - img1 = 
cpu_img1_grey; - } - else if (string(argv[i]) == "--right") - { - cpu_img2 = imread(argv[++i]); - CV_Assert(!cpu_img2.empty()); - cvtColor(cpu_img2, cpu_img2_grey, CV_BGR2GRAY); - img2 = cpu_img2_grey; - } - else if (string(argv[i]) == "-c") - { - useCPU = true; - useGPU = false; - useALL = false; - }else if(string(argv[i]) == "-g") - { - useGPU = true; - useCPU = false; - useALL = false; - }else if(string(argv[i]) == "-a") - { - useALL = true; - useCPU = false; - useGPU = false; - } - else if (string(argv[i]) == "--help") - { - help(); - return -1; - } + useCPU = false; + useGPU = false; } + else if(useCPU==false && useALL==false) + { + useGPU = true; + } + if(!useCPU) { std::cout - << "Device name:" - << info[0].DeviceName[0] - << std::endl; + << "Device name:" + << info[0].DeviceName[0] + << std::endl; } double surf_time = 0.; @@ -262,12 +211,12 @@ int main(int argc, char* argv[]) //instantiate detectors/matchers SURFDetector cpp_surf; SURFDetector ocl_surf; - + SURFMatcher cpp_matcher; SURFMatcher ocl_matcher; //-- start of timing section - if (useCPU) + if (useCPU) { for (int i = 0; i <= LOOP_NUM; i++) { @@ -298,7 +247,8 @@ int main(int argc, char* argv[]) surf_time = getTime(); std::cout << "SURF run time: " << surf_time / LOOP_NUM << " ms" << std::endl<<"\n"; - }else + } + else { //cpu runs for (int i = 0; i <= LOOP_NUM; i++) @@ -353,14 +303,14 @@ int main(int argc, char* argv[]) for(size_t i = 0; i < cpu_corner.size(); i++) { if((std::abs(cpu_corner[i].x - gpu_corner[i].x) > 10) - ||(std::abs(cpu_corner[i].y - gpu_corner[i].y) > 10)) + ||(std::abs(cpu_corner[i].y - gpu_corner[i].y) > 10)) { std::cout<<"Failed\n"; result = false; break; } result = true; - } + } if(result) std::cout<<"Passed\n"; } @@ -371,12 +321,15 @@ int main(int argc, char* argv[]) { namedWindow("cpu surf matches", 0); imshow("cpu surf matches", img_matches); + imwrite(outpath, img_matches); } else if(useGPU) { namedWindow("ocl surf matches", 0); imshow("ocl surf matches", img_matches); - 
}else + imwrite(outpath, img_matches); + } + else { namedWindow("cpu surf matches", 0); imshow("cpu surf matches", img_matches); diff --git a/samples/ocl/tvl1_optical_flow.cpp b/samples/ocl/tvl1_optical_flow.cpp new file mode 100644 index 000000000..cff9692ed --- /dev/null +++ b/samples/ocl/tvl1_optical_flow.cpp @@ -0,0 +1,265 @@ +#include +#include +#include + +#include "opencv2/highgui/highgui.hpp" +#include "opencv2/ocl/ocl.hpp" +#include "opencv2/video/video.hpp" + +using namespace std; +using namespace cv; +using namespace cv::ocl; + +typedef unsigned char uchar; +#define LOOP_NUM 10 +int64 work_begin = 0; +int64 work_end = 0; + +static void workBegin() +{ + work_begin = getTickCount(); +} +static void workEnd() +{ + work_end += (getTickCount() - work_begin); +} +static double getTime() +{ + return work_end * 1000. / getTickFrequency(); +} + +template inline T clamp (T x, T a, T b) +{ + return ((x) > (a) ? ((x) < (b) ? (x) : (b)) : (a)); +} + +template inline T mapValue(T x, T a, T b, T c, T d) +{ + x = clamp(x, a, b); + return c + (d - c) * (x - a) / (b - a); +} + +static void getFlowField(const Mat& u, const Mat& v, Mat& flowField) +{ + float maxDisplacement = 1.0f; + + for (int i = 0; i < u.rows; ++i) + { + const float* ptr_u = u.ptr(i); + const float* ptr_v = v.ptr(i); + + for (int j = 0; j < u.cols; ++j) + { + float d = max(fabsf(ptr_u[j]), fabsf(ptr_v[j])); + + if (d > maxDisplacement) + maxDisplacement = d; + } + } + + flowField.create(u.size(), CV_8UC4); + + for (int i = 0; i < flowField.rows; ++i) + { + const float* ptr_u = u.ptr(i); + const float* ptr_v = v.ptr(i); + + + Vec4b* row = flowField.ptr(i); + + for (int j = 0; j < flowField.cols; ++j) + { + row[j][0] = 0; + row[j][1] = static_cast (mapValue (-ptr_v[j], -maxDisplacement, maxDisplacement, 0.0f, 255.0f)); + row[j][2] = static_cast (mapValue ( ptr_u[j], -maxDisplacement, maxDisplacement, 0.0f, 255.0f)); + row[j][3] = 255; + } + } +} + + +int main(int argc, const char* argv[]) +{ + static 
std::vector ocl_info; + ocl::getDevice(ocl_info); + //if you want to use undefault device, set it here + setDevice(ocl_info[0]); + + //set this to save kernel compile time from second time you run + ocl::setBinpath("./"); + const char* keys = + "{ h | help | false | print help message }" + "{ l | left | | specify left image }" + "{ r | right | | specify right image }" + "{ o | output | tvl1_output.jpg | specify output save path }" + "{ c | camera | 0 | enable camera capturing }" + "{ s | use_cpu | false | use cpu or gpu to process the image }" + "{ v | video | | use video as input }"; + + CommandLineParser cmd(argc, argv, keys); + + if (cmd.get("help")) + { + cout << "Usage: pyrlk_optical_flow [options]" << endl; + cout << "Avaible options:" << endl; + cmd.printParams(); + return 0; + } + + bool defaultPicturesFail = false; + string fname0 = cmd.get("l"); + string fname1 = cmd.get("r"); + string vdofile = cmd.get("v"); + string outpath = cmd.get("o"); + bool useCPU = cmd.get("s"); + bool useCamera = cmd.get("c"); + int inputName = cmd.get("c"); + + Mat frame0 = imread(fname0, cv::IMREAD_GRAYSCALE); + Mat frame1 = imread(fname1, cv::IMREAD_GRAYSCALE); + cv::Ptr alg = cv::createOptFlow_DualTVL1(); + cv::ocl::OpticalFlowDual_TVL1_OCL d_alg; + + + Mat flow, show_flow; + Mat flow_vec[2]; + if (frame0.empty() || frame1.empty()) + { + useCamera = true; + defaultPicturesFail = true; + CvCapture* capture = 0; + capture = cvCaptureFromCAM( inputName ); + if (!capture) + { + cout << "Can't load input images" << endl; + return -1; + } + } + + + if (useCamera) + { + CvCapture* capture = 0; + Mat frame, frameCopy; + Mat frame0Gray, frame1Gray; + Mat ptr0, ptr1; + + if(vdofile == "") + capture = cvCaptureFromCAM( inputName ); + else + capture = cvCreateFileCapture(vdofile.c_str()); + + int c = inputName ; + if(!capture) + { + if(vdofile == "") + cout << "Capture from CAM " << c << " didn't work" << endl; + else + cout << "Capture from file " << vdofile << " failed" <calc(ptr0, 
ptr1, flow); + split(flow, flow_vec); + } + else + { + oclMat d_flowx, d_flowy; + d_alg(oclMat(ptr0), oclMat(ptr1), d_flowx, d_flowy); + d_flowx.download(flow_vec[0]); + d_flowy.download(flow_vec[1]); + } + if (i%2 == 1) + frame1.copyTo(frameCopy); + else + frame0.copyTo(frameCopy); + getFlowField(flow_vec[0], flow_vec[1], show_flow); + imshow("PyrLK [Sparse]", show_flow); + } + + if( waitKey( 10 ) >= 0 ) + goto _cleanup_; + } + + waitKey(0); + +_cleanup_: + cvReleaseCapture( &capture ); + } + else + { +nocamera: + oclMat d_flowx, d_flowy; + for(int i = 0; i <= LOOP_NUM; i ++) + { + cout << "loop" << i << endl; + + if (i > 0) workBegin(); + if (useCPU) + { + alg->calc(frame0, frame1, flow); + split(flow, flow_vec); + } + else + { + d_alg(oclMat(frame0), oclMat(frame1), d_flowx, d_flowy); + d_flowx.download(flow_vec[0]); + d_flowy.download(flow_vec[1]); + } + if (i > 0 && i <= LOOP_NUM) + workEnd(); + + if (i == LOOP_NUM) + { + if (useCPU) + cout << "average CPU time (noCamera) : "; + else + cout << "average GPU time (noCamera) : "; + cout << getTime() / LOOP_NUM << " ms" << endl; + + getFlowField(flow_vec[0], flow_vec[1], show_flow); + imshow("PyrLK [Sparse]", show_flow); + imwrite(outpath, show_flow); + } + } + } + + waitKey(); + + return 0; +} \ No newline at end of file From d58421c08eb578fe449e6b90cbeb7731fdb1a44b Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Wed, 19 Jun 2013 14:45:03 +0400 Subject: [PATCH 106/121] Make version-related test properties more useful. Namely, normalize their names to a common convention and remove useless text from their values. 
--- modules/ts/src/ts_func.cpp | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/modules/ts/src/ts_func.cpp b/modules/ts/src/ts_func.cpp index e2998149d..9b6b53581 100644 --- a/modules/ts/src/ts_func.cpp +++ b/modules/ts/src/ts_func.cpp @@ -2940,27 +2940,29 @@ MatComparator::operator()(const char* expr1, const char* expr2, void printVersionInfo(bool useStdOut) { - ::testing::Test::RecordProperty("CV_VERSION", CV_VERSION); + ::testing::Test::RecordProperty("cv_version", CV_VERSION); if(useStdOut) std::cout << "OpenCV version: " << CV_VERSION << std::endl; std::string buildInfo( cv::getBuildInformation() ); size_t pos1 = buildInfo.find("Version control"); - size_t pos2 = buildInfo.find("\n", pos1);\ + size_t pos2 = buildInfo.find('\n', pos1); if(pos1 != std::string::npos && pos2 != std::string::npos) { - std::string ver( buildInfo.substr(pos1, pos2-pos1) ); - ::testing::Test::RecordProperty("Version_control", ver); - if(useStdOut) std::cout << ver << std::endl; + size_t value_start = buildInfo.rfind(' ', pos2) + 1; + std::string ver( buildInfo.substr(value_start, pos2 - value_start) ); + ::testing::Test::RecordProperty("cv_vcs_version", ver); + if (useStdOut) std::cout << "OpenCV VCS version: " << ver << std::endl; } pos1 = buildInfo.find("inner version"); - pos2 = buildInfo.find("\n", pos1);\ + pos2 = buildInfo.find('\n', pos1); if(pos1 != std::string::npos && pos2 != std::string::npos) { - std::string ver( buildInfo.substr(pos1, pos2-pos1) ); - ::testing::Test::RecordProperty("inner_version", ver); - if(useStdOut) std::cout << ver << std::endl; + size_t value_start = buildInfo.rfind(' ', pos2) + 1; + std::string ver( buildInfo.substr(value_start, pos2 - value_start) ); + ::testing::Test::RecordProperty("cv_inner_vcs_version", ver); + if(useStdOut) std::cout << "Inner VCS version: " << ver << std::endl; } #ifdef CV_PARALLEL_FRAMEWORK From 50166d2734663d305a3931ee95c880edfb073606 Mon Sep 17 00:00:00 2001 From: Vladislav 
Vinogradov Date: Wed, 19 Jun 2013 15:29:20 +0400 Subject: [PATCH 107/121] fixed compilation with CUDA 4.2 --- modules/core/src/gpu_info.cpp | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/modules/core/src/gpu_info.cpp b/modules/core/src/gpu_info.cpp index 7520380ca..e3400a538 100644 --- a/modules/core/src/gpu_info.cpp +++ b/modules/core/src/gpu_info.cpp @@ -569,7 +569,12 @@ int cv::gpu::DeviceInfo::maxTexture1DMipmap() const throw_no_cuda(); return 0; #else - return deviceProps().get(device_id_)->maxTexture1DMipmap; + #if CUDA_VERSION >= 5000 + return deviceProps().get(device_id_)->maxTexture1DMipmap; + #else + CV_Error(Error::StsNotImplemented, "This function requires CUDA 5.0"); + return 0; + #endif #endif } @@ -599,7 +604,12 @@ Vec2i cv::gpu::DeviceInfo::maxTexture2DMipmap() const throw_no_cuda(); return Vec2i(); #else - return Vec2i(deviceProps().get(device_id_)->maxTexture2DMipmap); + #if CUDA_VERSION >= 5000 + return Vec2i(deviceProps().get(device_id_)->maxTexture2DMipmap); + #else + CV_Error(Error::StsNotImplemented, "This function requires CUDA 5.0"); + return Vec2i(); + #endif #endif } From 1ed5fb937d34348becbf9fa3c837d1bdfe9c6f95 Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Wed, 19 Jun 2013 15:39:11 +0400 Subject: [PATCH 108/121] Give cv::ocl::CLAHE a virtual destructor, for the usual reasons. 
--- modules/ocl/include/opencv2/ocl/ocl.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp index d6dd4b983..3324b7932 100644 --- a/modules/ocl/include/opencv2/ocl/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl/ocl.hpp @@ -520,6 +520,8 @@ namespace cv virtual Size getTilesGridSize() const = 0; virtual void collectGarbage() = 0; + + virtual ~CLAHE() {} }; CV_EXPORTS Ptr createCLAHE(double clipLimit = 40.0, Size tileGridSize = Size(8, 8)); From c1f4fe1637aa1279d7eef7ef95f26ea92c9de967 Mon Sep 17 00:00:00 2001 From: peng xiao Date: Thu, 20 Jun 2013 11:26:22 +0800 Subject: [PATCH 109/121] Fix a bug of convertTo. The bug was found that all 3-channel oclMat's were converted to 4-channel oclMat's after using convertTo function. --- modules/ocl/src/matrix_operations.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ocl/src/matrix_operations.cpp b/modules/ocl/src/matrix_operations.cpp index 268a1fe9b..172dfa5a8 100644 --- a/modules/ocl/src/matrix_operations.cpp +++ b/modules/ocl/src/matrix_operations.cpp @@ -394,7 +394,7 @@ void cv::ocl::oclMat::convertTo( oclMat &dst, int rtype, double alpha, double be if( rtype < 0 ) rtype = type(); else - rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), oclchannels()); + rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), channels()); //int scn = channels(); int sdepth = depth(), ddepth = CV_MAT_DEPTH(rtype); From f08d75a84bcb8c96af4d540af03b15c59c8f4b50 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Thu, 20 Jun 2013 11:24:29 +0400 Subject: [PATCH 110/121] removed obsolete API --- .../gpufilters/include/opencv2/gpufilters.hpp | 139 ------------------ 1 file changed, 139 deletions(-) diff --git a/modules/gpufilters/include/opencv2/gpufilters.hpp b/modules/gpufilters/include/opencv2/gpufilters.hpp index a2cc8db6b..b0ebfd73c 100644 --- a/modules/gpufilters/include/opencv2/gpufilters.hpp +++ 
b/modules/gpufilters/include/opencv2/gpufilters.hpp @@ -50,17 +50,6 @@ #include "opencv2/core/gpu.hpp" #include "opencv2/imgproc.hpp" -#if defined __GNUC__ - #define __OPENCV_GPUFILTERS_DEPR_BEFORE__ - #define __OPENCV_GPUFILTERS_DEPR_AFTER__ __attribute__ ((deprecated)) -#elif (defined WIN32 || defined _WIN32) - #define __OPENCV_GPUFILTERS_DEPR_BEFORE__ __declspec(deprecated) - #define __OPENCV_GPUFILTERS_DEPR_AFTER__ -#else - #define __OPENCV_GPUFILTERS_DEPR_BEFORE__ - #define __OPENCV_GPUFILTERS_DEPR_AFTER__ -#endif - namespace cv { namespace gpu { class CV_EXPORTS Filter : public Algorithm @@ -77,28 +66,6 @@ public: CV_EXPORTS Ptr createBoxFilter(int srcType, int dstType, Size ksize, Point anchor = Point(-1,-1), int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0)); -// obsolete - -__OPENCV_GPUFILTERS_DEPR_BEFORE__ void boxFilter(InputArray src, OutputArray dst, int dstType, - Size ksize, Point anchor = Point(-1,-1), - Stream& stream = Stream::Null()) __OPENCV_GPUFILTERS_DEPR_AFTER__; - -inline void boxFilter(InputArray src, OutputArray dst, int dstType, Size ksize, Point anchor, Stream& stream) -{ - Ptr f = gpu::createBoxFilter(src.type(), dstType, ksize, anchor); - f->apply(src, dst, stream); -} - -__OPENCV_GPUFILTERS_DEPR_BEFORE__ void blur(InputArray src, OutputArray dst, Size ksize, - Point anchor = Point(-1,-1), - Stream& stream = Stream::Null()) __OPENCV_GPUFILTERS_DEPR_AFTER__; - -inline void blur(InputArray src, OutputArray dst, Size ksize, Point anchor, Stream& stream) -{ - Ptr f = gpu::createBoxFilter(src.type(), -1, ksize, anchor); - f->apply(src, dst, stream); -} - //////////////////////////////////////////////////////////////////////////////////////////////////// // Linear Filter @@ -107,18 +74,6 @@ inline void blur(InputArray src, OutputArray dst, Size ksize, Point anchor, Stre CV_EXPORTS Ptr createLinearFilter(int srcType, int dstType, InputArray kernel, Point anchor = Point(-1,-1), int borderMode = BORDER_DEFAULT, Scalar borderVal 
= Scalar::all(0)); -// obsolete - -__OPENCV_GPUFILTERS_DEPR_BEFORE__ void filter2D(InputArray src, OutputArray dst, int ddepth, InputArray kernel, - Point anchor = Point(-1,-1), int borderType = BORDER_DEFAULT, - Stream& stream = Stream::Null()) __OPENCV_GPUFILTERS_DEPR_AFTER__; - -inline void filter2D(InputArray src, OutputArray dst, int ddepth, InputArray kernel, Point anchor, int borderType, Stream& stream) -{ - Ptr f = gpu::createLinearFilter(src.type(), ddepth, kernel, anchor, borderType); - f->apply(src, dst, stream); -} - //////////////////////////////////////////////////////////////////////////////////////////////////// // Laplacian Filter @@ -127,18 +82,6 @@ inline void filter2D(InputArray src, OutputArray dst, int ddepth, InputArray ker CV_EXPORTS Ptr createLaplacianFilter(int srcType, int dstType, int ksize = 1, double scale = 1, int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0)); -// obsolete - -__OPENCV_GPUFILTERS_DEPR_BEFORE__ void Laplacian(InputArray src, OutputArray dst, int ddepth, - int ksize = 1, double scale = 1, int borderType = BORDER_DEFAULT, - Stream& stream = Stream::Null()) __OPENCV_GPUFILTERS_DEPR_AFTER__; - -inline void Laplacian(InputArray src, OutputArray dst, int ddepth, int ksize, double scale, int borderType, Stream& stream) -{ - Ptr f = gpu::createLaplacianFilter(src.type(), ddepth, ksize, scale, borderType); - f->apply(src, dst, stream); -} - //////////////////////////////////////////////////////////////////////////////////////////////////// // Separable Linear Filter @@ -146,18 +89,6 @@ inline void Laplacian(InputArray src, OutputArray dst, int ddepth, int ksize, do CV_EXPORTS Ptr createSeparableLinearFilter(int srcType, int dstType, InputArray rowKernel, InputArray columnKernel, Point anchor = Point(-1,-1), int rowBorderMode = BORDER_DEFAULT, int columnBorderMode = -1); -// obsolete - -__OPENCV_GPUFILTERS_DEPR_BEFORE__ void sepFilter2D(InputArray src, OutputArray dst, int ddepth, InputArray kernelX, InputArray 
kernelY, - Point anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, - Stream& stream = Stream::Null()) __OPENCV_GPUFILTERS_DEPR_AFTER__; - -inline void sepFilter2D(InputArray src, OutputArray dst, int ddepth, InputArray kernelX, InputArray kernelY, Point anchor, int rowBorderType, int columnBorderType, Stream& stream) -{ - Ptr f = gpu::createSeparableLinearFilter(src.type(), ddepth, kernelX, kernelY, anchor, rowBorderType, columnBorderType); - f->apply(src, dst, stream); -} - //////////////////////////////////////////////////////////////////////////////////////////////////// // Deriv Filter @@ -174,28 +105,6 @@ CV_EXPORTS Ptr createSobelFilter(int srcType, int dstType, int dx, int d CV_EXPORTS Ptr createScharrFilter(int srcType, int dstType, int dx, int dy, double scale = 1, int rowBorderMode = BORDER_DEFAULT, int columnBorderMode = -1); -// obsolete - -__OPENCV_GPUFILTERS_DEPR_BEFORE__ void Sobel(InputArray src, OutputArray dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1, - int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, - Stream& stream = Stream::Null()) __OPENCV_GPUFILTERS_DEPR_AFTER__; - -inline void Sobel(InputArray src, OutputArray dst, int ddepth, int dx, int dy, int ksize, double scale, int rowBorderType, int columnBorderType, Stream& stream) -{ - Ptr f = gpu::createSobelFilter(src.type(), ddepth, dx, dy, ksize, scale, rowBorderType, columnBorderType); - f->apply(src, dst, stream); -} - -__OPENCV_GPUFILTERS_DEPR_BEFORE__ void Scharr(InputArray src, OutputArray dst, int ddepth, int dx, int dy, double scale = 1, - int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, - Stream& stream = Stream::Null()) __OPENCV_GPUFILTERS_DEPR_AFTER__; - -inline void Scharr(InputArray src, OutputArray dst, int ddepth, int dx, int dy, double scale, int rowBorderType, int columnBorderType, Stream& stream) -{ - Ptr f = gpu::createScharrFilter(src.type(), ddepth, dx, dy, scale, rowBorderType, 
columnBorderType); - f->apply(src, dst, stream); -} - //////////////////////////////////////////////////////////////////////////////////////////////////// // Gaussian Filter @@ -204,19 +113,6 @@ CV_EXPORTS Ptr createGaussianFilter(int srcType, int dstType, Size ksize double sigma1, double sigma2 = 0, int rowBorderMode = BORDER_DEFAULT, int columnBorderMode = -1); -// obsolete - -__OPENCV_GPUFILTERS_DEPR_BEFORE__ void GaussianBlur(InputArray src, OutputArray dst, Size ksize, - double sigma1, double sigma2 = 0, - int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, - Stream& stream = Stream::Null()) __OPENCV_GPUFILTERS_DEPR_AFTER__; - -inline void GaussianBlur(InputArray src, OutputArray dst, Size ksize, double sigma1, double sigma2, int rowBorderType, int columnBorderType, Stream& stream) -{ - Ptr f = gpu::createGaussianFilter(src.type(), -1, ksize, sigma1, sigma2, rowBorderType, columnBorderType); - f->apply(src, dst, stream); -} - //////////////////////////////////////////////////////////////////////////////////////////////////// // Morphology Filter @@ -224,38 +120,6 @@ inline void GaussianBlur(InputArray src, OutputArray dst, Size ksize, double sig //! 
supports CV_8UC1 and CV_8UC4 types CV_EXPORTS Ptr createMorphologyFilter(int op, int srcType, InputArray kernel, Point anchor = Point(-1, -1), int iterations = 1); -// obsolete - -__OPENCV_GPUFILTERS_DEPR_BEFORE__ void erode(InputArray src, OutputArray dst, InputArray kernel, - Point anchor = Point(-1, -1), int iterations = 1, - Stream& stream = Stream::Null()) __OPENCV_GPUFILTERS_DEPR_AFTER__; - -inline void erode(InputArray src, OutputArray dst, InputArray kernel, Point anchor, int iterations, Stream& stream) -{ - Ptr f = gpu::createMorphologyFilter(MORPH_ERODE, src.type(), kernel, anchor, iterations); - f->apply(src, dst, stream); -} - -__OPENCV_GPUFILTERS_DEPR_BEFORE__ void dilate(InputArray src, OutputArray dst, InputArray kernel, - Point anchor = Point(-1, -1), int iterations = 1, - Stream& stream = Stream::Null()) __OPENCV_GPUFILTERS_DEPR_AFTER__; - -inline void dilate(InputArray src, OutputArray dst, InputArray kernel, Point anchor, int iterations, Stream& stream) -{ - Ptr f = gpu::createMorphologyFilter(MORPH_DILATE, src.type(), kernel, anchor, iterations); - f->apply(src, dst, stream); -} - -__OPENCV_GPUFILTERS_DEPR_BEFORE__ void morphologyEx(InputArray src, OutputArray dst, int op, - InputArray kernel, Point anchor = Point(-1, -1), int iterations = 1, - Stream& stream = Stream::Null()) __OPENCV_GPUFILTERS_DEPR_AFTER__; - -inline void morphologyEx(InputArray src, OutputArray dst, int op, InputArray kernel, Point anchor, int iterations, Stream& stream) -{ - Ptr f = gpu::createMorphologyFilter(op, src.type(), kernel, anchor, iterations); - f->apply(src, dst, stream); -} - //////////////////////////////////////////////////////////////////////////////////////////////////// // Image Rank Filter @@ -282,7 +146,4 @@ CV_EXPORTS Ptr createColumnSumFilter(int srcType, int dstType, int ksize }} // namespace cv { namespace gpu { -#undef __OPENCV_GPUFILTERS_DEPR_BEFORE__ -#undef __OPENCV_GPUFILTERS_DEPR_AFTER__ - #endif /* __OPENCV_GPUFILTERS_HPP__ */ From 
3e2c4563134e2b88408ad7b1a280a312eb46d4a4 Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Thu, 20 Jun 2013 14:27:51 +0400 Subject: [PATCH 111/121] A few minor improvements to the XLS report generator. * In comparison column headers, switched the order of labels, so that it's "to" vs "from". * When a test was present, but not run successfully, put its status in the corresponding cell instead of coloring it gray. --- modules/ts/misc/xls-report.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/modules/ts/misc/xls-report.py b/modules/ts/misc/xls-report.py index f6278bae0..c13842cdc 100755 --- a/modules/ts/misc/xls-report.py +++ b/modules/ts/misc/xls-report.py @@ -4,6 +4,7 @@ from __future__ import division import ast import logging +import numbers import os, os.path import re @@ -52,8 +53,7 @@ def collect_xml(collection, configuration, xml_fullname): for test in sorted(parseLogFile(xml_fullname)): test_results = module_tests.setdefault((test.shortName(), test.param()), {}) - if test.status == 'run': - test_results[configuration] = test.get("gmean") + test_results[configuration] = test.get("gmean") if test.status == 'run' else test.status def main(): arg_parser = ArgumentParser(description='Build an XLS performance report.') @@ -117,7 +117,7 @@ def main(): for i, caption in enumerate(['Module', 'Test', 'Image\nsize', 'Data\ntype', 'Parameters'] + config_names + [None] - + [comp['from'] + '\nvs\n' + comp['to'] for comp in sheet_comparisons]): + + [comp['to'] + '\nvs\n' + comp['from'] for comp in sheet_comparisons]): sheet.row(0).write(i, caption, header_style) row = 1 @@ -143,13 +143,13 @@ def main(): sheet.write(row, 5 + i, None, no_time_style) for i, comp in enumerate(sheet_comparisons): - left = configs.get(comp["from"]) - right = configs.get(comp["to"]) + cmp_from = configs.get(comp["from"]) + cmp_to = configs.get(comp["to"]) col = 5 + len(config_names) + 1 + i - if left is not None and right is not None: + if 
isinstance(cmp_from, numbers.Number) and isinstance(cmp_to, numbers.Number): try: - speedup = left / right + speedup = cmp_from / cmp_to sheet.write(row, col, speedup, good_speedup_style if speedup > 1.1 else bad_speedup_style if speedup < 0.9 else speedup_style) From 57317c3196fb9d5fbe9e00b16453dea7d534ac11 Mon Sep 17 00:00:00 2001 From: Roman Donchenko Date: Thu, 20 Jun 2013 19:39:02 +0400 Subject: [PATCH 112/121] Use log formatting as intended. --- modules/ts/misc/xls-report.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/ts/misc/xls-report.py b/modules/ts/misc/xls-report.py index c13842cdc..e79bb123d 100755 --- a/modules/ts/misc/xls-report.py +++ b/modules/ts/misc/xls-report.py @@ -79,7 +79,7 @@ def main(): sheet_conf = ast.literal_eval(sheet_conf_file.read()) except Exception: sheet_conf = {} - logging.debug('no sheet.conf for {}'.format(sheet_path)) + logging.debug('no sheet.conf for %s', sheet_path) sheet_conf = dict(global_conf.items() + sheet_conf.items()) @@ -90,14 +90,14 @@ def main(): config_names = [p for p in os.listdir(sheet_path) if os.path.isdir(os.path.join(sheet_path, p))] except Exception as e: - logging.warning(e) + logging.warning('error while determining configuration names for %s: %s', sheet_path, e) continue collection = {} for configuration, configuration_path in \ [(c, os.path.join(sheet_path, c)) for c in config_names]: - logging.info('processing {}'.format(configuration_path)) + logging.info('processing %s', configuration_path) for xml_fullname in glob(os.path.join(configuration_path, '*.xml')): collect_xml(collection, configuration, xml_fullname) From e12963826337daa5ff67198e25b17f0dfdbf2edf Mon Sep 17 00:00:00 2001 From: peng xiao Date: Fri, 21 Jun 2013 14:05:29 +0800 Subject: [PATCH 113/121] Add a workaround to interpolate between oclMat and Input/OutputArray. 
--- modules/core/include/opencv2/core/core.hpp | 3 ++- modules/core/src/matrix.cpp | 30 ++++++++++++++++++++++ modules/ocl/include/opencv2/ocl/ocl.hpp | 8 ++++++ modules/ocl/src/matrix_operations.cpp | 29 +++++++++++++++++++++ 4 files changed, 69 insertions(+), 1 deletion(-) diff --git a/modules/core/include/opencv2/core/core.hpp b/modules/core/include/opencv2/core/core.hpp index 2b7791958..5ff31fe3a 100644 --- a/modules/core/include/opencv2/core/core.hpp +++ b/modules/core/include/opencv2/core/core.hpp @@ -1322,7 +1322,8 @@ public: EXPR = 6 << KIND_SHIFT, OPENGL_BUFFER = 7 << KIND_SHIFT, OPENGL_TEXTURE = 8 << KIND_SHIFT, - GPU_MAT = 9 << KIND_SHIFT + GPU_MAT = 9 << KIND_SHIFT, + OCL_MAT =10 << KIND_SHIFT }; _InputArray(); diff --git a/modules/core/src/matrix.cpp b/modules/core/src/matrix.cpp index 7acb0e0db..c4c0041dd 100644 --- a/modules/core/src/matrix.cpp +++ b/modules/core/src/matrix.cpp @@ -980,6 +980,11 @@ Mat _InputArray::getMat(int i) const return !v.empty() ? Mat(size(i), t, (void*)&v[0]) : Mat(); } + if( k == OCL_MAT ) + { + CV_Error(-1, "Not implemented"); + } + CV_Assert( k == STD_VECTOR_MAT ); //if( k == STD_VECTOR_MAT ) { @@ -1062,6 +1067,11 @@ void _InputArray::getMatVector(vector& mv) const return; } + if( k == OCL_MAT ) + { + CV_Error(-1, "Not implemented"); + } + CV_Assert( k == STD_VECTOR_MAT ); //if( k == STD_VECTOR_MAT ) { @@ -1189,6 +1199,11 @@ Size _InputArray::size(int i) const return tex->size(); } + if( k == OCL_MAT ) + { + CV_Error(-1, "Not implemented"); + } + CV_Assert( k == GPU_MAT ); //if( k == GPU_MAT ) { @@ -1303,6 +1318,11 @@ bool _InputArray::empty() const if( k == OPENGL_TEXTURE ) return ((const ogl::Texture2D*)obj)->empty(); + if( k == OCL_MAT ) + { + CV_Error(-1, "Not implemented"); + } + CV_Assert( k == GPU_MAT ); //if( k == GPU_MAT ) return ((const gpu::GpuMat*)obj)->empty(); @@ -1523,6 +1543,11 @@ void _OutputArray::create(int dims, const int* sizes, int mtype, int i, bool all return; } + if( k == OCL_MAT ) + { + 
CV_Error(-1, "Not implemented"); + } + if( k == NONE ) { CV_Error(CV_StsNullPtr, "create() called for the missing output array" ); @@ -1634,6 +1659,11 @@ void _OutputArray::release() const return; } + if( k == OCL_MAT ) + { + CV_Error(-1, "Not implemented"); + } + CV_Assert( k == STD_VECTOR_MAT ); //if( k == STD_VECTOR_MAT ) { diff --git a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp index d6dd4b983..9fdd8f3e9 100644 --- a/modules/ocl/include/opencv2/ocl/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl/ocl.hpp @@ -248,6 +248,11 @@ namespace cv operator Mat() const; void download(cv::Mat &m) const; + //! convert to _InputArray + operator _InputArray(); + + //! convert to _OutputArray + operator _OutputArray(); //! returns a new oclMatrix header for the specified row oclMat row(int y) const; @@ -387,6 +392,9 @@ namespace cv int wholecols; }; + // convert InputArray/OutputArray to oclMat + CV_EXPORTS oclMat& getOclMat(InputArray src); + CV_EXPORTS oclMat& getOclMat(OutputArray src); ///////////////////// mat split and merge ///////////////////////////////// //! 
Compose a multi-channel array from several single-channel arrays diff --git a/modules/ocl/src/matrix_operations.cpp b/modules/ocl/src/matrix_operations.cpp index 268a1fe9b..dc7deebe3 100644 --- a/modules/ocl/src/matrix_operations.cpp +++ b/modules/ocl/src/matrix_operations.cpp @@ -74,6 +74,7 @@ namespace cv } } + //////////////////////////////////////////////////////////////////////// // convert_C3C4 static void convert_C3C4(const cl_mem &src, oclMat &dst) @@ -227,6 +228,34 @@ void cv::ocl::oclMat::upload(const Mat &m) //download_channels = m.channels(); } +cv::ocl::oclMat::operator cv::_InputArray() +{ + _InputArray newInputArray; + newInputArray.flags = cv::_InputArray::OCL_MAT; + newInputArray.obj = reinterpret_cast(this); + return newInputArray; +} + +cv::ocl::oclMat::operator cv::_OutputArray() +{ + _OutputArray newOutputArray; + newOutputArray.flags = cv::_InputArray::OCL_MAT; + newOutputArray.obj = reinterpret_cast(this); + return newOutputArray; +} + +cv::ocl::oclMat& cv::ocl::getOclMat(InputArray src) +{ + CV_Assert(src.flags & cv::_InputArray::OCL_MAT); + return *reinterpret_cast(src.obj); +} + +cv::ocl::oclMat& cv::ocl::getOclMat(OutputArray src) +{ + CV_Assert(src.flags & cv::_InputArray::OCL_MAT); + return *reinterpret_cast(src.obj); +} + void cv::ocl::oclMat::download(cv::Mat &m) const { CV_DbgAssert(!this->empty()); From 6326739b443c0e87a251446893ee18225eeaf428 Mon Sep 17 00:00:00 2001 From: yao Date: Fri, 21 Jun 2013 14:50:08 +0800 Subject: [PATCH 114/121] a bug fix in stereo_match sample --- samples/ocl/stereo_match.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/samples/ocl/stereo_match.cpp b/samples/ocl/stereo_match.cpp index 565744baa..abe75c70e 100644 --- a/samples/ocl/stereo_match.cpp +++ b/samples/ocl/stereo_match.cpp @@ -192,10 +192,9 @@ void App::run() csbp(d_left, d_right, d_disp); break; } - workEnd(); - // Show results d_disp.download(disp); + workEnd(); if (method != BM) { disp.convertTo(disp, 0); From 
290c8db0a85ff6e4a9d84243624852a21190598f Mon Sep 17 00:00:00 2001 From: peng xiao Date: Fri, 21 Jun 2013 14:51:23 +0800 Subject: [PATCH 115/121] Revise naming for getOclMat function. --- modules/core/src/matrix.cpp | 12 ++++++------ modules/ocl/include/opencv2/ocl/ocl.hpp | 6 +++--- modules/ocl/src/matrix_operations.cpp | 4 ++-- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/modules/core/src/matrix.cpp b/modules/core/src/matrix.cpp index c4c0041dd..5a3600b9b 100644 --- a/modules/core/src/matrix.cpp +++ b/modules/core/src/matrix.cpp @@ -982,7 +982,7 @@ Mat _InputArray::getMat(int i) const if( k == OCL_MAT ) { - CV_Error(-1, "Not implemented"); + CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet"); } CV_Assert( k == STD_VECTOR_MAT ); @@ -1069,7 +1069,7 @@ void _InputArray::getMatVector(vector& mv) const if( k == OCL_MAT ) { - CV_Error(-1, "Not implemented"); + CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet"); } CV_Assert( k == STD_VECTOR_MAT ); @@ -1201,7 +1201,7 @@ Size _InputArray::size(int i) const if( k == OCL_MAT ) { - CV_Error(-1, "Not implemented"); + CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet"); } CV_Assert( k == GPU_MAT ); @@ -1320,7 +1320,7 @@ bool _InputArray::empty() const if( k == OCL_MAT ) { - CV_Error(-1, "Not implemented"); + CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet"); } CV_Assert( k == GPU_MAT ); @@ -1545,7 +1545,7 @@ void _OutputArray::create(int dims, const int* sizes, int mtype, int i, bool all if( k == OCL_MAT ) { - CV_Error(-1, "Not implemented"); + CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet"); } if( k == NONE ) @@ -1661,7 +1661,7 @@ void _OutputArray::release() const if( k == OCL_MAT ) { - CV_Error(-1, "Not implemented"); + CV_Error(CV_StsNotImplemented, "This method is not implemented for oclMat yet"); } CV_Assert( k == STD_VECTOR_MAT ); diff --git 
a/modules/ocl/include/opencv2/ocl/ocl.hpp b/modules/ocl/include/opencv2/ocl/ocl.hpp index 9fdd8f3e9..ed887e61a 100644 --- a/modules/ocl/include/opencv2/ocl/ocl.hpp +++ b/modules/ocl/include/opencv2/ocl/ocl.hpp @@ -392,9 +392,9 @@ namespace cv int wholecols; }; - // convert InputArray/OutputArray to oclMat - CV_EXPORTS oclMat& getOclMat(InputArray src); - CV_EXPORTS oclMat& getOclMat(OutputArray src); + // convert InputArray/OutputArray to oclMat references + CV_EXPORTS oclMat& getOclMatRef(InputArray src); + CV_EXPORTS oclMat& getOclMatRef(OutputArray src); ///////////////////// mat split and merge ///////////////////////////////// //! Compose a multi-channel array from several single-channel arrays diff --git a/modules/ocl/src/matrix_operations.cpp b/modules/ocl/src/matrix_operations.cpp index dc7deebe3..dcaf0418a 100644 --- a/modules/ocl/src/matrix_operations.cpp +++ b/modules/ocl/src/matrix_operations.cpp @@ -244,13 +244,13 @@ cv::ocl::oclMat::operator cv::_OutputArray() return newOutputArray; } -cv::ocl::oclMat& cv::ocl::getOclMat(InputArray src) +cv::ocl::oclMat& cv::ocl::getOclMatRef(InputArray src) { CV_Assert(src.flags & cv::_InputArray::OCL_MAT); return *reinterpret_cast(src.obj); } -cv::ocl::oclMat& cv::ocl::getOclMat(OutputArray src) +cv::ocl::oclMat& cv::ocl::getOclMatRef(OutputArray src) { CV_Assert(src.flags & cv::_InputArray::OCL_MAT); return *reinterpret_cast(src.obj); From 98bd401534885f36000262671a7d07e274129ea3 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Fri, 21 Jun 2013 13:53:56 +0400 Subject: [PATCH 116/121] fix gpu::DeviceInfo compilation under linux glibc defines major and minor macros which conflict with gpu::DeviceInfo methods with the same name --- modules/core/include/opencv2/core/gpu.hpp | 4 ++-- modules/core/include/opencv2/core/gpu.inl.hpp | 2 +- modules/core/src/gpu_info.cpp | 12 ++++++------ .../gpu/doc/initalization_and_information.rst | 16 ++++++++-------- modules/gpufilters/src/filtering.cpp | 2 +- 
modules/gpuoptflow/test/test_optflow.cpp | 2 +- modules/ts/src/gpu_perf.cpp | 2 +- samples/gpu/driver_api_multi.cpp | 4 ++-- samples/gpu/driver_api_stereo_multi.cpp | 4 ++-- samples/gpu/multi.cpp | 4 ++-- samples/gpu/performance/performance.cpp | 2 +- samples/gpu/stereo_multi.cpp | 4 ++-- 12 files changed, 29 insertions(+), 29 deletions(-) diff --git a/modules/core/include/opencv2/core/gpu.hpp b/modules/core/include/opencv2/core/gpu.hpp index 775a9d026..3ab299cf3 100644 --- a/modules/core/include/opencv2/core/gpu.hpp +++ b/modules/core/include/opencv2/core/gpu.hpp @@ -529,10 +529,10 @@ public: size_t totalConstMem() const; //! major compute capability - int major() const; + int majorVersion() const; //! minor compute capability - int minor() const; + int minorVersion() const; //! alignment requirement for textures size_t textureAlignment() const; diff --git a/modules/core/include/opencv2/core/gpu.inl.hpp b/modules/core/include/opencv2/core/gpu.inl.hpp index b44c2b151..2d08c4436 100644 --- a/modules/core/include/opencv2/core/gpu.inl.hpp +++ b/modules/core/include/opencv2/core/gpu.inl.hpp @@ -619,7 +619,7 @@ size_t DeviceInfo::totalMemory() const inline bool DeviceInfo::supports(FeatureSet feature_set) const { - int version = major() * 10 + minor(); + int version = majorVersion() * 10 + minorVersion(); return version >= feature_set; } diff --git a/modules/core/src/gpu_info.cpp b/modules/core/src/gpu_info.cpp index e3400a538..5a1e56746 100644 --- a/modules/core/src/gpu_info.cpp +++ b/modules/core/src/gpu_info.cpp @@ -119,7 +119,7 @@ bool cv::gpu::deviceSupports(FeatureSet feature_set) else { DeviceInfo dev(devId); - version = dev.major() * 10 + dev.minor(); + version = dev.majorVersion() * 10 + dev.minorVersion(); if (devId < cache_size) versions[devId] = version; } @@ -455,7 +455,7 @@ size_t cv::gpu::DeviceInfo::totalConstMem() const #endif } -int cv::gpu::DeviceInfo::major() const +int cv::gpu::DeviceInfo::majorVersion() const { #ifndef HAVE_CUDA throw_no_cuda(); @@ 
-465,7 +465,7 @@ int cv::gpu::DeviceInfo::major() const #endif } -int cv::gpu::DeviceInfo::minor() const +int cv::gpu::DeviceInfo::minorVersion() const { #ifndef HAVE_CUDA throw_no_cuda(); @@ -908,12 +908,12 @@ bool cv::gpu::DeviceInfo::isCompatible() const return false; #else // Check PTX compatibility - if (TargetArchs::hasEqualOrLessPtx(major(), minor())) + if (TargetArchs::hasEqualOrLessPtx(majorVersion(), minorVersion())) return true; // Check BIN compatibility - for (int i = minor(); i >= 0; --i) - if (TargetArchs::hasBin(major(), i)) + for (int i = minorVersion(); i >= 0; --i) + if (TargetArchs::hasBin(majorVersion(), i)) return true; return false; diff --git a/modules/gpu/doc/initalization_and_information.rst b/modules/gpu/doc/initalization_and_information.rst index ad4b29d42..abfc0860c 100644 --- a/modules/gpu/doc/initalization_and_information.rst +++ b/modules/gpu/doc/initalization_and_information.rst @@ -147,10 +147,10 @@ Class providing functionality for querying the specified GPU properties. :: size_t totalConstMem() const; //! major compute capability - int major() const; + int majorVersion() const; //! minor compute capability - int minor() const; + int minorVersion() const; //! alignment requirement for textures size_t textureAlignment() const; @@ -313,19 +313,19 @@ Returns the device name. -gpu::DeviceInfo::major ----------------------- +gpu::DeviceInfo::majorVersion +----------------------------- Returns the major compute capability version. -.. ocv:function:: int gpu::DeviceInfo::major() +.. ocv:function:: int gpu::DeviceInfo::majorVersion() -gpu::DeviceInfo::minor ----------------------- +gpu::DeviceInfo::minorVersion +----------------------------- Returns the minor compute capability version. -.. ocv:function:: int gpu::DeviceInfo::minor() +.. 
ocv:function:: int gpu::DeviceInfo::minorVersion() diff --git a/modules/gpufilters/src/filtering.cpp b/modules/gpufilters/src/filtering.cpp index 7f02bdac5..5a852c923 100644 --- a/modules/gpufilters/src/filtering.cpp +++ b/modules/gpufilters/src/filtering.cpp @@ -409,7 +409,7 @@ namespace ensureSizeIsEnough(src.size(), bufType_, buf_); DeviceInfo devInfo; - const int cc = devInfo.major() * 10 + devInfo.minor(); + const int cc = devInfo.majorVersion() * 10 + devInfo.minorVersion(); cudaStream_t stream = StreamAccessor::getStream(_stream); diff --git a/modules/gpuoptflow/test/test_optflow.cpp b/modules/gpuoptflow/test/test_optflow.cpp index c20260e19..fce07551d 100644 --- a/modules/gpuoptflow/test/test_optflow.cpp +++ b/modules/gpuoptflow/test/test_optflow.cpp @@ -80,7 +80,7 @@ GPU_TEST_P(BroxOpticalFlow, Regression) brox(loadMat(frame0), loadMat(frame1), u, v); std::string fname(cvtest::TS::ptr()->get_data_path()); - if (devInfo.major() >= 2) + if (devInfo.majorVersion() >= 2) fname += "opticalflow/brox_optical_flow_cc20.bin"; else fname += "opticalflow/brox_optical_flow.bin"; diff --git a/modules/ts/src/gpu_perf.cpp b/modules/ts/src/gpu_perf.cpp index dca181468..2bca535c4 100644 --- a/modules/ts/src/gpu_perf.cpp +++ b/modules/ts/src/gpu_perf.cpp @@ -288,7 +288,7 @@ namespace perf printf("[----------]\n"), fflush(stdout); printf("[ DEVICE ] \t# %d %s.\n", i, info.name()), fflush(stdout); - printf("[ ] \tCompute capability: %d.%d\n", (int)info.major(), (int)info.minor()), fflush(stdout); + printf("[ ] \tCompute capability: %d.%d\n", (int)info.majorVersion(), (int)info.minorVersion()), fflush(stdout); printf("[ ] \tMulti Processor Count: %d\n", info.multiProcessorCount()), fflush(stdout); printf("[ ] \tTotal memory: %d Mb\n", static_cast(static_cast(info.totalMemory() / 1024.0) / 1024.0)), fflush(stdout); printf("[ ] \tFree memory: %d Mb\n", static_cast(static_cast(info.freeMemory() / 1024.0) / 1024.0)), fflush(stdout); diff --git a/samples/gpu/driver_api_multi.cpp 
b/samples/gpu/driver_api_multi.cpp index 1dfe2123c..e78f7e54f 100644 --- a/samples/gpu/driver_api_multi.cpp +++ b/samples/gpu/driver_api_multi.cpp @@ -86,8 +86,8 @@ int main() if (!dev_info.isCompatible()) { std::cout << "GPU module isn't built for GPU #" << i << " (" - << dev_info.name() << ", CC " << dev_info.major() - << dev_info.minor() << "\n"; + << dev_info.name() << ", CC " << dev_info.majorVersion() + << dev_info.minorVersion() << "\n"; return -1; } } diff --git a/samples/gpu/driver_api_stereo_multi.cpp b/samples/gpu/driver_api_stereo_multi.cpp index c49fc8564..d40c20c1e 100644 --- a/samples/gpu/driver_api_stereo_multi.cpp +++ b/samples/gpu/driver_api_stereo_multi.cpp @@ -116,8 +116,8 @@ int main(int argc, char** argv) if (!dev_info.isCompatible()) { std::cout << "GPU module isn't built for GPU #" << i << " (" - << dev_info.name() << ", CC " << dev_info.major() - << dev_info.minor() << "\n"; + << dev_info.name() << ", CC " << dev_info.majorVersion() + << dev_info.minorVersion() << "\n"; return -1; } } diff --git a/samples/gpu/multi.cpp b/samples/gpu/multi.cpp index c6e6aa398..b83fd2ce4 100644 --- a/samples/gpu/multi.cpp +++ b/samples/gpu/multi.cpp @@ -62,8 +62,8 @@ int main() if (!dev_info.isCompatible()) { std::cout << "GPU module isn't built for GPU #" << i << " (" - << dev_info.name() << ", CC " << dev_info.major() - << dev_info.minor() << "\n"; + << dev_info.name() << ", CC " << dev_info.majorVersion() + << dev_info.minorVersion() << "\n"; return -1; } } diff --git a/samples/gpu/performance/performance.cpp b/samples/gpu/performance/performance.cpp index 42fd978c1..8af0b3d0d 100644 --- a/samples/gpu/performance/performance.cpp +++ b/samples/gpu/performance/performance.cpp @@ -191,7 +191,7 @@ int main(int argc, const char* argv[]) DeviceInfo dev_info(device); if (!dev_info.isCompatible()) { - cerr << "GPU module isn't built for GPU #" << device << " " << dev_info.name() << ", CC " << dev_info.major() << '.' 
<< dev_info.minor() << endl; + cerr << "GPU module isn't built for GPU #" << device << " " << dev_info.name() << ", CC " << dev_info.majorVersion() << '.' << dev_info.minorVersion() << endl; return -1; } setDevice(device); diff --git a/samples/gpu/stereo_multi.cpp b/samples/gpu/stereo_multi.cpp index 1bb09b22b..83e2f2578 100644 --- a/samples/gpu/stereo_multi.cpp +++ b/samples/gpu/stereo_multi.cpp @@ -81,8 +81,8 @@ int main(int argc, char** argv) if (!dev_info.isCompatible()) { std::cout << "GPU module isn't built for GPU #" << i << " (" - << dev_info.name() << ", CC " << dev_info.major() - << dev_info.minor() << "\n"; + << dev_info.name() << ", CC " << dev_info.majorVersion() + << dev_info.minorVersion() << "\n"; return -1; } } From 37f4e400e4a8a855a742af5b263e61cb9254182e Mon Sep 17 00:00:00 2001 From: abidrahmank Date: Mon, 24 Jun 2013 12:13:59 +0530 Subject: [PATCH 117/121] Added cv2.boxPoints() functionality to Python bindings (Feature #2023) http://www.code.opencv.org/issues/2023 eg: In [3]: box = ((10,10),(5,5),0) In [4]: cv2.boxPoints(box) Out[4]: array([[ 7.5, 12.5], [ 7.5, 7.5], [ 12.5, 7.5], [ 12.5, 12.5]], dtype=float32) --- modules/imgproc/include/opencv2/imgproc.hpp | 3 +++ modules/imgproc/src/rotcalipers.cpp | 7 +++++++ 2 files changed, 10 insertions(+) diff --git a/modules/imgproc/include/opencv2/imgproc.hpp b/modules/imgproc/include/opencv2/imgproc.hpp index fcaf6a58e..6d6108872 100644 --- a/modules/imgproc/include/opencv2/imgproc.hpp +++ b/modules/imgproc/include/opencv2/imgproc.hpp @@ -1318,6 +1318,9 @@ CV_EXPORTS_W double contourArea( InputArray contour, bool oriented = false ); //! computes the minimal rotated rectangle for a set of points CV_EXPORTS_W RotatedRect minAreaRect( InputArray points ); +//! computes boxpoints +CV_EXPORTS_W void boxPoints(RotatedRect box, OutputArray points); + //! 
computes the minimal enclosing circle for a set of points CV_EXPORTS_W void minEnclosingCircle( InputArray points, CV_OUT Point2f& center, CV_OUT float& radius ); diff --git a/modules/imgproc/src/rotcalipers.cpp b/modules/imgproc/src/rotcalipers.cpp index cc43732c2..98ae6df03 100644 --- a/modules/imgproc/src/rotcalipers.cpp +++ b/modules/imgproc/src/rotcalipers.cpp @@ -398,3 +398,10 @@ cvMinAreaRect2( const CvArr* array, CvMemStorage* /*storage*/ ) return (CvBox2D)rr; } +void cv::boxPoints(cv::RotatedRect box, OutputArray _pts) +{ + _pts.create(4, 2, CV_32F); + Mat pts = _pts.getMat(); + box.points((Point2f*)pts.data); +} + From bcf9117957a6c8e79e37761aa4734533db1e18fb Mon Sep 17 00:00:00 2001 From: abidrahmank Date: Mon, 24 Jun 2013 15:53:45 +0530 Subject: [PATCH 118/121] Added missing python functions in highgui documentation setMouseCallback createTrackbar --- modules/highgui/doc/user_interface.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/modules/highgui/doc/user_interface.rst b/modules/highgui/doc/user_interface.rst index f84a04c21..e4276718b 100644 --- a/modules/highgui/doc/user_interface.rst +++ b/modules/highgui/doc/user_interface.rst @@ -9,6 +9,8 @@ Creates a trackbar and attaches it to the specified window. .. ocv:function:: int createTrackbar( const String& trackbarname, const String& winname, int* value, int count, TrackbarCallback onChange=0, void* userdata=0) +.. ocv:pyfunction:: cv2.createTrackbar(trackbarName, windowName, value, count, onChange) -> None + .. ocv:cfunction:: int cvCreateTrackbar( const char* trackbar_name, const char* window_name, int* value, int count, CvTrackbarCallback on_change=NULL ) :param trackbarname: Name of the created trackbar. @@ -181,6 +183,8 @@ Sets mouse handler for the specified window .. ocv:function:: void setMouseCallback( const String& winname, MouseCallback onMouse, void* userdata=0 ) +.. ocv:pyfunction:: cv2.setMouseCallback(windowName, onMouse [, param]) -> None + .. 
ocv:cfunction:: void cvSetMouseCallback( const char* window_name, CvMouseCallback on_mouse, void* param=NULL ) :param winname: Window name From a1ea1a7ec5b013c3730400d80b609453065c9191 Mon Sep 17 00:00:00 2001 From: abidrahmank Date: Mon, 24 Jun 2013 16:17:23 +0530 Subject: [PATCH 119/121] boxpoints documentation --- ...ructural_analysis_and_shape_descriptors.rst | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/modules/imgproc/doc/structural_analysis_and_shape_descriptors.rst b/modules/imgproc/doc/structural_analysis_and_shape_descriptors.rst index 6f7cba3a9..136d3e3df 100644 --- a/modules/imgproc/doc/structural_analysis_and_shape_descriptors.rst +++ b/modules/imgproc/doc/structural_analysis_and_shape_descriptors.rst @@ -522,6 +522,24 @@ The function calculates and returns the minimum-area bounding rectangle (possibl +boxPoints +----------- +Finds the four vertices of a rotated rect. Useful to draw the rotated rectangle. + +.. ocv:function:: void boxPoints(RotatedRect box, OutputArray points) + +.. ocv:pyfunction:: cv2.boxPoints(box[, points]) -> points + +.. ocv:cfunction:: void cvBoxPoints( CvBox2D box, CvPoint2D32f pt[4] ) + + :param box: The input rotated rectangle. It may be the output of .. ocv:function:: minAreaRect. + + :param points: The output array of four vertices of rectangles. + +The function finds the four vertices of a rotated rectangle. This function is useful to draw the rectangle. In C++, instead of using this function, you can directly use box.points() method. Please visit the `tutorial on bounding rectangle `_ for more information. + + + minEnclosingCircle ---------------------- Finds a circle of the minimum area enclosing a 2D point set. 
From 3c32fc13292a32719a429741b71bd82ce78154f1 Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Tue, 25 Jun 2013 17:45:23 +0400 Subject: [PATCH 120/121] removed obsolete methods from gpu::Stream --- modules/core/include/opencv2/core/gpu.hpp | 13 ------- modules/core/include/opencv2/core/gpu.inl.hpp | 36 ------------------- 2 files changed, 49 deletions(-) diff --git a/modules/core/include/opencv2/core/gpu.hpp b/modules/core/include/opencv2/core/gpu.hpp index 775a9d026..79d90c2c3 100644 --- a/modules/core/include/opencv2/core/gpu.hpp +++ b/modules/core/include/opencv2/core/gpu.hpp @@ -375,19 +375,6 @@ public: //! returns true if stream object is not default (!= 0) operator bool_type() const; - // obsolete methods - - void enqueueDownload(const GpuMat& src, OutputArray dst); - - void enqueueUpload(InputArray src, GpuMat& dst); - - void enqueueCopy(const GpuMat& src, OutputArray dst); - - void enqueueMemSet(GpuMat& src, Scalar val); - void enqueueMemSet(GpuMat& src, Scalar val, InputArray mask); - - void enqueueConvert(const GpuMat& src, OutputArray dst, int dtype, double alpha = 1.0, double beta = 0.0); - class Impl; private: diff --git a/modules/core/include/opencv2/core/gpu.inl.hpp b/modules/core/include/opencv2/core/gpu.inl.hpp index b44c2b151..e66b7e65e 100644 --- a/modules/core/include/opencv2/core/gpu.inl.hpp +++ b/modules/core/include/opencv2/core/gpu.inl.hpp @@ -525,42 +525,6 @@ void swap(CudaMem& a, CudaMem& b) //////////////////////////////// Stream /////////////////////////////// -inline -void Stream::enqueueDownload(const GpuMat& src, OutputArray dst) -{ - src.download(dst, *this); -} - -inline -void Stream::enqueueUpload(InputArray src, GpuMat& dst) -{ - dst.upload(src, *this); -} - -inline -void Stream::enqueueCopy(const GpuMat& src, OutputArray dst) -{ - src.copyTo(dst, *this); -} - -inline -void Stream::enqueueMemSet(GpuMat& src, Scalar val) -{ - src.setTo(val, *this); -} - -inline -void Stream::enqueueMemSet(GpuMat& src, Scalar val, 
InputArray mask) -{ - src.setTo(val, mask, *this); -} - -inline -void Stream::enqueueConvert(const GpuMat& src, OutputArray dst, int dtype, double alpha, double beta) -{ - src.convertTo(dst, dtype, alpha, beta, *this); -} - inline Stream::Stream(const Ptr& impl) : impl_(impl) From f15dc72b894adef83ada865a8338d4a61ed7ba97 Mon Sep 17 00:00:00 2001 From: Vikas Dhiman Date: Wed, 26 Jun 2013 12:16:25 -0400 Subject: [PATCH 121/121] Removed misleading comment and added the divisibility test to ::run function. --- modules/core/test/test_operations.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/modules/core/test/test_operations.cpp b/modules/core/test/test_operations.cpp index 84a2e573e..1fbc4242f 100644 --- a/modules/core/test/test_operations.cpp +++ b/modules/core/test/test_operations.cpp @@ -896,7 +896,7 @@ bool CV_OperationsTest::TestMatxElementwiseDivison() { try { - Matx22f mat(2, 4, 6, 8); // Identity matrix + Matx22f mat(2, 4, 6, 8); Matx22f mat2(2, 2, 2, 2); Matx22f res = mat / mat2; @@ -1132,6 +1132,9 @@ void CV_OperationsTest::run( int /* start_from */) if (!TestMatxMultiplication()) return; + if (!TestMatxElementwiseDivison()) + return; + if (!TestSubMatAccess()) return;