diff --git a/3rdparty/libtiff/CMakeLists.txt b/3rdparty/libtiff/CMakeLists.txt index f77921081..cea2f906f 100644 --- a/3rdparty/libtiff/CMakeLists.txt +++ b/3rdparty/libtiff/CMakeLists.txt @@ -17,7 +17,7 @@ check_include_file(string.h HAVE_STRING_H) check_include_file(sys/types.h HAVE_SYS_TYPES_H) check_include_file(unistd.h HAVE_UNISTD_H) -if(WIN32) +if(WIN32 AND NOT HAVE_WINRT) set(USE_WIN32_FILEIO 1) endif() @@ -79,14 +79,12 @@ set(lib_srcs "${CMAKE_CURRENT_BINARY_DIR}/tif_config.h" ) -if(UNIX) +if(WIN32 AND NOT HAVE_WINRT) + list(APPEND lib_srcs tif_win32.c) +else() list(APPEND lib_srcs tif_unix.c) endif() - -if(WIN32) - list(APPEND lib_srcs tif_win32.c) -endif(WIN32) - + ocv_warnings_disable(CMAKE_C_FLAGS -Wno-unused-but-set-variable -Wmissing-prototypes -Wmissing-declarations -Wundef -Wunused -Wsign-compare -Wcast-align -Wshadow -Wno-maybe-uninitialized -Wno-pointer-to-int-cast -Wno-int-to-pointer-cast) ocv_warnings_disable(CMAKE_C_FLAGS -Wunused-parameter) # clang diff --git a/cmake/OpenCVCRTLinkage.cmake b/cmake/OpenCVCRTLinkage.cmake index 8e689da80..8a297c685 100644 --- a/cmake/OpenCVCRTLinkage.cmake +++ b/cmake/OpenCVCRTLinkage.cmake @@ -36,6 +36,9 @@ endif() if (HAVE_WINRT) add_definitions(/DWINVER=0x0602 /DNTDDI_VERSION=NTDDI_WIN8 /D_WIN32_WINNT=0x0602) + set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} /appcontainer") + set(CMAKE_MODULE_LINKER_FLAGS "${CMAKE_MODULE_LINKER_FLAGS} /appcontainer") + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} /appcontainer") endif() if(NOT BUILD_SHARED_LIBS AND BUILD_WITH_STATIC_CRT) diff --git a/doc/opencv-logo2.png b/doc/opencv-logo2.png index 615fd2add..bc71a2ae5 100644 Binary files a/doc/opencv-logo2.png and b/doc/opencv-logo2.png differ diff --git a/doc/tutorials/core/mat_the_basic_image_container/mat_the_basic_image_container.rst b/doc/tutorials/core/mat_the_basic_image_container/mat_the_basic_image_container.rst index 03d82bbd4..76c5a4541 100644 --- 
a/doc/tutorials/core/mat_the_basic_image_container/mat_the_basic_image_container.rst +++ b/doc/tutorials/core/mat_the_basic_image_container/mat_the_basic_image_container.rst @@ -143,7 +143,7 @@ Although *Mat* works really well as an image container, it is also a general mat You cannot initialize the matrix values with this construction. It will only reallocate its matrix data memory if the new size will not fit into the old one. - + MATLAB style initializer: :basicstructures:`zeros() `, :basicstructures:`ones() `, ::basicstructures:`eyes() `. Specify size and data type to use: + + MATLAB style initializer: :basicstructures:`zeros() `, :basicstructures:`ones() `, :basicstructures:`eye() `. Specify size and data type to use: .. literalinclude:: ../../../../samples/cpp/tutorial_code/core/mat_the_basic_image_container/mat_the_basic_image_container.cpp :language: cpp diff --git a/modules/bioinspired/src/precomp.cpp b/modules/bioinspired/src/precomp.cpp deleted file mode 100644 index 3e0ec42de..000000000 --- a/modules/bioinspired/src/precomp.cpp +++ /dev/null @@ -1,44 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// Intel License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000, Intel Corporation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. 
-// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of Intel Corporation may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#include "precomp.hpp" - -/* End of file. 
*/ diff --git a/modules/bioinspired/src/retina.cpp b/modules/bioinspired/src/retina.cpp index 2ec7286bc..75e4b841f 100644 --- a/modules/bioinspired/src/retina.cpp +++ b/modules/bioinspired/src/retina.cpp @@ -628,6 +628,7 @@ void RetinaImpl::_init(const cv::Size inputSz, const bool colorMode, int colorSa delete _retinaFilter; _retinaFilter = new RetinaFilter(inputSz.height, inputSz.width, colorMode, colorSamplingMethod, useRetinaLogSampling, reductionFactor, samplingStrenght); + _retinaParameters.OPLandIplParvo.colorMode = colorMode; // prepare the default parameter XML file with default setup setup(_retinaParameters); diff --git a/modules/bioinspired/test/test_precomp.cpp b/modules/bioinspired/test/test_precomp.cpp deleted file mode 100644 index 5956e13e3..000000000 --- a/modules/bioinspired/test/test_precomp.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "test_precomp.hpp" diff --git a/modules/core/include/opencv2/core.hpp b/modules/core/include/opencv2/core.hpp index 9833315d5..c7f07ed45 100644 --- a/modules/core/include/opencv2/core.hpp +++ b/modules/core/include/opencv2/core.hpp @@ -670,6 +670,10 @@ public: //! reconstructs the original vector from the projection void backProject(InputArray vec, OutputArray result) const; + //! 
write and load PCA matrix + void write(FileStorage& fs ) const; + void read(const FileNode& fs); + Mat eigenvectors; //!< eigenvectors of the covariation matrix Mat eigenvalues; //!< eigenvalues of the covariation matrix Mat mean; //!< mean value subtracted before the projection and added after the back projection diff --git a/modules/core/perf/perf_stat.cpp b/modules/core/perf/perf_stat.cpp index 9698076ad..6b5f0ff52 100644 --- a/modules/core/perf/perf_stat.cpp +++ b/modules/core/perf/perf_stat.cpp @@ -83,8 +83,8 @@ PERF_TEST_P(Size_MatType, meanStdDev_mask, TYPICAL_MATS) TEST_CYCLE() meanStdDev(src, mean, dev, mask); - SANITY_CHECK(mean, 1e-6); - SANITY_CHECK(dev, 1e-6); + SANITY_CHECK(mean, 1e-5); + SANITY_CHECK(dev, 1e-5); } PERF_TEST_P(Size_MatType, countNonZero, testing::Combine( testing::Values( TYPICAL_MAT_SIZES ), testing::Values( CV_8UC1, CV_8SC1, CV_16UC1, CV_16SC1, CV_32SC1, CV_32FC1, CV_64FC1 ) )) diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp index 7ad7b097d..313d06d88 100644 --- a/modules/core/src/arithm.cpp +++ b/modules/core/src/arithm.cpp @@ -2153,10 +2153,30 @@ cmp_(const T* src1, size_t step1, const T* src2, size_t step2, } } +#if ARITHM_USE_IPP +inline static IppCmpOp convert_cmp(int _cmpop) +{ + return _cmpop == CMP_EQ ? ippCmpEq : + _cmpop == CMP_GT ? ippCmpGreater : + _cmpop == CMP_GE ? ippCmpGreaterEq : + _cmpop == CMP_LT ? ippCmpLess : + _cmpop == CMP_LE ? 
ippCmpLessEq : + (IppCmpOp)-1; +} +#endif static void cmp8u(const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, Size size, void* _cmpop) { +#if ARITHM_USE_IPP + IppCmpOp op = convert_cmp(*(int *)_cmpop); + if( op >= 0 ) + { + fixSteps(size, sizeof(dst[0]), step1, step2, step); + if( ippiCompare_8u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op) >= 0 ) + return; + } +#endif //vz optimized cmp_(src1, step1, src2, step2, dst, step, size, *(int*)_cmpop); int code = *(int*)_cmpop; step1 /= sizeof(src1[0]); @@ -2231,12 +2251,30 @@ static void cmp8s(const schar* src1, size_t step1, const schar* src2, size_t ste static void cmp16u(const ushort* src1, size_t step1, const ushort* src2, size_t step2, uchar* dst, size_t step, Size size, void* _cmpop) { +#if ARITHM_USE_IPP + IppCmpOp op = convert_cmp(*(int *)_cmpop); + if( op >= 0 ) + { + fixSteps(size, sizeof(dst[0]), step1, step2, step); + if( ippiCompare_16u_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op) >= 0 ) + return; + } +#endif cmp_(src1, step1, src2, step2, dst, step, size, *(int*)_cmpop); } static void cmp16s(const short* src1, size_t step1, const short* src2, size_t step2, uchar* dst, size_t step, Size size, void* _cmpop) { +#if ARITHM_USE_IPP + IppCmpOp op = convert_cmp(*(int *)_cmpop); + if( op >= 0 ) /* same validity test as cmp8u/cmp16u/cmp32f; convert_cmp() yields (IppCmpOp)-1 for unsupported ops */ + { + fixSteps(size, sizeof(dst[0]), step1, step2, step); + if( ippiCompare_16s_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op) >= 0 ) + return; + } +#endif //vz optimized cmp_(src1, step1, src2, step2, dst, step, size, *(int*)_cmpop); int code = *(int*)_cmpop; @@ -2334,6 +2372,15 @@ static void cmp32s(const int* src1, size_t step1, const int* src2, size_t step2, static void cmp32f(const float* src1, size_t step1, const float* src2, size_t step2, uchar* dst, size_t step, Size size, void* _cmpop) { +#if ARITHM_USE_IPP + IppCmpOp op = convert_cmp(*(int *)_cmpop); + if( op >= 0 ) + { + 
fixSteps(size, sizeof(dst[0]), step1, step2, step); + if( ippiCompare_32f_C1R(src1, (int)step1, src2, (int)step2, dst, (int)step, (IppiSize&)size, op) >= 0 ) + return; + } +#endif cmp_(src1, step1, src2, step2, dst, step, size, *(int*)_cmpop); } diff --git a/modules/core/src/dxt.cpp b/modules/core/src/dxt.cpp index c76705f1b..a802868df 100644 --- a/modules/core/src/dxt.cpp +++ b/modules/core/src/dxt.cpp @@ -50,6 +50,13 @@ namespace cv # pragma warning(disable: 4748) #endif +#if defined HAVE_IPP && IPP_VERSION_MAJOR >= 7 +#define USE_IPP_DFT 1 +#else +#undef USE_IPP_DFT +#endif + + /****************************************************************************************\ Discrete Fourier Transform \****************************************************************************************/ @@ -455,7 +462,7 @@ template<> struct DFT_VecR4 #endif -#ifdef HAVE_IPP +#ifdef USE_IPP_DFT static void ippsDFTFwd_CToC( const Complex* src, Complex* dst, const void* spec, uchar* buf) { @@ -517,7 +524,7 @@ DFT( const Complex* src, Complex* dst, int n, int nf, const int* factors, const int* itab, const Complex* wave, int tab_size, const void* -#ifdef HAVE_IPP +#ifdef USE_IPP_DFT spec #endif , Complex* buf, @@ -537,7 +544,7 @@ DFT( const Complex* src, Complex* dst, int n, T scale = (T)_scale; int tab_step; -#ifdef HAVE_IPP +#ifdef USE_IPP_DFT if( spec ) { if( !inv ) @@ -957,7 +964,7 @@ DFT( const Complex* src, Complex* dst, int n, template static void RealDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab, const Complex* wave, int tab_size, const void* -#ifdef HAVE_IPP +#ifdef USE_IPP_DFT spec #endif , @@ -968,11 +975,18 @@ RealDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab, int j, n2 = n >> 1; dst += complex_output; -#ifdef HAVE_IPP +#ifdef USE_IPP_DFT if( spec ) { ippsDFTFwd_RToPack( src, dst, spec, (uchar*)buf ); - goto finalize; + if( complex_output ) + { + dst[-1] = dst[0]; + dst[0] = 0; + if( (n & 1) == 0 ) + dst[n] = 0; + } + 
return; } #endif assert( tab_size == n ); @@ -1056,15 +1070,11 @@ RealDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab, } } -#ifdef HAVE_IPP -finalize: -#endif if( complex_output && (n & 1) == 0 ) { dst[-1] = dst[0]; dst[0] = 0; - if( (n & 1) == 0 ) - dst[n] = 0; + dst[n] = 0; } } @@ -1076,7 +1086,7 @@ template static void CCSIDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab, const Complex* wave, int tab_size, const void* -#ifdef HAVE_IPP +#ifdef USE_IPP_DFT spec #endif , Complex* buf, @@ -1097,7 +1107,7 @@ CCSIDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab, ((T*)src)[1] = src[0]; src++; } -#ifdef HAVE_IPP +#ifdef USE_IPP_DFT if( spec ) { ippsDFTInv_PackToR( src, dst, spec, (uchar*)buf ); @@ -1225,7 +1235,7 @@ CCSIDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab, } } -#ifdef HAVE_IPP +#ifdef USE_IPP_DFT finalize: #endif if( complex_input ) @@ -1458,7 +1468,7 @@ static void CCSIDFT_64f( const double* src, double* dst, int n, int nf, int* fac } -#ifdef HAVE_IPP +#ifdef USE_IPP_DFT typedef IppStatus (CV_STDCALL* IppDFTGetSizeFunc)(int, int, IppHintAlgorithm, int*, int*, int*); typedef IppStatus (CV_STDCALL* IppDFTInitFunc)(int, int, IppHintAlgorithm, void*, uchar*); #endif @@ -1486,7 +1496,7 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows ) int elem_size = (int)src.elemSize1(), complex_elem_size = elem_size*2; int factors[34]; bool inplace_transform = false; -#ifdef HAVE_IPP +#ifdef USE_IPP_DFT AutoBuffer ippbuf; int ipp_norm_flag = !(flags & DFT_SCALE) ? 8 : inv ? 
2 : 1; #endif @@ -1546,12 +1556,8 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows ) } spec = 0; -#ifdef HAVE_IPP - if( -#if IPP_VERSION_MAJOR >= 7 - depth == CV_32F && // IPP 7.x and 8.0 have bug somewhere in double-precision DFT -#endif - len*count >= 64 ) // use IPP DFT if available +#ifdef USE_IPP_DFT + if( len*count >= 64 ) // use IPP DFT if available { int specsize=0, initsize=0, worksize=0; IppDFTGetSizeFunc getSizeFunc = 0; diff --git a/modules/core/src/glob.cpp b/modules/core/src/glob.cpp index 208b4e05c..c75bd2e66 100644 --- a/modules/core/src/glob.cpp +++ b/modules/core/src/glob.cpp @@ -91,6 +91,7 @@ namespace if(dir->handle == INVALID_HANDLE_VALUE) { /*closedir will do all cleanup*/ + delete dir; return 0; } return dir; @@ -140,6 +141,7 @@ static bool isDir(const cv::String& path, DIR* dir) { #if defined WIN32 || defined _WIN32 || defined WINCE DWORD attributes; + BOOL status = TRUE; if (dir) attributes = dir->data.dwFileAttributes; else @@ -149,14 +151,14 @@ static bool isDir(const cv::String& path, DIR* dir) wchar_t wpath[MAX_PATH]; size_t copied = mbstowcs(wpath, path.c_str(), MAX_PATH); CV_Assert((copied != MAX_PATH) && (copied != (size_t)-1)); - ::GetFileAttributesExW(wpath, GetFileExInfoStandard, &all_attrs); + status = ::GetFileAttributesExW(wpath, GetFileExInfoStandard, &all_attrs); #else - ::GetFileAttributesExA(path.c_str(), GetFileExInfoStandard, &all_attrs); + status = ::GetFileAttributesExA(path.c_str(), GetFileExInfoStandard, &all_attrs); #endif attributes = all_attrs.dwFileAttributes; } - return (attributes != INVALID_FILE_ATTRIBUTES) && ((attributes & FILE_ATTRIBUTE_DIRECTORY) != 0); + return status && ((attributes & FILE_ATTRIBUTE_DIRECTORY) != 0); #else (void)dir; struct stat stat_buf; diff --git a/modules/core/src/matmul.cpp b/modules/core/src/matmul.cpp index 7c33a3077..7d832cb0f 100644 --- a/modules/core/src/matmul.cpp +++ b/modules/core/src/matmul.cpp @@ -2911,6 +2911,27 @@ PCA& 
PCA::operator()(InputArray _data, InputArray __mean, int flags, int maxComp return *this; } +void PCA::write(FileStorage& fs ) const +{ + CV_Assert( fs.isOpened() ); + + fs << "name" << "PCA"; + fs << "vectors" << eigenvectors; + fs << "values" << eigenvalues; + fs << "mean" << mean; +} + +void PCA::read(const FileNode& fs) +{ + CV_Assert( !fs.empty() ); + String name = (String)fs["name"]; + CV_Assert( name == "PCA" ); + + cv::read(fs["vectors"], eigenvectors); + cv::read(fs["values"], eigenvalues); + cv::read(fs["mean"], mean); +} + template int computeCumulativeEnergy(const Mat& eigenvalues, double retainedVariance) { diff --git a/modules/core/src/stat.cpp b/modules/core/src/stat.cpp index 25729a920..86555fcc3 100644 --- a/modules/core/src/stat.cpp +++ b/modules/core/src/stat.cpp @@ -700,6 +700,99 @@ void cv::meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, Input CV_Assert( mask.empty() || mask.type() == CV_8U ); int k, cn = src.channels(), depth = src.depth(); + +#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) + size_t total_size = src.total(); + int rows = src.size[0], cols = (int)(total_size/rows); + if( src.dims == 2 || (src.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) ) + { + Ipp64f mean_temp[3]; + Ipp64f stddev_temp[3]; + Ipp64f *pmean = &mean_temp[0]; + Ipp64f *pstddev = &stddev_temp[0]; + Mat mean, stddev; + int dcn_mean = -1; + if( _mean.needed() ) + { + if( !_mean.fixedSize() ) + _mean.create(cn, 1, CV_64F, -1, true); + mean = _mean.getMat(); + dcn_mean = (int)mean.total(); + pmean = (Ipp64f *)mean.data; + } + int dcn_stddev = -1; + if( _sdv.needed() ) + { + if( !_sdv.fixedSize() ) + _sdv.create(cn, 1, CV_64F, -1, true); + stddev = _sdv.getMat(); + dcn_stddev = (int)stddev.total(); + pstddev = (Ipp64f *)stddev.data; + } + for( int k = cn; k < dcn_mean; k++ ) + pmean[k] = 0; + for( int k = cn; k < dcn_stddev; k++ ) + pstddev[k] = 0; + IppiSize sz = { cols, rows }; + int type = src.type(); + if( 
!mask.empty() ) + { + typedef IppStatus (CV_STDCALL* ippiMaskMeanStdDevFuncC1)(const void *, int, void *, int, IppiSize, Ipp64f *, Ipp64f *); + ippiMaskMeanStdDevFuncC1 ippFuncC1 = + type == CV_8UC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_8u_C1MR : + type == CV_16UC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_16u_C1MR : + type == CV_32FC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_32f_C1MR : + 0; + if( ippFuncC1 ) + { + if( ippFuncC1(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, pmean, pstddev) >= 0 ) + return; + } + typedef IppStatus (CV_STDCALL* ippiMaskMeanStdDevFuncC3)(const void *, int, void *, int, IppiSize, int, Ipp64f *, Ipp64f *); + ippiMaskMeanStdDevFuncC3 ippFuncC3 = + type == CV_8UC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_8u_C3CMR : + type == CV_16UC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_16u_C3CMR : + type == CV_32FC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_32f_C3CMR : + 0; + if( ippFuncC3 ) + { + if( ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 && + ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 && + ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 ) + return; + } + } + else + { + typedef IppStatus (CV_STDCALL* ippiMeanStdDevFuncC1)(const void *, int, IppiSize, Ipp64f *, Ipp64f *); + ippiMeanStdDevFuncC1 ippFuncC1 = + type == CV_8UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_8u_C1R : + type == CV_16UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_16u_C1R : + //type == CV_32FC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_32f_C1R ://Aug 2013: bug in IPP 7.1, 8.0 + 0; + if( ippFuncC1 ) + { + if( ippFuncC1(src.data, (int)src.step[0], sz, pmean, pstddev) >= 0 ) + return; + } + typedef IppStatus (CV_STDCALL* ippiMeanStdDevFuncC3)(const void *, int, IppiSize, int, Ipp64f *, Ipp64f *); + ippiMeanStdDevFuncC3 ippFuncC3 = + type == CV_8UC3 ? 
(ippiMeanStdDevFuncC3)ippiMean_StdDev_8u_C3CR : + type == CV_16UC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_16u_C3CR : + type == CV_32FC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_32f_C3CR : + 0; + if( ippFuncC3 ) + { + if( ippFuncC3(src.data, (int)src.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 && + ippFuncC3(src.data, (int)src.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 && + ippFuncC3(src.data, (int)src.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 ) + return; + } + } + } +#endif + + SumSqrFunc func = getSumSqrTab(depth); CV_Assert( func != 0 ); @@ -919,6 +1012,83 @@ void cv::minMaxIdx(InputArray _src, double* minVal, CV_Assert( (cn == 1 && (mask.empty() || mask.type() == CV_8U)) || (cn >= 1 && mask.empty() && !minIdx && !maxIdx) ); + +#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) + size_t total_size = src.total(); + int rows = src.size[0], cols = (int)(total_size/rows); + if( cn == 1 && ( src.dims == 2 || (src.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) ) ) + { + IppiSize sz = { cols, rows }; + int type = src.type(); + if( !mask.empty() ) + { + typedef IppStatus (CV_STDCALL* ippiMaskMinMaxIndxFuncC1)(const void *, int, const void *, int, IppiSize, Ipp32f *, Ipp32f *, IppiPoint *, IppiPoint *); + ippiMaskMinMaxIndxFuncC1 ippFuncC1 = + type == CV_8UC1 ? (ippiMaskMinMaxIndxFuncC1)ippiMinMaxIndx_8u_C1MR : + type == CV_16UC1 ? (ippiMaskMinMaxIndxFuncC1)ippiMinMaxIndx_16u_C1MR : + type == CV_32FC1 ? 
(ippiMaskMinMaxIndxFuncC1)ippiMinMaxIndx_32f_C1MR : + 0; + if( ippFuncC1 ) + { + Ipp32f min, max; + IppiPoint minp, maxp; + if( ippFuncC1(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, &min, &max, &minp, &maxp) >= 0 ) + { + if( minVal ) + *minVal = (double)min; + if( maxVal ) + *maxVal = (double)max; + if( !minp.x && !minp.y && !maxp.x && !maxp.y && !mask.data[0] ) + minp.x = maxp.x = -1; + if( minIdx ) + { + size_t minidx = minp.y * cols + minp.x + 1; + ofs2idx(src, minidx, minIdx); + } + if( maxIdx ) + { + size_t maxidx = maxp.y * cols + maxp.x + 1; + ofs2idx(src, maxidx, maxIdx); + } + return; + } + } + } + else + { + typedef IppStatus (CV_STDCALL* ippiMinMaxIndxFuncC1)(const void *, int, IppiSize, Ipp32f *, Ipp32f *, IppiPoint *, IppiPoint *); + ippiMinMaxIndxFuncC1 ippFuncC1 = + type == CV_8UC1 ? (ippiMinMaxIndxFuncC1)ippiMinMaxIndx_8u_C1R : + type == CV_16UC1 ? (ippiMinMaxIndxFuncC1)ippiMinMaxIndx_16u_C1R : + type == CV_32FC1 ? (ippiMinMaxIndxFuncC1)ippiMinMaxIndx_32f_C1R : + 0; + if( ippFuncC1 ) + { + Ipp32f min, max; + IppiPoint minp, maxp; + if( ippFuncC1(src.data, (int)src.step[0], sz, &min, &max, &minp, &maxp) >= 0 ) + { + if( minVal ) + *minVal = (double)min; + if( maxVal ) + *maxVal = (double)max; + if( minIdx ) + { + size_t minidx = minp.y * cols + minp.x + 1; + ofs2idx(src, minidx, minIdx); + } + if( maxIdx ) + { + size_t maxidx = maxp.y * cols + maxp.x + 1; + ofs2idx(src, maxidx, maxIdx); + } + return; + } + } + } + } +#endif + MinMaxIdxFunc func = getMinmaxTab(depth); CV_Assert( func != 0 ); @@ -1443,6 +1613,147 @@ double cv::norm( InputArray _src, int normType, InputArray _mask ) CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR || ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src.type() == CV_8U) ); +#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) + size_t total_size = src.total(); + int rows = src.size[0], cols = (int)(total_size/rows); + if( src.dims == 2 || 
(src.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) + && (normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR) ) + { + IppiSize sz = { cols, rows }; + int type = src.type(); + if( !mask.empty() ) + { + typedef IppStatus (CV_STDCALL* ippiMaskNormFuncC1)(const void *, int, const void *, int, IppiSize, Ipp64f *); + ippiMaskNormFuncC1 ippFuncC1 = + normType == NORM_INF ? + (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_8u_C1MR : + type == CV_8SC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_8s_C1MR : + type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_16u_C1MR : + type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_32f_C1MR : + 0) : + normType == NORM_L1 ? + (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_8u_C1MR : + type == CV_8SC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_8s_C1MR : + type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_16u_C1MR : + type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_32f_C1MR : + 0) : + normType == NORM_L2 || normType == NORM_L2SQR ? + (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_8u_C1MR : + type == CV_8SC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_8s_C1MR : + type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_16u_C1MR : + type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_32f_C1MR : + 0) : 0; + if( ippFuncC1 ) + { + Ipp64f norm; + if( ippFuncC1(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, &norm) >= 0 ) + { + return normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm; + } + } + typedef IppStatus (CV_STDCALL* ippiMaskNormFuncC3)(const void *, int, const void *, int, IppiSize, int, Ipp64f *); + ippiMaskNormFuncC3 ippFuncC3 = + normType == NORM_INF ? + (type == CV_8UC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_8u_C3CMR : + type == CV_8SC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_8s_C3CMR : + type == CV_16UC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_16u_C3CMR : + type == CV_32FC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_32f_C3CMR : + 0) : + normType == NORM_L1 ? 
+ (type == CV_8UC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_8u_C3CMR : + type == CV_8SC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_8s_C3CMR : + type == CV_16UC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_16u_C3CMR : + type == CV_32FC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_32f_C3CMR : + 0) : + normType == NORM_L2 || normType == NORM_L2SQR ? + (type == CV_8UC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_8u_C3CMR : + type == CV_8SC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_8s_C3CMR : + type == CV_16UC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_16u_C3CMR : + type == CV_32FC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_32f_C3CMR : + 0) : 0; + if( ippFuncC3 ) + { + Ipp64f norm1, norm2, norm3; + if( ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 1, &norm1) >= 0 && + ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 2, &norm2) >= 0 && + ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 3, &norm3) >= 0) + { + Ipp64f norm = + normType == NORM_INF ? std::max(std::max(norm1, norm2), norm3) : + normType == NORM_L1 ? norm1 + norm2 + norm3 : + normType == NORM_L2 || normType == NORM_L2SQR ? std::sqrt(norm1 * norm1 + norm2 * norm2 + norm3 * norm3) : + 0; + return normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm; + } + } + } + else + { + typedef IppStatus (CV_STDCALL* ippiNormFunc)(const void *, int, IppiSize, Ipp64f *, IppHintAlgorithm hint); + ippiNormFunc ippFunc = + normType == NORM_INF ? + (type == CV_8UC1 ? (ippiNormFunc)ippiNorm_Inf_8u_C1R : + type == CV_8UC3 ? (ippiNormFunc)ippiNorm_Inf_8u_C3R : + type == CV_8UC4 ? (ippiNormFunc)ippiNorm_Inf_8u_C4R : + type == CV_16UC1 ? (ippiNormFunc)ippiNorm_Inf_16u_C1R : + type == CV_16UC3 ? (ippiNormFunc)ippiNorm_Inf_16u_C3R : + type == CV_16UC4 ? (ippiNormFunc)ippiNorm_Inf_16u_C4R : + type == CV_16SC1 ? (ippiNormFunc)ippiNorm_Inf_16s_C1R : + //type == CV_16SC3 ? (ippiNormFunc)ippiNorm_Inf_16s_C3R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768 + //type == CV_16SC4 ? 
(ippiNormFunc)ippiNorm_Inf_16s_C4R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768 + type == CV_32FC1 ? (ippiNormFunc)ippiNorm_Inf_32f_C1R : + type == CV_32FC3 ? (ippiNormFunc)ippiNorm_Inf_32f_C3R : + type == CV_32FC4 ? (ippiNormFunc)ippiNorm_Inf_32f_C4R : + 0) : + normType == NORM_L1 ? + (type == CV_8UC1 ? (ippiNormFunc)ippiNorm_L1_8u_C1R : + type == CV_8UC3 ? (ippiNormFunc)ippiNorm_L1_8u_C3R : + type == CV_8UC4 ? (ippiNormFunc)ippiNorm_L1_8u_C4R : + type == CV_16UC1 ? (ippiNormFunc)ippiNorm_L1_16u_C1R : + type == CV_16UC3 ? (ippiNormFunc)ippiNorm_L1_16u_C3R : + type == CV_16UC4 ? (ippiNormFunc)ippiNorm_L1_16u_C4R : + type == CV_16SC1 ? (ippiNormFunc)ippiNorm_L1_16s_C1R : + type == CV_16SC3 ? (ippiNormFunc)ippiNorm_L1_16s_C3R : + type == CV_16SC4 ? (ippiNormFunc)ippiNorm_L1_16s_C4R : + type == CV_32FC1 ? (ippiNormFunc)ippiNorm_L1_32f_C1R : + type == CV_32FC3 ? (ippiNormFunc)ippiNorm_L1_32f_C3R : + type == CV_32FC4 ? (ippiNormFunc)ippiNorm_L1_32f_C4R : + 0) : + normType == NORM_L2 || normType == NORM_L2SQR ? + (type == CV_8UC1 ? (ippiNormFunc)ippiNorm_L2_8u_C1R : + type == CV_8UC3 ? (ippiNormFunc)ippiNorm_L2_8u_C3R : + type == CV_8UC4 ? (ippiNormFunc)ippiNorm_L2_8u_C4R : + type == CV_16UC1 ? (ippiNormFunc)ippiNorm_L2_16u_C1R : + type == CV_16UC3 ? (ippiNormFunc)ippiNorm_L2_16u_C3R : + type == CV_16UC4 ? (ippiNormFunc)ippiNorm_L2_16u_C4R : + type == CV_16SC1 ? (ippiNormFunc)ippiNorm_L2_16s_C1R : + type == CV_16SC3 ? (ippiNormFunc)ippiNorm_L2_16s_C3R : + type == CV_16SC4 ? (ippiNormFunc)ippiNorm_L2_16s_C4R : + type == CV_32FC1 ? (ippiNormFunc)ippiNorm_L2_32f_C1R : + type == CV_32FC3 ? (ippiNormFunc)ippiNorm_L2_32f_C3R : + type == CV_32FC4 ? (ippiNormFunc)ippiNorm_L2_32f_C4R : + 0) : 0; + if( ippFunc ) + { + Ipp64f norm_array[4]; + if( ippFunc(src.data, (int)src.step[0], sz, norm_array, ippAlgHintAccurate) >= 0 ) + { + Ipp64f norm = (normType == NORM_L2 || normType == NORM_L2SQR) ? 
norm_array[0] * norm_array[0] : norm_array[0]; + for( int i = 1; i < cn; i++ ) + { + norm = + normType == NORM_INF ? std::max(norm, norm_array[i]) : + normType == NORM_L1 ? norm + norm_array[i] : + normType == NORM_L2 || normType == NORM_L2SQR ? norm + norm_array[i] * norm_array[i] : + 0; + } + return normType == NORM_L2 ? (double)std::sqrt(norm) : (double)norm; + } + } + } + } +#endif + if( src.isContinuous() && mask.empty() ) { size_t len = src.total()*cn; @@ -1581,7 +1892,84 @@ double cv::norm( InputArray _src, int normType, InputArray _mask ) double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask ) { if( normType & CV_RELATIVE ) + { +#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) + Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat(); + + CV_Assert( src1.size == src2.size && src1.type() == src2.type() ); + + normType &= 7; + CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR || + ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src1.type() == CV_8U) ); + size_t total_size = src1.total(); + int rows = src1.size[0], cols = (int)(total_size/rows); + if( src1.dims == 2 || (src1.isContinuous() && src2.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) + && (normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR) ) + { + IppiSize sz = { cols, rows }; + int type = src1.type(); + if( !mask.empty() ) + { + typedef IppStatus (CV_STDCALL* ippiMaskNormRelFuncC1)(const void *, int, const void *, int, const void *, int, IppiSize, Ipp64f *); + ippiMaskNormRelFuncC1 ippFuncC1 = + normType == NORM_INF ? + (type == CV_8UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_Inf_8u_C1MR : + type == CV_8SC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_Inf_8s_C1MR : + type == CV_16UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_Inf_16u_C1MR : + type == CV_32FC1 ? 
(ippiMaskNormRelFuncC1)ippiNormRel_Inf_32f_C1MR : + 0) : + normType == NORM_L1 ? + (type == CV_8UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_8u_C1MR : + type == CV_8SC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_8s_C1MR : + type == CV_16UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_16u_C1MR : + type == CV_32FC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_32f_C1MR : + 0) : + normType == NORM_L2 || normType == NORM_L2SQR ? + (type == CV_8UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_8u_C1MR : + type == CV_8SC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_8s_C1MR : + type == CV_16UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_16u_C1MR : + type == CV_32FC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_32f_C1MR : + 0) : 0; + if( ippFuncC1 ) + { + Ipp64f norm; + if( ippFuncC1(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, &norm) >= 0 ) + return normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm; + } + } + else + { + typedef IppStatus (CV_STDCALL* ippiNormRelFunc)(const void *, int, const void *, int, IppiSize, Ipp64f *, IppHintAlgorithm hint); + ippiNormRelFunc ippFunc = + normType == NORM_INF ? + (type == CV_8UC1 ? (ippiNormRelFunc)ippiNormRel_Inf_8u_C1R : + type == CV_16UC1 ? (ippiNormRelFunc)ippiNormRel_Inf_16u_C1R : + type == CV_16SC1 ? (ippiNormRelFunc)ippiNormRel_Inf_16s_C1R : + type == CV_32FC1 ? (ippiNormRelFunc)ippiNormRel_Inf_32f_C1R : + 0) : + normType == NORM_L1 ? + (type == CV_8UC1 ? (ippiNormRelFunc)ippiNormRel_L1_8u_C1R : + type == CV_16UC1 ? (ippiNormRelFunc)ippiNormRel_L1_16u_C1R : + type == CV_16SC1 ? (ippiNormRelFunc)ippiNormRel_L1_16s_C1R : + type == CV_32FC1 ? (ippiNormRelFunc)ippiNormRel_L1_32f_C1R : + 0) : + normType == NORM_L2 || normType == NORM_L2SQR ? + (type == CV_8UC1 ? (ippiNormRelFunc)ippiNormRel_L2_8u_C1R : + type == CV_16UC1 ? (ippiNormRelFunc)ippiNormRel_L2_16u_C1R : + type == CV_16SC1 ? (ippiNormRelFunc)ippiNormRel_L2_16s_C1R : + type == CV_32FC1 ? 
(ippiNormRelFunc)ippiNormRel_L2_32f_C1R : + 0) : 0; + if( ippFunc ) + { + Ipp64f norm; + if( ippFunc(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], sz, &norm, ippAlgHintAccurate) >= 0 ) + return (double)norm; + } + } + } +#endif return norm(_src1, _src2, normType & ~CV_RELATIVE, _mask)/(norm(_src2, normType, _mask) + DBL_EPSILON); + } Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat(); int depth = src1.depth(), cn = src1.channels(); @@ -1592,6 +1980,145 @@ double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _m CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR || ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src1.type() == CV_8U) ); +#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) + size_t total_size = src1.total(); + int rows = src1.size[0], cols = (int)(total_size/rows); + if( src1.dims == 2 || (src1.isContinuous() && src2.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) + && (normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR) ) + { + IppiSize sz = { cols, rows }; + int type = src1.type(); + if( !mask.empty() ) + { + typedef IppStatus (CV_STDCALL* ippiMaskNormDiffFuncC1)(const void *, int, const void *, int, const void *, int, IppiSize, Ipp64f *); + ippiMaskNormDiffFuncC1 ippFuncC1 = + normType == NORM_INF ? + (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_8u_C1MR : + type == CV_8SC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_8s_C1MR : + type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_16u_C1MR : + type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_32f_C1MR : + 0) : + normType == NORM_L1 ? + (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_8u_C1MR : + type == CV_8SC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_8s_C1MR : + type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_16u_C1MR : + type == CV_32FC1 ? 
(ippiMaskNormDiffFuncC1)ippiNormDiff_L1_32f_C1MR : + 0) : + normType == NORM_L2 || normType == NORM_L2SQR ? + (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_8u_C1MR : + type == CV_8SC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_8s_C1MR : + type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_16u_C1MR : + type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_32f_C1MR : + 0) : 0; + if( ippFuncC1 ) + { + Ipp64f norm; + if( ippFuncC1(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, &norm) >= 0 ) + return normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm; + } + typedef IppStatus (CV_STDCALL* ippiMaskNormDiffFuncC3)(const void *, int, const void *, int, const void *, int, IppiSize, int, Ipp64f *); + ippiMaskNormDiffFuncC3 ippFuncC3 = + normType == NORM_INF ? + (type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_8u_C3CMR : + type == CV_8SC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_8s_C3CMR : + type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_16u_C3CMR : + type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_32f_C3CMR : + 0) : + normType == NORM_L1 ? + (type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_8u_C3CMR : + type == CV_8SC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_8s_C3CMR : + type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_16u_C3CMR : + type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_32f_C3CMR : + 0) : + normType == NORM_L2 || normType == NORM_L2SQR ? + (type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_8u_C3CMR : + type == CV_8SC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_8s_C3CMR : + type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_16u_C3CMR : + type == CV_32FC3 ? 
(ippiMaskNormDiffFuncC3)ippiNormDiff_L2_32f_C3CMR : + 0) : 0; + if( ippFuncC3 ) + { + Ipp64f norm1, norm2, norm3; + if( ippFuncC3(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, 1, &norm1) >= 0 && + ippFuncC3(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, 2, &norm2) >= 0 && + ippFuncC3(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, 3, &norm3) >= 0) + { + Ipp64f norm = + normType == NORM_INF ? std::max(std::max(norm1, norm2), norm3) : + normType == NORM_L1 ? norm1 + norm2 + norm3 : + normType == NORM_L2 || normType == NORM_L2SQR ? std::sqrt(norm1 * norm1 + norm2 * norm2 + norm3 * norm3) : + 0; + return normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm; + } + } + } + else + { + typedef IppStatus (CV_STDCALL* ippiNormDiffFunc)(const void *, int, const void *, int, IppiSize, Ipp64f *, IppHintAlgorithm hint); + ippiNormDiffFunc ippFunc = + normType == NORM_INF ? + (type == CV_8UC1 ? (ippiNormDiffFunc)ippiNormDiff_Inf_8u_C1R : + type == CV_8UC3 ? (ippiNormDiffFunc)ippiNormDiff_Inf_8u_C3R : + type == CV_8UC4 ? (ippiNormDiffFunc)ippiNormDiff_Inf_8u_C4R : + type == CV_16UC1 ? (ippiNormDiffFunc)ippiNormDiff_Inf_16u_C1R : + type == CV_16UC3 ? (ippiNormDiffFunc)ippiNormDiff_Inf_16u_C3R : + type == CV_16UC4 ? (ippiNormDiffFunc)ippiNormDiff_Inf_16u_C4R : + type == CV_16SC1 ? (ippiNormDiffFunc)ippiNormDiff_Inf_16s_C1R : + //type == CV_16SC3 ? (ippiNormDiffFunc)ippiNormDiff_Inf_16s_C3R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768 + //type == CV_16SC4 ? (ippiNormDiffFunc)ippiNormDiff_Inf_16s_C4R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768 + type == CV_32FC1 ? (ippiNormDiffFunc)ippiNormDiff_Inf_32f_C1R : + type == CV_32FC3 ? (ippiNormDiffFunc)ippiNormDiff_Inf_32f_C3R : + type == CV_32FC4 ? (ippiNormDiffFunc)ippiNormDiff_Inf_32f_C4R : + 0) : + normType == NORM_L1 ? + (type == CV_8UC1 ? 
(ippiNormDiffFunc)ippiNormDiff_L1_8u_C1R : + type == CV_8UC3 ? (ippiNormDiffFunc)ippiNormDiff_L1_8u_C3R : + type == CV_8UC4 ? (ippiNormDiffFunc)ippiNormDiff_L1_8u_C4R : + type == CV_16UC1 ? (ippiNormDiffFunc)ippiNormDiff_L1_16u_C1R : + type == CV_16UC3 ? (ippiNormDiffFunc)ippiNormDiff_L1_16u_C3R : + type == CV_16UC4 ? (ippiNormDiffFunc)ippiNormDiff_L1_16u_C4R : + type == CV_16SC1 ? (ippiNormDiffFunc)ippiNormDiff_L1_16s_C1R : + type == CV_16SC3 ? (ippiNormDiffFunc)ippiNormDiff_L1_16s_C3R : + type == CV_16SC4 ? (ippiNormDiffFunc)ippiNormDiff_L1_16s_C4R : + type == CV_32FC1 ? (ippiNormDiffFunc)ippiNormDiff_L1_32f_C1R : + type == CV_32FC3 ? (ippiNormDiffFunc)ippiNormDiff_L1_32f_C3R : + type == CV_32FC4 ? (ippiNormDiffFunc)ippiNormDiff_L1_32f_C4R : + 0) : + normType == NORM_L2 || normType == NORM_L2SQR ? + (type == CV_8UC1 ? (ippiNormDiffFunc)ippiNormDiff_L2_8u_C1R : + type == CV_8UC3 ? (ippiNormDiffFunc)ippiNormDiff_L2_8u_C3R : + type == CV_8UC4 ? (ippiNormDiffFunc)ippiNormDiff_L2_8u_C4R : + type == CV_16UC1 ? (ippiNormDiffFunc)ippiNormDiff_L2_16u_C1R : + type == CV_16UC3 ? (ippiNormDiffFunc)ippiNormDiff_L2_16u_C3R : + type == CV_16UC4 ? (ippiNormDiffFunc)ippiNormDiff_L2_16u_C4R : + type == CV_16SC1 ? (ippiNormDiffFunc)ippiNormDiff_L2_16s_C1R : + type == CV_16SC3 ? (ippiNormDiffFunc)ippiNormDiff_L2_16s_C3R : + type == CV_16SC4 ? (ippiNormDiffFunc)ippiNormDiff_L2_16s_C4R : + type == CV_32FC1 ? (ippiNormDiffFunc)ippiNormDiff_L2_32f_C1R : + type == CV_32FC3 ? (ippiNormDiffFunc)ippiNormDiff_L2_32f_C3R : + type == CV_32FC4 ? (ippiNormDiffFunc)ippiNormDiff_L2_32f_C4R : + 0) : 0; + if( ippFunc ) + { + Ipp64f norm_array[4]; + if( ippFunc(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], sz, norm_array, ippAlgHintAccurate) >= 0 ) + { + Ipp64f norm = (normType == NORM_L2 || normType == NORM_L2SQR) ? norm_array[0] * norm_array[0] : norm_array[0]; + for( int i = 1; i < src1.channels(); i++ ) + { + norm = + normType == NORM_INF ? 
std::max(norm, norm_array[i]) : + normType == NORM_L1 ? norm + norm_array[i] : + normType == NORM_L2 || normType == NORM_L2SQR ? norm + norm_array[i] * norm_array[i] : + 0; + } + return normType == NORM_L2 ? (double)std::sqrt(norm) : (double)norm; + } + } + } + } +#endif + if( src1.isContinuous() && src2.isContinuous() && mask.empty() ) { size_t len = src1.total()*src1.channels(); diff --git a/modules/core/test/test_mat.cpp b/modules/core/test/test_mat.cpp index 245347b8b..6e3ec03dc 100644 --- a/modules/core/test/test_mat.cpp +++ b/modules/core/test/test_mat.cpp @@ -510,6 +510,32 @@ protected: return; } #endif + // Test read and write + FileStorage fs( "PCA_store.yml", FileStorage::WRITE ); + rPCA.write( fs ); + fs.release(); + + PCA lPCA; + fs.open( "PCA_store.yml", FileStorage::READ ); + lPCA.read( fs.root() ); + err = norm( rPCA.eigenvectors, lPCA.eigenvectors, CV_RELATIVE_L2 ); + if( err > 0 ) + { + ts->printf( cvtest::TS::LOG, "bad accuracy of write/load functions (YML); err = %f\n", err ); + ts->set_failed_test_info( cvtest::TS::FAIL_BAD_ACCURACY ); + } + err = norm( rPCA.eigenvalues, lPCA.eigenvalues, CV_RELATIVE_L2 ); + if( err > 0 ) + { + ts->printf( cvtest::TS::LOG, "bad accuracy of write/load functions (YML); err = %f\n", err ); + ts->set_failed_test_info( cvtest::TS::FAIL_BAD_ACCURACY ); + } + err = norm( rPCA.mean, lPCA.mean, CV_RELATIVE_L2 ); + if( err > 0 ) + { + ts->printf( cvtest::TS::LOG, "bad accuracy of write/load functions (YML); err = %f\n", err ); + ts->set_failed_test_info( cvtest::TS::FAIL_BAD_ACCURACY ); + } } }; diff --git a/modules/cudaoptflow/perf/perf_optflow.cpp b/modules/cudaoptflow/perf/perf_optflow.cpp index 8edf00250..7bf383c15 100644 --- a/modules/cudaoptflow/perf/perf_optflow.cpp +++ b/modules/cudaoptflow/perf/perf_optflow.cpp @@ -368,8 +368,8 @@ PERF_TEST_P(ImagePair, OpticalFlowDual_TVL1, TEST_CYCLE() d_alg(d_frame0, d_frame1, u, v); - CUDA_SANITY_CHECK(u, 1e-2); - CUDA_SANITY_CHECK(v, 1e-2); + CUDA_SANITY_CHECK(u, 1e-1); + 
CUDA_SANITY_CHECK(v, 1e-1); } else { diff --git a/modules/cudaoptflow/src/cuda/tvl1flow.cu b/modules/cudaoptflow/src/cuda/tvl1flow.cu index 1de88b794..b85dee701 100644 --- a/modules/cudaoptflow/src/cuda/tvl1flow.cu +++ b/modules/cudaoptflow/src/cuda/tvl1flow.cu @@ -211,7 +211,7 @@ namespace tvl1flow const PtrStepf grad, const PtrStepf rho_c, const PtrStepf p11, const PtrStepf p12, const PtrStepf p21, const PtrStepf p22, PtrStepf u1, PtrStepf u2, PtrStepf error, - const float l_t, const float theta) + const float l_t, const float theta, const bool calcError) { const int x = blockIdx.x * blockDim.x + threadIdx.x; const int y = blockIdx.y * blockDim.y + threadIdx.y; @@ -265,21 +265,24 @@ namespace tvl1flow u1(y, x) = u1NewVal; u2(y, x) = u2NewVal; - const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal); - const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal); - error(y, x) = n1 + n2; + if (calcError) + { + const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal); + const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal); + error(y, x) = n1 + n2; + } } void estimateU(PtrStepSzf I1wx, PtrStepSzf I1wy, PtrStepSzf grad, PtrStepSzf rho_c, PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf error, - float l_t, float theta) + float l_t, float theta, bool calcError) { const dim3 block(32, 8); const dim3 grid(divUp(I1wx.cols, block.x), divUp(I1wx.rows, block.y)); - estimateUKernel<<>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, error, l_t, theta); + estimateUKernel<<>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, error, l_t, theta, calcError); cudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaDeviceSynchronize() ); diff --git a/modules/cudaoptflow/src/tvl1flow.cpp b/modules/cudaoptflow/src/tvl1flow.cpp index 43355d861..7b6882d9f 100644 --- a/modules/cudaoptflow/src/tvl1flow.cpp +++ b/modules/cudaoptflow/src/tvl1flow.cpp @@ -173,7 +173,7 @@ namespace tvl1flow PtrStepSzf 
grad, PtrStepSzf rho_c, PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf error, - float l_t, float theta); + float l_t, float theta, bool calcError); void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, float taut); } @@ -218,12 +218,24 @@ void cv::cuda::OpticalFlowDual_TVL1_CUDA::procOneScale(const GpuMat& I0, const G warpBackward(I0, I1, I1x, I1y, u1, u2, I1w, I1wx, I1wy, grad, rho_c); double error = std::numeric_limits::max(); + double prevError = 0.0; for (int n = 0; error > scaledEpsilon && n < iterations; ++n) { - estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, diff, l_t, static_cast(theta)); + // some tweaks to make sum operation less frequently + bool calcError = (epsilon > 0) && (n & 0x1) && (prevError < scaledEpsilon); - if (epsilon > 0) + estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, u1, u2, diff, l_t, static_cast(theta), calcError); + + if (calcError) + { error = cuda::sum(diff, norm_buf)[0]; + prevError = error; + } + else + { + error = std::numeric_limits::max(); + prevError -= scaledEpsilon; + } estimateDualVariables(u1, u2, p11, p12, p21, p22, taut); } diff --git a/modules/highgui/src/files_Qt/Milky/48/1.png b/modules/highgui/src/files_Qt/Milky/48/1.png index af3dc132c..69b4dee0a 100644 Binary files a/modules/highgui/src/files_Qt/Milky/48/1.png and b/modules/highgui/src/files_Qt/Milky/48/1.png differ diff --git a/modules/highgui/src/files_Qt/Milky/48/10.png b/modules/highgui/src/files_Qt/Milky/48/10.png index d01626050..34185e1fa 100644 Binary files a/modules/highgui/src/files_Qt/Milky/48/10.png and b/modules/highgui/src/files_Qt/Milky/48/10.png differ diff --git a/modules/highgui/src/files_Qt/Milky/48/11.png b/modules/highgui/src/files_Qt/Milky/48/11.png index ac236491b..565ad498e 100644 Binary files a/modules/highgui/src/files_Qt/Milky/48/11.png and b/modules/highgui/src/files_Qt/Milky/48/11.png differ 
diff --git a/modules/highgui/src/files_Qt/Milky/48/12.png b/modules/highgui/src/files_Qt/Milky/48/12.png index e297afe22..72712fe9e 100644 Binary files a/modules/highgui/src/files_Qt/Milky/48/12.png and b/modules/highgui/src/files_Qt/Milky/48/12.png differ diff --git a/modules/highgui/src/files_Qt/Milky/48/13.png b/modules/highgui/src/files_Qt/Milky/48/13.png index 691e73bf9..6cb5c5dca 100644 Binary files a/modules/highgui/src/files_Qt/Milky/48/13.png and b/modules/highgui/src/files_Qt/Milky/48/13.png differ diff --git a/modules/highgui/src/files_Qt/Milky/48/14.png b/modules/highgui/src/files_Qt/Milky/48/14.png index ede537cef..8d217be46 100644 Binary files a/modules/highgui/src/files_Qt/Milky/48/14.png and b/modules/highgui/src/files_Qt/Milky/48/14.png differ diff --git a/modules/highgui/src/files_Qt/Milky/48/16.png b/modules/highgui/src/files_Qt/Milky/48/16.png index 3e675d4b1..2e011c739 100644 Binary files a/modules/highgui/src/files_Qt/Milky/48/16.png and b/modules/highgui/src/files_Qt/Milky/48/16.png differ diff --git a/modules/highgui/src/files_Qt/Milky/48/17.png b/modules/highgui/src/files_Qt/Milky/48/17.png index 32ddc3eba..4a7e5de41 100644 Binary files a/modules/highgui/src/files_Qt/Milky/48/17.png and b/modules/highgui/src/files_Qt/Milky/48/17.png differ diff --git a/modules/highgui/src/files_Qt/Milky/48/18.png b/modules/highgui/src/files_Qt/Milky/48/18.png index be8e3e716..43f5405f5 100644 Binary files a/modules/highgui/src/files_Qt/Milky/48/18.png and b/modules/highgui/src/files_Qt/Milky/48/18.png differ diff --git a/modules/highgui/src/files_Qt/Milky/48/19.png b/modules/highgui/src/files_Qt/Milky/48/19.png index e5145d052..203510ddd 100644 Binary files a/modules/highgui/src/files_Qt/Milky/48/19.png and b/modules/highgui/src/files_Qt/Milky/48/19.png differ diff --git a/modules/highgui/src/files_Qt/Milky/48/2.png b/modules/highgui/src/files_Qt/Milky/48/2.png index d2ad4a4c3..8f4903eea 100644 Binary files a/modules/highgui/src/files_Qt/Milky/48/2.png and 
b/modules/highgui/src/files_Qt/Milky/48/2.png differ diff --git a/modules/highgui/src/files_Qt/Milky/48/20.png b/modules/highgui/src/files_Qt/Milky/48/20.png index c53289aae..1a591ca41 100644 Binary files a/modules/highgui/src/files_Qt/Milky/48/20.png and b/modules/highgui/src/files_Qt/Milky/48/20.png differ diff --git a/modules/highgui/src/files_Qt/Milky/48/21.png b/modules/highgui/src/files_Qt/Milky/48/21.png index 2df56ef77..e65e4acd3 100644 Binary files a/modules/highgui/src/files_Qt/Milky/48/21.png and b/modules/highgui/src/files_Qt/Milky/48/21.png differ diff --git a/modules/highgui/src/files_Qt/Milky/48/22.png b/modules/highgui/src/files_Qt/Milky/48/22.png index 36d41cc63..a81aca191 100644 Binary files a/modules/highgui/src/files_Qt/Milky/48/22.png and b/modules/highgui/src/files_Qt/Milky/48/22.png differ diff --git a/modules/highgui/src/files_Qt/Milky/48/23.png b/modules/highgui/src/files_Qt/Milky/48/23.png index eb51b8385..ab9e60cfc 100644 Binary files a/modules/highgui/src/files_Qt/Milky/48/23.png and b/modules/highgui/src/files_Qt/Milky/48/23.png differ diff --git a/modules/highgui/src/files_Qt/Milky/48/24.png b/modules/highgui/src/files_Qt/Milky/48/24.png index b0033cf64..4e5629cb4 100644 Binary files a/modules/highgui/src/files_Qt/Milky/48/24.png and b/modules/highgui/src/files_Qt/Milky/48/24.png differ diff --git a/modules/highgui/src/files_Qt/Milky/48/25.png b/modules/highgui/src/files_Qt/Milky/48/25.png index d41d79295..da93a5962 100644 Binary files a/modules/highgui/src/files_Qt/Milky/48/25.png and b/modules/highgui/src/files_Qt/Milky/48/25.png differ diff --git a/modules/highgui/src/files_Qt/Milky/48/26.png b/modules/highgui/src/files_Qt/Milky/48/26.png index 055c496a2..6ba5d6c10 100644 Binary files a/modules/highgui/src/files_Qt/Milky/48/26.png and b/modules/highgui/src/files_Qt/Milky/48/26.png differ diff --git a/modules/highgui/src/files_Qt/Milky/48/27.png b/modules/highgui/src/files_Qt/Milky/48/27.png index 34f5f0c8c..a14e20420 100644 Binary 
files a/modules/highgui/src/files_Qt/Milky/48/27.png and b/modules/highgui/src/files_Qt/Milky/48/27.png differ diff --git a/modules/highgui/src/files_Qt/Milky/48/28.png b/modules/highgui/src/files_Qt/Milky/48/28.png index 9c94db1aa..f0df2d35e 100644 Binary files a/modules/highgui/src/files_Qt/Milky/48/28.png and b/modules/highgui/src/files_Qt/Milky/48/28.png differ diff --git a/modules/highgui/src/files_Qt/Milky/48/29.png b/modules/highgui/src/files_Qt/Milky/48/29.png index 9ca7137ed..6d79d929f 100644 Binary files a/modules/highgui/src/files_Qt/Milky/48/29.png and b/modules/highgui/src/files_Qt/Milky/48/29.png differ diff --git a/modules/highgui/src/files_Qt/Milky/48/3.png b/modules/highgui/src/files_Qt/Milky/48/3.png index 5144bbfc3..40d594668 100644 Binary files a/modules/highgui/src/files_Qt/Milky/48/3.png and b/modules/highgui/src/files_Qt/Milky/48/3.png differ diff --git a/modules/highgui/src/files_Qt/Milky/48/30.png b/modules/highgui/src/files_Qt/Milky/48/30.png index db76e78f8..44037a72f 100644 Binary files a/modules/highgui/src/files_Qt/Milky/48/30.png and b/modules/highgui/src/files_Qt/Milky/48/30.png differ diff --git a/modules/highgui/src/files_Qt/Milky/48/31.png b/modules/highgui/src/files_Qt/Milky/48/31.png index e79c0dfda..b9d421337 100644 Binary files a/modules/highgui/src/files_Qt/Milky/48/31.png and b/modules/highgui/src/files_Qt/Milky/48/31.png differ diff --git a/modules/highgui/src/files_Qt/Milky/48/32.png b/modules/highgui/src/files_Qt/Milky/48/32.png index 414eecfde..d72749c50 100644 Binary files a/modules/highgui/src/files_Qt/Milky/48/32.png and b/modules/highgui/src/files_Qt/Milky/48/32.png differ diff --git a/modules/highgui/src/files_Qt/Milky/48/33.png b/modules/highgui/src/files_Qt/Milky/48/33.png index 2ec44d312..85bb86ff0 100644 Binary files a/modules/highgui/src/files_Qt/Milky/48/33.png and b/modules/highgui/src/files_Qt/Milky/48/33.png differ diff --git a/modules/highgui/src/files_Qt/Milky/48/34.png 
b/modules/highgui/src/files_Qt/Milky/48/34.png index 63f3b0465..fd095ee77 100644 Binary files a/modules/highgui/src/files_Qt/Milky/48/34.png and b/modules/highgui/src/files_Qt/Milky/48/34.png differ diff --git a/modules/highgui/src/files_Qt/Milky/48/35.png b/modules/highgui/src/files_Qt/Milky/48/35.png index 41a041599..abb64d612 100644 Binary files a/modules/highgui/src/files_Qt/Milky/48/35.png and b/modules/highgui/src/files_Qt/Milky/48/35.png differ diff --git a/modules/highgui/src/files_Qt/Milky/48/37.png b/modules/highgui/src/files_Qt/Milky/48/37.png index 70be99dac..fc9f361ec 100644 Binary files a/modules/highgui/src/files_Qt/Milky/48/37.png and b/modules/highgui/src/files_Qt/Milky/48/37.png differ diff --git a/modules/highgui/src/files_Qt/Milky/48/38.png b/modules/highgui/src/files_Qt/Milky/48/38.png index 1faaa2e59..81cd7e139 100644 Binary files a/modules/highgui/src/files_Qt/Milky/48/38.png and b/modules/highgui/src/files_Qt/Milky/48/38.png differ diff --git a/modules/highgui/src/files_Qt/Milky/48/39.png b/modules/highgui/src/files_Qt/Milky/48/39.png index b5b7b980f..d76effcd7 100644 Binary files a/modules/highgui/src/files_Qt/Milky/48/39.png and b/modules/highgui/src/files_Qt/Milky/48/39.png differ diff --git a/modules/highgui/src/files_Qt/Milky/48/4.png b/modules/highgui/src/files_Qt/Milky/48/4.png index fe93c3503..a6a8d07a7 100644 Binary files a/modules/highgui/src/files_Qt/Milky/48/4.png and b/modules/highgui/src/files_Qt/Milky/48/4.png differ diff --git a/modules/highgui/src/files_Qt/Milky/48/40.png b/modules/highgui/src/files_Qt/Milky/48/40.png index 103590607..f17ad6aa1 100644 Binary files a/modules/highgui/src/files_Qt/Milky/48/40.png and b/modules/highgui/src/files_Qt/Milky/48/40.png differ diff --git a/modules/highgui/src/files_Qt/Milky/48/41.png b/modules/highgui/src/files_Qt/Milky/48/41.png index 622e1d8b2..4553c0458 100644 Binary files a/modules/highgui/src/files_Qt/Milky/48/41.png and b/modules/highgui/src/files_Qt/Milky/48/41.png differ diff 
--git a/modules/highgui/src/files_Qt/Milky/48/42.png b/modules/highgui/src/files_Qt/Milky/48/42.png index 2c20bf60f..fb5f9a2a6 100644 Binary files a/modules/highgui/src/files_Qt/Milky/48/42.png and b/modules/highgui/src/files_Qt/Milky/48/42.png differ diff --git a/modules/highgui/src/files_Qt/Milky/48/43.png b/modules/highgui/src/files_Qt/Milky/48/43.png index b849f939b..3c958420b 100644 Binary files a/modules/highgui/src/files_Qt/Milky/48/43.png and b/modules/highgui/src/files_Qt/Milky/48/43.png differ diff --git a/modules/highgui/src/files_Qt/Milky/48/44.png b/modules/highgui/src/files_Qt/Milky/48/44.png index 3902ba1e3..ef3c114d4 100644 Binary files a/modules/highgui/src/files_Qt/Milky/48/44.png and b/modules/highgui/src/files_Qt/Milky/48/44.png differ diff --git a/modules/highgui/src/files_Qt/Milky/48/45.png b/modules/highgui/src/files_Qt/Milky/48/45.png index cd4d6deeb..a77fb9e14 100644 Binary files a/modules/highgui/src/files_Qt/Milky/48/45.png and b/modules/highgui/src/files_Qt/Milky/48/45.png differ diff --git a/modules/highgui/src/files_Qt/Milky/48/5.png b/modules/highgui/src/files_Qt/Milky/48/5.png index 60827ff7e..46df26f5e 100644 Binary files a/modules/highgui/src/files_Qt/Milky/48/5.png and b/modules/highgui/src/files_Qt/Milky/48/5.png differ diff --git a/modules/highgui/src/files_Qt/Milky/48/6.png b/modules/highgui/src/files_Qt/Milky/48/6.png index ed04e555d..72853e685 100644 Binary files a/modules/highgui/src/files_Qt/Milky/48/6.png and b/modules/highgui/src/files_Qt/Milky/48/6.png differ diff --git a/modules/highgui/src/files_Qt/Milky/48/7.png b/modules/highgui/src/files_Qt/Milky/48/7.png index 6eca1fadf..832772b59 100644 Binary files a/modules/highgui/src/files_Qt/Milky/48/7.png and b/modules/highgui/src/files_Qt/Milky/48/7.png differ diff --git a/modules/highgui/src/files_Qt/Milky/48/9.png b/modules/highgui/src/files_Qt/Milky/48/9.png index e6a9be154..4ece823fe 100644 Binary files a/modules/highgui/src/files_Qt/Milky/48/9.png and 
b/modules/highgui/src/files_Qt/Milky/48/9.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/1.png b/modules/highgui/src/files_Qt/Milky/64/1.png index 9222f069f..36a19f6e0 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/1.png and b/modules/highgui/src/files_Qt/Milky/64/1.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/10.png b/modules/highgui/src/files_Qt/Milky/64/10.png index e450e4ae3..28e0be1f5 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/10.png and b/modules/highgui/src/files_Qt/Milky/64/10.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/100.png b/modules/highgui/src/files_Qt/Milky/64/100.png index 2920ba3db..fbcfbaabb 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/100.png and b/modules/highgui/src/files_Qt/Milky/64/100.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/11.png b/modules/highgui/src/files_Qt/Milky/64/11.png index 09e42528a..1002f3457 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/11.png and b/modules/highgui/src/files_Qt/Milky/64/11.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/12.png b/modules/highgui/src/files_Qt/Milky/64/12.png index 42ddcf2fa..822bc42e2 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/12.png and b/modules/highgui/src/files_Qt/Milky/64/12.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/126.png b/modules/highgui/src/files_Qt/Milky/64/126.png index 4d34cfeb1..d8715004a 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/126.png and b/modules/highgui/src/files_Qt/Milky/64/126.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/13.png b/modules/highgui/src/files_Qt/Milky/64/13.png index a5d1aff9c..c4ae0c42f 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/13.png and b/modules/highgui/src/files_Qt/Milky/64/13.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/14.png b/modules/highgui/src/files_Qt/Milky/64/14.png index 091cf5847..ce96e04ce 100644 
Binary files a/modules/highgui/src/files_Qt/Milky/64/14.png and b/modules/highgui/src/files_Qt/Milky/64/14.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/15.png b/modules/highgui/src/files_Qt/Milky/64/15.png index f55ea7dd4..eb9dca50a 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/15.png and b/modules/highgui/src/files_Qt/Milky/64/15.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/16.png b/modules/highgui/src/files_Qt/Milky/64/16.png index 008823301..9acf8c20b 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/16.png and b/modules/highgui/src/files_Qt/Milky/64/16.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/17.png b/modules/highgui/src/files_Qt/Milky/64/17.png index 7ab1b957f..18d286ab2 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/17.png and b/modules/highgui/src/files_Qt/Milky/64/17.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/18.png b/modules/highgui/src/files_Qt/Milky/64/18.png index c0772630f..3b76256ba 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/18.png and b/modules/highgui/src/files_Qt/Milky/64/18.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/19.png b/modules/highgui/src/files_Qt/Milky/64/19.png index d167b9ace..b0de5da46 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/19.png and b/modules/highgui/src/files_Qt/Milky/64/19.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/2.png b/modules/highgui/src/files_Qt/Milky/64/2.png index f54098767..3523cc670 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/2.png and b/modules/highgui/src/files_Qt/Milky/64/2.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/21.png b/modules/highgui/src/files_Qt/Milky/64/21.png index d1ae8e648..4c94f5442 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/21.png and b/modules/highgui/src/files_Qt/Milky/64/21.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/22.png 
b/modules/highgui/src/files_Qt/Milky/64/22.png index ca026ecbc..f7f0c3d9e 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/22.png and b/modules/highgui/src/files_Qt/Milky/64/22.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/23.png b/modules/highgui/src/files_Qt/Milky/64/23.png index a374ea029..11b0899f5 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/23.png and b/modules/highgui/src/files_Qt/Milky/64/23.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/24.png b/modules/highgui/src/files_Qt/Milky/64/24.png index 5566ebd4d..94a8e9496 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/24.png and b/modules/highgui/src/files_Qt/Milky/64/24.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/25.png b/modules/highgui/src/files_Qt/Milky/64/25.png index 368f7a1c6..4e650dc81 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/25.png and b/modules/highgui/src/files_Qt/Milky/64/25.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/26.png b/modules/highgui/src/files_Qt/Milky/64/26.png index 10ecc3be9..6dec63383 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/26.png and b/modules/highgui/src/files_Qt/Milky/64/26.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/27.png b/modules/highgui/src/files_Qt/Milky/64/27.png index 9946afb50..1ab2410c7 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/27.png and b/modules/highgui/src/files_Qt/Milky/64/27.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/28.png b/modules/highgui/src/files_Qt/Milky/64/28.png index 9094ba190..7d4d62435 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/28.png and b/modules/highgui/src/files_Qt/Milky/64/28.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/29.png b/modules/highgui/src/files_Qt/Milky/64/29.png index dc593dfcd..74a499650 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/29.png and b/modules/highgui/src/files_Qt/Milky/64/29.png differ 
diff --git a/modules/highgui/src/files_Qt/Milky/64/30.png b/modules/highgui/src/files_Qt/Milky/64/30.png index 2430966be..d1fbb7208 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/30.png and b/modules/highgui/src/files_Qt/Milky/64/30.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/31.png b/modules/highgui/src/files_Qt/Milky/64/31.png index 1748d7135..70d95908f 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/31.png and b/modules/highgui/src/files_Qt/Milky/64/31.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/32.png b/modules/highgui/src/files_Qt/Milky/64/32.png index d225fb3d9..11b66ad44 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/32.png and b/modules/highgui/src/files_Qt/Milky/64/32.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/33.png b/modules/highgui/src/files_Qt/Milky/64/33.png index 3fb4ac5b1..c76151cc3 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/33.png and b/modules/highgui/src/files_Qt/Milky/64/33.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/34.png b/modules/highgui/src/files_Qt/Milky/64/34.png index 0dd5f23ef..1b009aac0 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/34.png and b/modules/highgui/src/files_Qt/Milky/64/34.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/35.png b/modules/highgui/src/files_Qt/Milky/64/35.png index 5cdb35c97..c9b408445 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/35.png and b/modules/highgui/src/files_Qt/Milky/64/35.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/36.png b/modules/highgui/src/files_Qt/Milky/64/36.png index d2b244419..d5aad30b6 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/36.png and b/modules/highgui/src/files_Qt/Milky/64/36.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/37.png b/modules/highgui/src/files_Qt/Milky/64/37.png index ef2b8a56d..b0898c8cc 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/37.png 
and b/modules/highgui/src/files_Qt/Milky/64/37.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/38.png b/modules/highgui/src/files_Qt/Milky/64/38.png index f1a700c14..0aa9224a6 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/38.png and b/modules/highgui/src/files_Qt/Milky/64/38.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/39.png b/modules/highgui/src/files_Qt/Milky/64/39.png index 121a3f8a0..f67ae06b5 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/39.png and b/modules/highgui/src/files_Qt/Milky/64/39.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/4.png b/modules/highgui/src/files_Qt/Milky/64/4.png index 264f8bd6a..eb1a5facd 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/4.png and b/modules/highgui/src/files_Qt/Milky/64/4.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/40.png b/modules/highgui/src/files_Qt/Milky/64/40.png index a04765de1..f39b08537 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/40.png and b/modules/highgui/src/files_Qt/Milky/64/40.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/41.png b/modules/highgui/src/files_Qt/Milky/64/41.png index 3bed0ebda..3061701f5 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/41.png and b/modules/highgui/src/files_Qt/Milky/64/41.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/42.png b/modules/highgui/src/files_Qt/Milky/64/42.png index c17662099..1b979766a 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/42.png and b/modules/highgui/src/files_Qt/Milky/64/42.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/43.png b/modules/highgui/src/files_Qt/Milky/64/43.png index 295d2886d..f3d9e5d43 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/43.png and b/modules/highgui/src/files_Qt/Milky/64/43.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/44.png b/modules/highgui/src/files_Qt/Milky/64/44.png index a8fba7545..a549bfe0c 100644 Binary 
files a/modules/highgui/src/files_Qt/Milky/64/44.png and b/modules/highgui/src/files_Qt/Milky/64/44.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/45.png b/modules/highgui/src/files_Qt/Milky/64/45.png index d65e2bd93..123fa1a3d 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/45.png and b/modules/highgui/src/files_Qt/Milky/64/45.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/5.png b/modules/highgui/src/files_Qt/Milky/64/5.png index 185219d26..9d3c5a368 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/5.png and b/modules/highgui/src/files_Qt/Milky/64/5.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/6.png b/modules/highgui/src/files_Qt/Milky/64/6.png index fa55cbc04..5ae8a8e67 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/6.png and b/modules/highgui/src/files_Qt/Milky/64/6.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/65.png b/modules/highgui/src/files_Qt/Milky/64/65.png index 46ca28ee1..f4e9a2880 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/65.png and b/modules/highgui/src/files_Qt/Milky/64/65.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/7.png b/modules/highgui/src/files_Qt/Milky/64/7.png index 98083e066..e97ab37ce 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/7.png and b/modules/highgui/src/files_Qt/Milky/64/7.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/8.png b/modules/highgui/src/files_Qt/Milky/64/8.png index 2daaaeffc..639ac9a1f 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/8.png and b/modules/highgui/src/files_Qt/Milky/64/8.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/9.png b/modules/highgui/src/files_Qt/Milky/64/9.png index 5eab48e62..cce6c7ef8 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/9.png and b/modules/highgui/src/files_Qt/Milky/64/9.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/94.png b/modules/highgui/src/files_Qt/Milky/64/94.png 
index 9be117889..aca0ff936 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/94.png and b/modules/highgui/src/files_Qt/Milky/64/94.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/98.png b/modules/highgui/src/files_Qt/Milky/64/98.png index a2e58c7bb..0a11c643b 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/98.png and b/modules/highgui/src/files_Qt/Milky/64/98.png differ diff --git a/modules/highgui/src/files_Qt/Milky/64/99.png b/modules/highgui/src/files_Qt/Milky/64/99.png index 5f952b8b5..122cf2de0 100644 Binary files a/modules/highgui/src/files_Qt/Milky/64/99.png and b/modules/highgui/src/files_Qt/Milky/64/99.png differ diff --git a/modules/highgui/src/grfmt_png.cpp b/modules/highgui/src/grfmt_png.cpp index 3105f790a..c784d5a5b 100644 --- a/modules/highgui/src/grfmt_png.cpp +++ b/modules/highgui/src/grfmt_png.cpp @@ -277,6 +277,7 @@ bool PngDecoder::readData( Mat& img ) else png_set_rgb_to_gray( png_ptr, 1, 0.299, 0.587 ); // RGB->Gray + png_set_interlace_handling( png_ptr ); png_read_update_info( png_ptr, info_ptr ); for( y = 0; y < m_height; y++ ) diff --git a/modules/imgproc/src/canny.cpp b/modules/imgproc/src/canny.cpp index 4dae01343..fb6afaf2b 100644 --- a/modules/imgproc/src/canny.cpp +++ b/modules/imgproc/src/canny.cpp @@ -41,6 +41,50 @@ #include "precomp.hpp" +#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) +#define USE_IPP_CANNY 1 +#else +#undef USE_IPP_CANNY +#endif + +#ifdef USE_IPP_CANNY +namespace cv +{ +static bool ippCanny(const Mat& _src, Mat& _dst, float low, float high) +{ + int size = 0, size1 = 0; + IppiSize roi = { _src.cols, _src.rows }; + + ippiFilterSobelNegVertGetBufferSize_8u16s_C1R(roi, ippMskSize3x3, &size); + ippiFilterSobelHorizGetBufferSize_8u16s_C1R(roi, ippMskSize3x3, &size1); + size = std::max(size, size1); + ippiCannyGetSize(roi, &size1); + size = std::max(size, size1); + + AutoBuffer buf(size + 64); + uchar* buffer = alignPtr((uchar*)buf, 32); + + Mat _dx(_src.rows, _src.cols, CV_16S); + 
if( ippiFilterSobelNegVertBorder_8u16s_C1R(_src.data, (int)_src.step, + _dx.ptr(), (int)_dx.step, roi, + ippMskSize3x3, ippBorderRepl, 0, buffer) < 0 ) + return false; + + Mat _dy(_src.rows, _src.cols, CV_16S); + if( ippiFilterSobelHorizBorder_8u16s_C1R(_src.data, (int)_src.step, + _dy.ptr(), (int)_dy.step, roi, + ippMskSize3x3, ippBorderRepl, 0, buffer) < 0 ) + return false; + + if( ippiCanny_16s8u_C1R(_dx.ptr(), (int)_dx.step, + _dy.ptr(), (int)_dy.step, + _dst.data, (int)_dst.step, roi, low, high, buffer) < 0 ) + return false; + return true; +} +} +#endif + void cv::Canny( InputArray _src, OutputArray _dst, double low_thresh, double high_thresh, int aperture_size, bool L2gradient ) @@ -61,20 +105,26 @@ void cv::Canny( InputArray _src, OutputArray _dst, if ((aperture_size & 1) == 0 || (aperture_size != -1 && (aperture_size < 3 || aperture_size > 7))) CV_Error(CV_StsBadFlag, ""); + if (low_thresh > high_thresh) + std::swap(low_thresh, high_thresh); + #ifdef HAVE_TEGRA_OPTIMIZATION if (tegra::canny(src, dst, low_thresh, high_thresh, aperture_size, L2gradient)) return; #endif +#ifdef USE_IPP_CANNY + if( aperture_size == 3 && !L2gradient && + ippCanny(src, dst, low_thresh, high_thresh) >= 0 ) + return; +#endif + const int cn = src.channels(); - cv::Mat dx(src.rows, src.cols, CV_16SC(cn)); - cv::Mat dy(src.rows, src.cols, CV_16SC(cn)); + Mat dx(src.rows, src.cols, CV_16SC(cn)); + Mat dy(src.rows, src.cols, CV_16SC(cn)); - cv::Sobel(src, dx, CV_16S, 1, 0, aperture_size, 1, 0, cv::BORDER_REPLICATE); - cv::Sobel(src, dy, CV_16S, 0, 1, aperture_size, 1, 0, cv::BORDER_REPLICATE); - - if (low_thresh > high_thresh) - std::swap(low_thresh, high_thresh); + Sobel(src, dx, CV_16S, 1, 0, aperture_size, 1, 0, cv::BORDER_REPLICATE); + Sobel(src, dy, CV_16S, 0, 1, aperture_size, 1, 0, cv::BORDER_REPLICATE); if (L2gradient) { @@ -88,7 +138,7 @@ void cv::Canny( InputArray _src, OutputArray _dst, int high = cvFloor(high_thresh); ptrdiff_t mapstep = src.cols + 2; - cv::AutoBuffer 
buffer((src.cols+2)*(src.rows+2) + cn * mapstep * 3 * sizeof(int)); + AutoBuffer buffer((src.cols+2)*(src.rows+2) + cn * mapstep * 3 * sizeof(int)); int* mag_buf[3]; mag_buf[0] = (int*)(uchar*)buffer; diff --git a/modules/imgproc/src/color.cpp b/modules/imgproc/src/color.cpp index b6192e2b1..e8556d460 100644 --- a/modules/imgproc/src/color.cpp +++ b/modules/imgproc/src/color.cpp @@ -94,6 +94,13 @@ #define CV_DESCALE(x,n) (((x) + (1 << ((n)-1))) >> (n)) +#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) +#define MAX_IPP8u 255 +#define MAX_IPP16u 65535 +#define MAX_IPP32f 1.0 +static IppStatus sts = ippInit(); +#endif + namespace cv { @@ -191,6 +198,301 @@ void CvtColorLoop(const Mat& src, Mat& dst, const Cvt& cvt) parallel_for_(Range(0, src.rows), CvtColorLoop_Invoker(src, dst, cvt), src.total()/(double)(1<<16) ); } +#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) +typedef IppStatus (CV_STDCALL* ippiReorderFunc)(const void *, int, void *, int, IppiSize, const int *); +typedef IppStatus (CV_STDCALL* ippiGeneralFunc)(const void *, int, void *, int, IppiSize); +typedef IppStatus (CV_STDCALL* ippiColor2GrayFunc)(const void *, int, void *, int, IppiSize, const Ipp32f *); + +template +class CvtColorIPPLoop_Invoker : public ParallelLoopBody +{ +public: + + CvtColorIPPLoop_Invoker(const Mat& _src, Mat& _dst, const Cvt& _cvt, bool *_ok) : + ParallelLoopBody(), src(_src), dst(_dst), cvt(_cvt), ok(_ok) + { + *ok = true; + } + + virtual void operator()(const Range& range) const + { + const void *yS = src.ptr(range.start); + void *yD = dst.ptr(range.start); + if( cvt(yS, (int)src.step[0], yD, (int)dst.step[0], src.cols, range.end - range.start) < 0 ) + *ok = false; + } + +private: + const Mat& src; + Mat& dst; + const Cvt& cvt; + bool *ok; + + const CvtColorIPPLoop_Invoker& operator= (const CvtColorIPPLoop_Invoker&); +}; + +template +bool CvtColorIPPLoop(const Mat& src, Mat& dst, const Cvt& cvt) +{ + bool ok; + parallel_for_(Range(0, src.rows), CvtColorIPPLoop_Invoker(src, 
dst, cvt, &ok), src.total()/(double)(1<<16) ); + return ok; +} + +template +bool CvtColorIPPLoopCopy(Mat& src, Mat& dst, const Cvt& cvt) +{ + Mat temp; + Mat &source = src; + if( src.data == dst.data ) + { + src.copyTo(temp); + source = temp; + } + bool ok; + parallel_for_(Range(0, source.rows), CvtColorIPPLoop_Invoker(source, dst, cvt, &ok), source.total()/(double)(1<<16) ); + return ok; +} + +IppStatus __stdcall ippiSwapChannels_8u_C3C4Rf(const Ipp8u* pSrc, int srcStep, Ipp8u* pDst, int dstStep, + IppiSize roiSize, const int *dstOrder) +{ + return ippiSwapChannels_8u_C3C4R(pSrc, srcStep, pDst, dstStep, roiSize, dstOrder, MAX_IPP8u); +} + +IppStatus __stdcall ippiSwapChannels_16u_C3C4Rf(const Ipp16u* pSrc, int srcStep, Ipp16u* pDst, int dstStep, + IppiSize roiSize, const int *dstOrder) +{ + return ippiSwapChannels_16u_C3C4R(pSrc, srcStep, pDst, dstStep, roiSize, dstOrder, MAX_IPP16u); +} + +IppStatus __stdcall ippiSwapChannels_32f_C3C4Rf(const Ipp32f* pSrc, int srcStep, Ipp32f* pDst, int dstStep, + IppiSize roiSize, const int *dstOrder) +{ + return ippiSwapChannels_32f_C3C4R(pSrc, srcStep, pDst, dstStep, roiSize, dstOrder, MAX_IPP32f); +} + +static ippiReorderFunc ippiSwapChannelsC3C4RTab[] = +{ + (ippiReorderFunc)ippiSwapChannels_8u_C3C4Rf, 0, (ippiReorderFunc)ippiSwapChannels_16u_C3C4Rf, 0, + 0, (ippiReorderFunc)ippiSwapChannels_32f_C3C4Rf, 0, 0 +}; + +static ippiGeneralFunc ippiCopyAC4C3RTab[] = +{ + (ippiGeneralFunc)ippiCopy_8u_AC4C3R, 0, (ippiGeneralFunc)ippiCopy_16u_AC4C3R, 0, + 0, (ippiGeneralFunc)ippiCopy_32f_AC4C3R, 0, 0 +}; + +static ippiReorderFunc ippiSwapChannelsC4C3RTab[] = +{ + (ippiReorderFunc)ippiSwapChannels_8u_C4C3R, 0, (ippiReorderFunc)ippiSwapChannels_16u_C4C3R, 0, + 0, (ippiReorderFunc)ippiSwapChannels_32f_C4C3R, 0, 0 +}; + +static ippiReorderFunc ippiSwapChannelsC3RTab[] = +{ + (ippiReorderFunc)ippiSwapChannels_8u_C3R, 0, (ippiReorderFunc)ippiSwapChannels_16u_C3R, 0, + 0, (ippiReorderFunc)ippiSwapChannels_32f_C3R, 0, 0 +}; + +static 
ippiReorderFunc ippiSwapChannelsC4RTab[] = +{ + (ippiReorderFunc)ippiSwapChannels_8u_AC4R, 0, (ippiReorderFunc)ippiSwapChannels_16u_AC4R, 0, + 0, (ippiReorderFunc)ippiSwapChannels_32f_AC4R, 0, 0 +}; + +static ippiColor2GrayFunc ippiColor2GrayC3Tab[] = +{ + (ippiColor2GrayFunc)ippiColorToGray_8u_C3C1R, 0, (ippiColor2GrayFunc)ippiColorToGray_16u_C3C1R, 0, + 0, (ippiColor2GrayFunc)ippiColorToGray_32f_C3C1R, 0, 0 +}; + +static ippiColor2GrayFunc ippiColor2GrayC4Tab[] = +{ + (ippiColor2GrayFunc)ippiColorToGray_8u_AC4C1R, 0, (ippiColor2GrayFunc)ippiColorToGray_16u_AC4C1R, 0, + 0, (ippiColor2GrayFunc)ippiColorToGray_32f_AC4C1R, 0, 0 +}; + +static ippiGeneralFunc ippiRGB2GrayC3Tab[] = +{ + (ippiGeneralFunc)ippiRGBToGray_8u_C3C1R, 0, (ippiGeneralFunc)ippiRGBToGray_16u_C3C1R, 0, + 0, (ippiGeneralFunc)ippiRGBToGray_32f_C3C1R, 0, 0 +}; + +static ippiGeneralFunc ippiRGB2GrayC4Tab[] = +{ + (ippiGeneralFunc)ippiRGBToGray_8u_AC4C1R, 0, (ippiGeneralFunc)ippiRGBToGray_16u_AC4C1R, 0, + 0, (ippiGeneralFunc)ippiRGBToGray_32f_AC4C1R, 0, 0 +}; + +static ippiGeneralFunc ippiCopyP3C3RTab[] = +{ + (ippiGeneralFunc)ippiCopy_8u_P3C3R, 0, (ippiGeneralFunc)ippiCopy_16u_P3C3R, 0, + 0, (ippiGeneralFunc)ippiCopy_32f_P3C3R, 0, 0 +}; + +static ippiGeneralFunc ippiRGB2XYZTab[] = +{ + (ippiGeneralFunc)ippiRGBToXYZ_8u_C3R, 0, (ippiGeneralFunc)ippiRGBToXYZ_16u_C3R, 0, + 0, (ippiGeneralFunc)ippiRGBToXYZ_32f_C3R, 0, 0 +}; + +static ippiGeneralFunc ippiXYZ2RGBTab[] = +{ + (ippiGeneralFunc)ippiXYZToRGB_8u_C3R, 0, (ippiGeneralFunc)ippiXYZToRGB_16u_C3R, 0, + 0, (ippiGeneralFunc)ippiXYZToRGB_32f_C3R, 0, 0 +}; + +static ippiGeneralFunc ippiRGB2HSVTab[] = +{ + (ippiGeneralFunc)ippiRGBToHSV_8u_C3R, 0, (ippiGeneralFunc)ippiRGBToHSV_16u_C3R, 0, + 0, 0, 0, 0 +}; + +static ippiGeneralFunc ippiHSV2RGBTab[] = +{ + (ippiGeneralFunc)ippiHSVToRGB_8u_C3R, 0, (ippiGeneralFunc)ippiHSVToRGB_16u_C3R, 0, + 0, 0, 0, 0 +}; + +static ippiGeneralFunc ippiRGB2HLSTab[] = +{ + (ippiGeneralFunc)ippiRGBToHLS_8u_C3R, 0, 
(ippiGeneralFunc)ippiRGBToHLS_16u_C3R, 0, + 0, (ippiGeneralFunc)ippiRGBToHLS_32f_C3R, 0, 0 +}; + +static ippiGeneralFunc ippiHLS2RGBTab[] = +{ + (ippiGeneralFunc)ippiHLSToRGB_8u_C3R, 0, (ippiGeneralFunc)ippiHLSToRGB_16u_C3R, 0, + 0, (ippiGeneralFunc)ippiHLSToRGB_32f_C3R, 0, 0 +}; + +struct IPPGeneralFunctor +{ + IPPGeneralFunctor(ippiGeneralFunc _func) : func(_func){} + bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const + { + return func(src, srcStep, dst, dstStep, ippiSize(cols, rows)) >= 0; + } +private: + ippiGeneralFunc func; +}; + +struct IPPReorderFunctor +{ + IPPReorderFunctor(ippiReorderFunc _func, int _order0, int _order1, int _order2) : func(_func) + { + order[0] = _order0; + order[1] = _order1; + order[2] = _order2; + order[3] = 3; + } + bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const + { + return func(src, srcStep, dst, dstStep, ippiSize(cols, rows), order) >= 0; + } +private: + ippiReorderFunc func; + int order[4]; +}; + +struct IPPColor2GrayFunctor +{ + IPPColor2GrayFunctor(ippiColor2GrayFunc _func) : func(_func) + { + coeffs[0] = 0.114f; + coeffs[1] = 0.587f; + coeffs[2] = 0.299f; + } + bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const + { + return func(src, srcStep, dst, dstStep, ippiSize(cols, rows), coeffs) >= 0; + } +private: + ippiColor2GrayFunc func; + Ipp32f coeffs[3]; +}; + +struct IPPGray2BGRFunctor +{ + IPPGray2BGRFunctor(ippiGeneralFunc _func) : func(_func){} + bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const + { + const void* srcarray[3] = { src, src, src }; + return func(srcarray, srcStep, dst, dstStep, ippiSize(cols, rows)) >= 0; + } +private: + ippiGeneralFunc func; +}; + +struct IPPGray2BGRAFunctor +{ + IPPGray2BGRAFunctor(ippiGeneralFunc _func1, ippiReorderFunc _func2, int _depth) : func1(_func1), func2(_func2), depth(_depth){} + bool operator()(const 
void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const + { + const void* srcarray[3] = { src, src, src }; + Mat temp(rows, cols, CV_MAKETYPE(depth, 3)); + if(func1(srcarray, srcStep, temp.data, (int)temp.step[0], ippiSize(cols, rows)) < 0) + return false; + int order[4] = {0, 1, 2, 3}; + return func2(temp.data, (int)temp.step[0], dst, dstStep, ippiSize(cols, rows), order) >= 0; + } +private: + ippiGeneralFunc func1; + ippiReorderFunc func2; + int depth; +}; + +struct IPPReorderGeneralFunctor +{ + IPPReorderGeneralFunctor(ippiReorderFunc _func1, ippiGeneralFunc _func2, int _order0, int _order1, int _order2, int _depth) : func1(_func1), func2(_func2), depth(_depth) + { + order[0] = _order0; + order[1] = _order1; + order[2] = _order2; + order[3] = 3; + } + bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const + { + Mat temp; + temp.create(rows, cols, CV_MAKETYPE(depth, 3)); + if(func1(src, srcStep, temp.data, (int)temp.step[0], ippiSize(cols, rows), order) < 0) + return false; + return func2(temp.data, (int)temp.step[0], dst, dstStep, ippiSize(cols, rows)) >= 0; + } +private: + ippiReorderFunc func1; + ippiGeneralFunc func2; + int order[4]; + int depth; +}; + +struct IPPGeneralReorderFunctor +{ + IPPGeneralReorderFunctor(ippiGeneralFunc _func1, ippiReorderFunc _func2, int _order0, int _order1, int _order2, int _depth) : func1(_func1), func2(_func2), depth(_depth) + { + order[0] = _order0; + order[1] = _order1; + order[2] = _order2; + order[3] = 3; + } + bool operator()(const void *src, int srcStep, void *dst, int dstStep, int cols, int rows) const + { + Mat temp; + temp.create(rows, cols, CV_MAKETYPE(depth, 3)); + if(func1(src, srcStep, temp.data, (int)temp.step[0], ippiSize(cols, rows)) < 0) + return false; + return func2(temp.data, (int)temp.step[0], dst, dstStep, ippiSize(cols, rows), order) >= 0; + } +private: + ippiGeneralFunc func1; + ippiReorderFunc func2; + int order[4]; + int depth; +}; +#endif + 
////////////////// Various 3/4-channel to 3/4-channel RGB transformations ///////////////// template struct RGB2RGB @@ -2410,6 +2712,39 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) _dst.create( sz, CV_MAKETYPE(depth, dcn)); dst = _dst.getMat(); +#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) + if( code == CV_BGR2BGRA || code == CV_RGB2RGBA) + { + if ( CvtColorIPPLoop(src, dst, IPPReorderFunctor(ippiSwapChannelsC3C4RTab[depth], 0, 1, 2)) ) + return; + } + else if( code == CV_BGRA2BGR ) + { + if ( CvtColorIPPLoop(src, dst, IPPGeneralFunctor(ippiCopyAC4C3RTab[depth])) ) + return; + } + else if( code == CV_BGR2RGBA ) + { + if( CvtColorIPPLoop(src, dst, IPPReorderFunctor(ippiSwapChannelsC3C4RTab[depth], 2, 1, 0)) ) + return; + } + else if( code == CV_RGBA2BGR ) + { + if( CvtColorIPPLoop(src, dst, IPPReorderFunctor(ippiSwapChannelsC4C3RTab[depth], 2, 1, 0)) ) + return; + } + else if( code == CV_RGB2BGR ) + { + if( CvtColorIPPLoopCopy(src, dst, IPPReorderFunctor(ippiSwapChannelsC3RTab[depth], 2, 1, 0)) ) + return; + } + else if( code == CV_RGBA2BGRA ) + { + if( CvtColorIPPLoopCopy(src, dst, IPPReorderFunctor(ippiSwapChannelsC4RTab[depth], 2, 1, 0)) ) + return; + } +#endif + if( depth == CV_8U ) { #ifdef HAVE_TEGRA_OPTIMIZATION @@ -2463,6 +2798,29 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) _dst.create(sz, CV_MAKETYPE(depth, 1)); dst = _dst.getMat(); +#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) + if( code == CV_BGR2GRAY ) + { + if( CvtColorIPPLoop(src, dst, IPPColor2GrayFunctor(ippiColor2GrayC3Tab[depth])) ) + return; + } + else if( code == CV_RGB2GRAY ) + { + if( CvtColorIPPLoop(src, dst, IPPGeneralFunctor(ippiRGB2GrayC3Tab[depth])) ) + return; + } + else if( code == CV_BGRA2GRAY ) + { + if( CvtColorIPPLoop(src, dst, IPPColor2GrayFunctor(ippiColor2GrayC4Tab[depth])) ) + return; + } + else if( code == CV_RGBA2GRAY ) + { + if( CvtColorIPPLoop(src, dst, IPPGeneralFunctor(ippiRGB2GrayC4Tab[depth])) ) 
+ return; + } +#endif + bidx = code == CV_BGR2GRAY || code == CV_BGRA2GRAY ? 0 : 2; if( depth == CV_8U ) @@ -2492,6 +2850,20 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) _dst.create(sz, CV_MAKETYPE(depth, dcn)); dst = _dst.getMat(); +#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) + if( code == CV_GRAY2BGR ) + { + if( CvtColorIPPLoop(src, dst, IPPGray2BGRFunctor(ippiCopyP3C3RTab[depth])) ) + return; + } + else if( code == CV_GRAY2BGRA ) + { + if( CvtColorIPPLoop(src, dst, IPPGray2BGRAFunctor(ippiCopyP3C3RTab[depth], ippiSwapChannelsC3C4RTab[depth], depth)) ) + return; + } +#endif + + if( depth == CV_8U ) { #ifdef HAVE_TEGRA_OPTIMIZATION @@ -2571,6 +2943,29 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) _dst.create(sz, CV_MAKETYPE(depth, 3)); dst = _dst.getMat(); +#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) + if( code == CV_BGR2XYZ && scn == 3 ) + { + if( CvtColorIPPLoopCopy(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth], ippiRGB2XYZTab[depth], 2, 1, 0, depth)) ) + return; + } + else if( code == CV_BGR2XYZ && scn == 4 ) + { + if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2XYZTab[depth], 2, 1, 0, depth)) ) + return; + } + else if( code == CV_RGB2XYZ && scn == 3 ) + { + if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiRGB2XYZTab[depth])) ) + return; + } + else if( code == CV_RGB2XYZ && scn == 4 ) + { + if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2XYZTab[depth], 0, 1, 2, depth)) ) + return; + } +#endif + if( depth == CV_8U ) CvtColorLoop(src, dst, RGB2XYZ_i(scn, bidx, 0)); else if( depth == CV_16U ) @@ -2587,6 +2982,29 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) _dst.create(sz, CV_MAKETYPE(depth, dcn)); dst = _dst.getMat(); +#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) + if( code == CV_XYZ2BGR && dcn == 3 ) + { + if( 
CvtColorIPPLoopCopy(src, dst, IPPGeneralReorderFunctor(ippiXYZ2RGBTab[depth], ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth)) ) + return; + } + else if( code == CV_XYZ2BGR && dcn == 4 ) + { + if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiXYZ2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth)) ) + return; + } + if( code == CV_XYZ2RGB && dcn == 3 ) + { + if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiXYZ2RGBTab[depth])) ) + return; + } + else if( code == CV_XYZ2RGB && dcn == 4 ) + { + if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiXYZ2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)) ) + return; + } +#endif + if( depth == CV_8U ) CvtColorLoop(src, dst, XYZ2RGB_i(dcn, bidx, 0)); else if( depth == CV_16U ) @@ -2607,6 +3025,52 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) _dst.create(sz, CV_MAKETYPE(depth, 3)); dst = _dst.getMat(); +#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) + if( depth == CV_8U || depth == CV_16U ) + { + if( code == CV_BGR2HSV_FULL && scn == 3 ) + { + if( CvtColorIPPLoopCopy(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth], ippiRGB2HSVTab[depth], 2, 1, 0, depth)) ) + return; + } + else if( code == CV_BGR2HSV_FULL && scn == 4 ) + { + if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2HSVTab[depth], 2, 1, 0, depth)) ) + return; + } + else if( code == CV_RGB2HSV_FULL && scn == 3 ) + { + if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiRGB2HSVTab[depth])) ) + return; + } + else if( code == CV_RGB2HSV_FULL && scn == 4 ) + { + if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2HSVTab[depth], 0, 1, 2, depth)) ) + return; + } + else if( code == CV_BGR2HLS_FULL && scn == 3 ) + { + if( CvtColorIPPLoopCopy(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC3RTab[depth], ippiRGB2HLSTab[depth], 2, 1, 0, depth)) ) + return; + } + else if( code == 
CV_BGR2HLS_FULL && scn == 4 ) + { + if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2HLSTab[depth], 2, 1, 0, depth)) ) + return; + } + else if( code == CV_RGB2HLS_FULL && scn == 3 ) + { + if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiRGB2HLSTab[depth])) ) + return; + } + else if( code == CV_RGB2HLS_FULL && scn == 4 ) + { + if( CvtColorIPPLoop(src, dst, IPPReorderGeneralFunctor(ippiSwapChannelsC4C3RTab[depth], ippiRGB2HLSTab[depth], 0, 1, 2, depth)) ) + return; + } + } +#endif + if( code == CV_BGR2HSV || code == CV_RGB2HSV || code == CV_BGR2HSV_FULL || code == CV_RGB2HSV_FULL ) { @@ -2642,6 +3106,52 @@ void cv::cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) _dst.create(sz, CV_MAKETYPE(depth, dcn)); dst = _dst.getMat(); +#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) + if( depth == CV_8U || depth == CV_16U ) + { + if( code == CV_HSV2BGR_FULL && dcn == 3 ) + { + if( CvtColorIPPLoopCopy(src, dst, IPPGeneralReorderFunctor(ippiHSV2RGBTab[depth], ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth)) ) + return; + } + else if( code == CV_HSV2BGR_FULL && dcn == 4 ) + { + if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiHSV2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth)) ) + return; + } + else if( code == CV_HSV2RGB_FULL && dcn == 3 ) + { + if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiHSV2RGBTab[depth])) ) + return; + } + else if( code == CV_HSV2RGB_FULL && dcn == 4 ) + { + if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiHSV2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)) ) + return; + } + else if( code == CV_HLS2BGR_FULL && dcn == 3 ) + { + if( CvtColorIPPLoopCopy(src, dst, IPPGeneralReorderFunctor(ippiHLS2RGBTab[depth], ippiSwapChannelsC3RTab[depth], 2, 1, 0, depth)) ) + return; + } + else if( code == CV_HLS2BGR_FULL && dcn == 4 ) + { + if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiHLS2RGBTab[depth], 
ippiSwapChannelsC3C4RTab[depth], 2, 1, 0, depth)) ) + return; + } + else if( code == CV_HLS2RGB_FULL && dcn == 3 ) + { + if( CvtColorIPPLoopCopy(src, dst, IPPGeneralFunctor(ippiHLS2RGBTab[depth])) ) + return; + } + else if( code == CV_HLS2RGB_FULL && dcn == 4 ) + { + if( CvtColorIPPLoop(src, dst, IPPGeneralReorderFunctor(ippiHLS2RGBTab[depth], ippiSwapChannelsC3C4RTab[depth], 0, 1, 2, depth)) ) + return; + } + } +#endif + if( code == CV_HSV2BGR || code == CV_HSV2RGB || code == CV_HSV2BGR_FULL || code == CV_HSV2RGB_FULL ) { diff --git a/modules/imgproc/src/morph.cpp b/modules/imgproc/src/morph.cpp index 96b4bbef8..ef919027a 100644 --- a/modules/imgproc/src/morph.cpp +++ b/modules/imgproc/src/morph.cpp @@ -1137,7 +1137,8 @@ private: }; #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) -static bool IPPMorphReplicate(int op, const Mat &src, Mat &dst, const Mat &kernel, const Point &anchor) +static bool IPPMorphReplicate(int op, const Mat &src, Mat &dst, const Mat &kernel, + const Size& ksize, const Point &anchor, bool rectKernel) { int type = src.type(); const Mat* _src = &src; @@ -1149,55 +1150,65 @@ static bool IPPMorphReplicate(int op, const Mat &src, Mat &dst, const Mat &kerne } //DEPRECATED. Allocates and initializes morphology state structure for erosion or dilation operation. typedef IppStatus (CV_STDCALL* ippiMorphologyInitAllocFunc)(int, const void*, IppiSize, IppiPoint, IppiMorphState **); - ippiMorphologyInitAllocFunc ippInitAllocFunc = - type == CV_8UC1 ? (ippiMorphologyInitAllocFunc)ippiMorphologyInitAlloc_8u_C1R : - type == CV_8UC3 ? (ippiMorphologyInitAllocFunc)ippiMorphologyInitAlloc_8u_C3R : - type == CV_8UC4 ? (ippiMorphologyInitAllocFunc)ippiMorphologyInitAlloc_8u_C4R : - type == CV_32FC1 ? (ippiMorphologyInitAllocFunc)ippiMorphologyInitAlloc_32f_C1R : - type == CV_32FC3 ? (ippiMorphologyInitAllocFunc)ippiMorphologyInitAlloc_32f_C3R : - type == CV_32FC4 ? 
(ippiMorphologyInitAllocFunc)ippiMorphologyInitAlloc_32f_C4R : - 0; - typedef IppStatus (CV_STDCALL* ippiMorphologyBorderReplicateFunc)(const void*, int, void *, int, IppiSize, IppiBorderType, IppiMorphState *); - ippiMorphologyBorderReplicateFunc ippFunc = 0; - switch( op ) + typedef IppStatus (CV_STDCALL* ippiMorphologyBorderReplicateFunc)(const void*, int, void *, int, + IppiSize, IppiBorderType, IppiMorphState *); + typedef IppStatus (CV_STDCALL* ippiFilterMinMaxGetBufferSizeFunc)(int, IppiSize, int*); + typedef IppStatus (CV_STDCALL* ippiFilterMinMaxBorderReplicateFunc)(const void*, int, void*, int, + IppiSize, IppiSize, IppiPoint, void*); + + ippiMorphologyInitAllocFunc initAllocFunc = 0; + ippiMorphologyBorderReplicateFunc morphFunc = 0; + ippiFilterMinMaxGetBufferSizeFunc getBufSizeFunc = 0; + ippiFilterMinMaxBorderReplicateFunc morphRectFunc = 0; + + #define IPP_MORPH_CASE(type, flavor) \ + case type: \ + initAllocFunc = (ippiMorphologyInitAllocFunc)ippiMorphologyInitAlloc_##flavor; \ + morphFunc = op == MORPH_ERODE ? (ippiMorphologyBorderReplicateFunc)ippiErodeBorderReplicate_##flavor : \ + (ippiMorphologyBorderReplicateFunc)ippiDilateBorderReplicate_##flavor; \ + getBufSizeFunc = (ippiFilterMinMaxGetBufferSizeFunc)ippiFilterMinGetBufferSize_##flavor; \ + morphRectFunc = op == MORPH_ERODE ? (ippiFilterMinMaxBorderReplicateFunc)ippiFilterMinBorderReplicate_##flavor : \ + (ippiFilterMinMaxBorderReplicateFunc)ippiFilterMaxBorderReplicate_##flavor; \ + break + + switch( type ) { - case MORPH_DILATE: - { - ippFunc = - type == CV_8UC1 ? (ippiMorphologyBorderReplicateFunc)ippiDilateBorderReplicate_8u_C1R : - type == CV_8UC3 ? (ippiMorphologyBorderReplicateFunc)ippiDilateBorderReplicate_8u_C3R : - type == CV_8UC4 ? (ippiMorphologyBorderReplicateFunc)ippiDilateBorderReplicate_8u_C4R : - type == CV_32FC1 ? (ippiMorphologyBorderReplicateFunc)ippiDilateBorderReplicate_32f_C1R : - type == CV_32FC3 ? 
(ippiMorphologyBorderReplicateFunc)ippiDilateBorderReplicate_32f_C3R : - type == CV_32FC4 ? (ippiMorphologyBorderReplicateFunc)ippiDilateBorderReplicate_32f_C4R : - 0; - break; - } - case MORPH_ERODE: - { - ippFunc = - type == CV_8UC1 ? (ippiMorphologyBorderReplicateFunc)ippiErodeBorderReplicate_8u_C1R : - type == CV_8UC3 ? (ippiMorphologyBorderReplicateFunc)ippiErodeBorderReplicate_8u_C3R : - type == CV_8UC4 ? (ippiMorphologyBorderReplicateFunc)ippiErodeBorderReplicate_8u_C4R : - type == CV_32FC1 ? (ippiMorphologyBorderReplicateFunc)ippiErodeBorderReplicate_32f_C1R : - type == CV_32FC3 ? (ippiMorphologyBorderReplicateFunc)ippiErodeBorderReplicate_32f_C3R : - type == CV_32FC4 ? (ippiMorphologyBorderReplicateFunc)ippiErodeBorderReplicate_32f_C4R : - 0; - break; - } + IPP_MORPH_CASE(CV_8UC1, 8u_C1R); + IPP_MORPH_CASE(CV_8UC3, 8u_C3R); + IPP_MORPH_CASE(CV_8UC4, 8u_C4R); + IPP_MORPH_CASE(CV_32FC1, 32f_C1R); + IPP_MORPH_CASE(CV_32FC3, 32f_C3R); + IPP_MORPH_CASE(CV_32FC4, 32f_C4R); + default: + return false; } - if( ippFunc && ippInitAllocFunc) + #undef IPP_MORPH_CASE + + IppiSize roiSize = {src.cols, src.rows}; + IppiSize kernelSize = {ksize.width, ksize.height}; + IppiPoint point = {anchor.x, anchor.y}; + + if( !rectKernel && morphFunc && initAllocFunc ) { IppiMorphState* pState; - IppiSize roiSize = {src.cols, src.rows}; - IppiSize kernelSize = {kernel.cols, kernel.rows}; - IppiPoint point = {anchor.x, anchor.y}; - if( ippInitAllocFunc( roiSize.width, kernel.data, kernelSize, point, &pState ) < 0 ) + if( initAllocFunc( roiSize.width, kernel.data, kernelSize, point, &pState ) < 0 ) return false; - bool is_ok = ippFunc( _src->data, _src->step[0], dst.data, dst.step[0], roiSize, ippBorderRepl, pState ) >= 0; + bool is_ok = morphFunc( _src->data, (int)_src->step[0], + dst.data, (int)dst.step[0], + roiSize, ippBorderRepl, pState ) >= 0; ippiMorphologyFree(pState); return is_ok; } + else if( rectKernel && morphRectFunc && getBufSizeFunc ) + { + int bufSize = 0; + if( 
getBufSizeFunc( src.cols, kernelSize, &bufSize) < 0 ) + return false; + AutoBuffer buf(bufSize + 64); + uchar* buffer = alignPtr((uchar*)buf, 32); + return morphRectFunc(_src->data, (int)_src->step[0], dst.data, (int)dst.step[0], + roiSize, kernelSize, point, buffer) >= 0; + } return false; } @@ -1211,7 +1222,7 @@ static bool IPPMorphOp(int op, InputArray _src, OutputArray _dst, !( borderType == cv::BORDER_REPLICATE || (borderType == cv::BORDER_CONSTANT && borderValue == morphologyDefaultBorderValue()) ) || !( op == MORPH_DILATE || op == MORPH_ERODE) ) return false; - if( borderType == cv::BORDER_CONSTANT ) + if( borderType == cv::BORDER_CONSTANT && kernel.data ) { int x, y; for( y = 0; y < kernel.rows; y++ ) @@ -1250,23 +1261,29 @@ static bool IPPMorphOp(int op, InputArray _src, OutputArray _dst, return true; } + bool rectKernel = false; if( !kernel.data ) { - kernel = getStructuringElement(MORPH_RECT, Size(1+iterations*2,1+iterations*2)); + ksize = Size(1+iterations*2,1+iterations*2); normanchor = Point(iterations, iterations); + rectKernel = true; iterations = 1; } - else if( iterations > 1 && countNonZero(kernel) == kernel.rows*kernel.cols ) + else if( iterations >= 1 && countNonZero(kernel) == kernel.rows*kernel.cols ) { + ksize = Size(ksize.width + (iterations-1)*(ksize.width-1), + ksize.height + (iterations-1)*(ksize.height-1)), normanchor = Point(normanchor.x*iterations, normanchor.y*iterations); - kernel = getStructuringElement(MORPH_RECT, - Size(ksize.width + (iterations-1)*(ksize.width-1), - ksize.height + (iterations-1)*(ksize.height-1)), - normanchor); + kernel = Mat(); + rectKernel = true; iterations = 1; } - return IPPMorphReplicate( op, src, dst, kernel, normanchor ); + // TODO: implement the case of iterations > 1. 
+ if( iterations > 1 ) + return false; + + return IPPMorphReplicate( op, src, dst, kernel, ksize, normanchor, rectKernel ); } #endif @@ -1456,7 +1473,7 @@ static void convertConvKernel( const IplConvKernel* src, cv::Mat& dst, cv::Point int i, size = src->nRows*src->nCols; for( i = 0; i < size; i++ ) - dst.data[i] = (uchar)src->values[i]; + dst.data[i] = (uchar)(src->values[i] != 0); } diff --git a/modules/ocl/include/opencv2/ocl/matrix_operations.hpp b/modules/ocl/include/opencv2/ocl/matrix_operations.hpp index 6c2f5156c..1361367fc 100644 --- a/modules/ocl/include/opencv2/ocl/matrix_operations.hpp +++ b/modules/ocl/include/opencv2/ocl/matrix_operations.hpp @@ -41,8 +41,8 @@ // //M*/ -#ifndef __OPENCV_GPU_MATRIX_OPERATIONS_HPP__ -#define __OPENCV_GPU_MATRIX_OPERATIONS_HPP__ +#ifndef __OPENCV_OCL_MATRIX_OPERATIONS_HPP__ +#define __OPENCV_OCL_MATRIX_OPERATIONS_HPP__ namespace cv { @@ -514,4 +514,4 @@ namespace cv } /* end of namespace cv */ -#endif /* __OPENCV_GPU_MATRIX_OPERATIONS_HPP__ */ +#endif /* __OPENCV_OCL_MATRIX_OPERATIONS_HPP__ */ diff --git a/modules/ocl/perf/perf_arithm.cpp b/modules/ocl/perf/perf_arithm.cpp index 1fb82f122..814b272f0 100644 --- a/modules/ocl/perf/perf_arithm.cpp +++ b/modules/ocl/perf/perf_arithm.cpp @@ -76,7 +76,7 @@ PERF_TEST_P(LUTFixture, LUT, { ocl::oclMat oclSrc(src), oclLut(lut), oclDst(srcSize, dstType); - TEST_CYCLE() cv::ocl::LUT(oclSrc, oclLut, oclDst); + OCL_TEST_CYCLE() cv::ocl::LUT(oclSrc, oclLut, oclDst); oclDst.download(dst); SANITY_CHECK(dst); @@ -111,7 +111,7 @@ PERF_TEST_P(ExpFixture, Exp, OCL_TYPICAL_MAT_SIZES) { ocl::oclMat oclSrc(src), oclDst(srcSize, src.type()); - TEST_CYCLE() cv::ocl::exp(oclSrc, oclDst); + OCL_TEST_CYCLE() cv::ocl::exp(oclSrc, oclDst); oclDst.download(dst); @@ -150,7 +150,7 @@ PERF_TEST_P(LogFixture, Log, OCL_TYPICAL_MAT_SIZES) { ocl::oclMat oclSrc(src), oclDst(srcSize, src.type()); - TEST_CYCLE() cv::ocl::log(oclSrc, oclDst); + OCL_TEST_CYCLE() cv::ocl::log(oclSrc, oclDst); oclDst.download(dst); 
@@ -190,7 +190,7 @@ PERF_TEST_P(AddFixture, Add, { ocl::oclMat oclSrc1(src1), oclSrc2(src2), oclDst(srcSize, type); - TEST_CYCLE() cv::ocl::add(oclSrc1, oclSrc2, oclDst); + OCL_TEST_CYCLE() cv::ocl::add(oclSrc1, oclSrc2, oclDst); oclDst.download(dst); @@ -229,7 +229,7 @@ PERF_TEST_P(MulFixture, Mul, ::testing::Combine(OCL_TYPICAL_MAT_SIZES, { ocl::oclMat oclSrc1(src1), oclSrc2(src2), oclDst(srcSize, type); - TEST_CYCLE() cv::ocl::multiply(oclSrc1, oclSrc2, oclDst); + OCL_TEST_CYCLE() cv::ocl::multiply(oclSrc1, oclSrc2, oclDst); oclDst.download(dst); @@ -275,7 +275,7 @@ PERF_TEST_P(DivFixture, Div, { ocl::oclMat oclSrc1(src1), oclSrc2(src2), oclDst(srcSize, type); - TEST_CYCLE() cv::ocl::divide(oclSrc1, oclSrc2, oclDst); + OCL_TEST_CYCLE() cv::ocl::divide(oclSrc1, oclSrc2, oclDst); oclDst.download(dst); @@ -312,7 +312,7 @@ PERF_TEST_P(AbsDiffFixture, Absdiff, { ocl::oclMat oclSrc1(src1), oclSrc2(src2), oclDst(srcSize, type); - TEST_CYCLE() cv::ocl::absdiff(oclSrc1, oclSrc2, oclDst); + OCL_TEST_CYCLE() cv::ocl::absdiff(oclSrc1, oclSrc2, oclDst); oclDst.download(dst); @@ -351,7 +351,7 @@ PERF_TEST_P(CartToPolarFixture, CartToPolar, OCL_TYPICAL_MAT_SIZES) ocl::oclMat oclSrc1(src1), oclSrc2(src2), oclDst1(srcSize, src1.type()), oclDst2(srcSize, src1.type()); - TEST_CYCLE() cv::ocl::cartToPolar(oclSrc1, oclSrc2, oclDst1, oclDst2); + OCL_TEST_CYCLE() cv::ocl::cartToPolar(oclSrc1, oclSrc2, oclDst1, oclDst2); oclDst1.download(dst1); oclDst2.download(dst2); @@ -392,7 +392,7 @@ PERF_TEST_P(PolarToCartFixture, PolarToCart, OCL_TYPICAL_MAT_SIZES) ocl::oclMat oclSrc1(src1), oclSrc2(src2), oclDst1(srcSize, src1.type()), oclDst2(srcSize, src1.type()); - TEST_CYCLE() cv::ocl::polarToCart(oclSrc1, oclSrc2, oclDst1, oclDst2); + OCL_TEST_CYCLE() cv::ocl::polarToCart(oclSrc1, oclSrc2, oclDst1, oclDst2); oclDst1.download(dst1); oclDst2.download(dst2); @@ -430,7 +430,7 @@ PERF_TEST_P(MagnitudeFixture, Magnitude, OCL_TYPICAL_MAT_SIZES) ocl::oclMat oclSrc1(src1), oclSrc2(src2), 
oclDst(srcSize, src1.type()); - TEST_CYCLE() cv::ocl::magnitude(oclSrc1, oclSrc2, oclDst); + OCL_TEST_CYCLE() cv::ocl::magnitude(oclSrc1, oclSrc2, oclDst); oclDst.download(dst); @@ -465,7 +465,7 @@ PERF_TEST_P(TransposeFixture, Transpose, { ocl::oclMat oclSrc(src), oclDst(srcSize, type); - TEST_CYCLE() cv::ocl::transpose(oclSrc, oclDst); + OCL_TEST_CYCLE() cv::ocl::transpose(oclSrc, oclDst); oclDst.download(dst); @@ -500,7 +500,7 @@ PERF_TEST_P(FlipFixture, Flip, { ocl::oclMat oclSrc(src), oclDst(srcSize, type); - TEST_CYCLE() cv::ocl::flip(oclSrc, oclDst, 0); + OCL_TEST_CYCLE() cv::ocl::flip(oclSrc, oclDst, 0); oclDst.download(dst); @@ -538,7 +538,7 @@ PERF_TEST_P(minMaxFixture, minMax, { ocl::oclMat oclSrc(src); - TEST_CYCLE() cv::ocl::minMax(oclSrc, &min_val, &max_val); + OCL_TEST_CYCLE() cv::ocl::minMax(oclSrc, &min_val, &max_val); ASSERT_GE(max_val, min_val); SANITY_CHECK(min_val); @@ -581,7 +581,7 @@ PERF_TEST_P(minMaxLocFixture, minMaxLoc, { ocl::oclMat oclSrc(src); - TEST_CYCLE() cv::ocl::minMaxLoc(oclSrc, &min_val, &max_val, &min_loc, &max_loc); + OCL_TEST_CYCLE() cv::ocl::minMaxLoc(oclSrc, &min_val, &max_val, &min_loc, &max_loc); ASSERT_GE(max_val, min_val); SANITY_CHECK(min_val); @@ -620,7 +620,7 @@ PERF_TEST_P(SumFixture, Sum, { ocl::oclMat oclSrc(src); - TEST_CYCLE() result = cv::ocl::sum(oclSrc); + OCL_TEST_CYCLE() result = cv::ocl::sum(oclSrc); SANITY_CHECK(result); } @@ -655,7 +655,7 @@ PERF_TEST_P(countNonZeroFixture, countNonZero, { ocl::oclMat oclSrc(src); - TEST_CYCLE() result = cv::ocl::countNonZero(oclSrc); + OCL_TEST_CYCLE() result = cv::ocl::countNonZero(oclSrc); SANITY_CHECK(result); } @@ -688,7 +688,7 @@ PERF_TEST_P(PhaseFixture, Phase, OCL_TYPICAL_MAT_SIZES) ocl::oclMat oclSrc1(src1), oclSrc2(src2), oclDst(srcSize, src1.type()); - TEST_CYCLE() cv::ocl::phase(oclSrc1, oclSrc2, oclDst, 1); + OCL_TEST_CYCLE() cv::ocl::phase(oclSrc1, oclSrc2, oclDst, 1); oclDst.download(dst); @@ -725,7 +725,7 @@ PERF_TEST_P(BitwiseAndFixture, bitwise_and, { 
ocl::oclMat oclSrc1(src1), oclSrc2(src2), oclDst(srcSize, src1.type()); - TEST_CYCLE() cv::ocl::bitwise_and(oclSrc1, oclSrc2, oclDst); + OCL_TEST_CYCLE() cv::ocl::bitwise_and(oclSrc1, oclSrc2, oclDst); oclDst.download(dst); @@ -760,7 +760,7 @@ PERF_TEST_P(BitwiseAndFixture, bitwise_not, { ocl::oclMat oclSrc(src), oclDst(srcSize, type); - TEST_CYCLE() cv::ocl::bitwise_not(oclSrc, oclDst); + OCL_TEST_CYCLE() cv::ocl::bitwise_not(oclSrc, oclDst); oclDst.download(dst); @@ -795,7 +795,7 @@ PERF_TEST_P(CompareFixture, compare, { ocl::oclMat oclSrc1(src1), oclSrc2(src2), oclDst(srcSize, CV_8UC1); - TEST_CYCLE() cv::ocl::compare(oclSrc1, oclSrc2, oclDst, CMP_EQ); + OCL_TEST_CYCLE() cv::ocl::compare(oclSrc1, oclSrc2, oclDst, CMP_EQ); oclDst.download(dst); @@ -826,7 +826,7 @@ PERF_TEST_P(PowFixture, pow, OCL_TYPICAL_MAT_SIZES) { ocl::oclMat oclSrc(src), oclDst(srcSize, src.type()); - TEST_CYCLE() cv::ocl::pow(oclSrc, -2.0, oclDst); + OCL_TEST_CYCLE() cv::ocl::pow(oclSrc, -2.0, oclDst); oclDst.download(dst); @@ -858,7 +858,7 @@ PERF_TEST_P(MagnitudeSqrFixture, MagnitudeSqr, OCL_TYPICAL_MAT_SIZES) { ocl::oclMat oclSrc1(src1), oclSrc2(src2), oclDst(srcSize, src1.type()); - TEST_CYCLE() cv::ocl::magnitudeSqr(oclSrc1, oclSrc2, oclDst); + OCL_TEST_CYCLE() cv::ocl::magnitudeSqr(oclSrc1, oclSrc2, oclDst); oclDst.download(dst); @@ -910,7 +910,7 @@ PERF_TEST_P(AddWeightedFixture, AddWeighted, { ocl::oclMat oclSrc1(src1), oclSrc2(src2), oclDst(srcSize, type); - TEST_CYCLE() cv::ocl::addWeighted(oclSrc1, alpha, oclSrc2, beta, gama, oclDst); + OCL_TEST_CYCLE() cv::ocl::addWeighted(oclSrc1, alpha, oclSrc2, beta, gama, oclDst); oclDst.download(dst); diff --git a/modules/ocl/perf/perf_blend.cpp b/modules/ocl/perf/perf_blend.cpp index ea53c8a66..018ec6315 100644 --- a/modules/ocl/perf/perf_blend.cpp +++ b/modules/ocl/perf/perf_blend.cpp @@ -97,7 +97,7 @@ PERF_TEST_P(blendLinearFixture, blendLinear, OCL_TYPICAL_MAT_SIZES) ocl::oclMat oclSrc1(src1), oclSrc2(src2), oclDst; ocl::oclMat 
oclWeights1(weights1), oclWeights2(weights2); - TEST_CYCLE() cv::ocl::blendLinear(oclSrc1, oclSrc2, oclWeights1, oclWeights2, oclDst); + OCL_TEST_CYCLE() cv::ocl::blendLinear(oclSrc1, oclSrc2, oclWeights1, oclWeights2, oclDst); oclDst.download(dst); diff --git a/modules/ocl/perf/perf_brute_force_matcher.cpp b/modules/ocl/perf/perf_brute_force_matcher.cpp index af93b1b10..33c42c72d 100644 --- a/modules/ocl/perf/perf_brute_force_matcher.cpp +++ b/modules/ocl/perf/perf_brute_force_matcher.cpp @@ -60,7 +60,7 @@ PERF_TEST_P(BruteForceMatcherFixture, DISABLED_match, vector matches; Mat query(srcSize, CV_32F), train(srcSize, CV_32F); - declare.in(query, train).time(srcSize.height == 2000 ? 8 : 4 ); + declare.in(query, train).time(srcSize.height == 2000 ? 9 : 4 ); randu(query, 0.0f, 1.0f); randu(train, 0.0f, 1.0f); @@ -75,8 +75,12 @@ PERF_TEST_P(BruteForceMatcherFixture, DISABLED_match, { ocl::BruteForceMatcher_OCL_base oclMatcher(ocl::BruteForceMatcher_OCL_base::L2Dist); ocl::oclMat oclQuery(query), oclTrain(train); + ocl::oclMat oclTrainIdx, oclDistance; - TEST_CYCLE() oclMatcher.match(oclQuery, oclTrain, matches); + OCL_TEST_CYCLE() + oclMatcher.matchSingle(oclQuery, oclTrain, oclTrainIdx, oclDistance); + + oclMatcher.matchDownload(oclTrainIdx, oclDistance, matches); SANITY_CHECK_MATCHES(matches); } @@ -85,7 +89,7 @@ PERF_TEST_P(BruteForceMatcherFixture, DISABLED_match, } PERF_TEST_P(BruteForceMatcherFixture, DISABLED_knnMatch, - OCL_BFMATCHER_TYPICAL_MAT_SIZES) // TODO too many outliers + OCL_BFMATCHER_TYPICAL_MAT_SIZES) // TODO too big difference between implementations { const Size srcSize = GetParam(); @@ -96,11 +100,11 @@ PERF_TEST_P(BruteForceMatcherFixture, DISABLED_knnMatch, declare.in(query, train); if (srcSize.height == 2000) - declare.time(8); + declare.time(9); if (RUN_PLAIN_IMPL) { - BFMatcher matcher (NORM_L2); + BFMatcher matcher(NORM_L2); TEST_CYCLE() matcher.knnMatch(query, train, matches, 2); std::vector & matches0 = matches[0], & matches1 = 
matches[1]; @@ -111,8 +115,12 @@ PERF_TEST_P(BruteForceMatcherFixture, DISABLED_knnMatch, { ocl::BruteForceMatcher_OCL_base oclMatcher(ocl::BruteForceMatcher_OCL_base::L2Dist); ocl::oclMat oclQuery(query), oclTrain(train); + ocl::oclMat oclTrainIdx, oclDistance, oclAllDist; - TEST_CYCLE() oclMatcher.knnMatch(oclQuery, oclTrain, matches, 2); + OCL_TEST_CYCLE() + oclMatcher.knnMatchSingle(oclQuery, oclTrain, oclTrainIdx, oclDistance, oclAllDist, 2); + + oclMatcher.knnMatchDownload(oclTrainIdx, oclDistance, matches); std::vector & matches0 = matches[0], & matches1 = matches[1]; SANITY_CHECK_MATCHES(matches0); @@ -122,8 +130,8 @@ PERF_TEST_P(BruteForceMatcherFixture, DISABLED_knnMatch, OCL_PERF_ELSE } -PERF_TEST_P(BruteForceMatcherFixture, DISABLED_radiusMatch, - OCL_BFMATCHER_TYPICAL_MAT_SIZES) // TODO too many outliers +PERF_TEST_P(BruteForceMatcherFixture, radiusMatch, + OCL_BFMATCHER_TYPICAL_MAT_SIZES) { const Size srcSize = GetParam(); @@ -131,15 +139,17 @@ PERF_TEST_P(BruteForceMatcherFixture, DISABLED_radiusMatch, vector > matches(2); Mat query(srcSize, CV_32F), train(srcSize, CV_32F); declare.in(query, train); - Mat trainIdx, distance, allDist; randu(query, 0.0f, 1.0f); randu(train, 0.0f, 1.0f); + if (srcSize.height == 2000) + declare.time(9.15); + if (RUN_PLAIN_IMPL) { - BFMatcher matcher (NORM_L2); - TEST_CYCLE() matcher.radiusMatch(query, matches, max_distance); + cv::BFMatcher matcher(NORM_L2); + TEST_CYCLE() matcher.radiusMatch(query, train, matches, max_distance); std::vector & matches0 = matches[0], & matches1 = matches[1]; SANITY_CHECK_MATCHES(matches0); @@ -149,8 +159,12 @@ PERF_TEST_P(BruteForceMatcherFixture, DISABLED_radiusMatch, { ocl::oclMat oclQuery(query), oclTrain(train); ocl::BruteForceMatcher_OCL_base oclMatcher(ocl::BruteForceMatcher_OCL_base::L2Dist); + ocl::oclMat oclTrainIdx, oclDistance, oclNMatches; - TEST_CYCLE() oclMatcher.radiusMatch(oclQuery, oclTrain, matches, max_distance); + OCL_TEST_CYCLE() + 
oclMatcher.radiusMatchSingle(oclQuery, oclTrain, oclTrainIdx, oclDistance, oclNMatches, max_distance); + + oclMatcher.radiusMatchDownload(oclTrainIdx, oclDistance, oclNMatches, matches); std::vector & matches0 = matches[0], & matches1 = matches[1]; SANITY_CHECK_MATCHES(matches0); diff --git a/modules/ocl/perf/perf_calib3d.cpp b/modules/ocl/perf/perf_calib3d.cpp index b99c7fdef..997e84856 100644 --- a/modules/ocl/perf/perf_calib3d.cpp +++ b/modules/ocl/perf/perf_calib3d.cpp @@ -48,7 +48,7 @@ ///////////// StereoMatchBM //////////////////////// -PERF_TEST(StereoMatchBMFixture, DISABLED_StereoMatchBM) // TODO doesn't work properly +PERF_TEST(StereoMatchBMFixture, StereoMatchBM) { Mat left_image = imread(getDataPath("gpu/stereobm/aloe-L.png"), cv::IMREAD_GRAYSCALE); Mat right_image = imread(getDataPath("gpu/stereobm/aloe-R.png"), cv::IMREAD_GRAYSCALE); @@ -69,20 +69,17 @@ PERF_TEST(StereoMatchBMFixture, DISABLED_StereoMatchBM) // TODO doesn't work pro oclDisp(left_image.size(), CV_16SC1); ocl::StereoBM_OCL oclBM(0, n_disp, winSize); - TEST_CYCLE() oclBM(oclLeft, oclRight, oclDisp); - - oclDisp.download(disp); - - SANITY_CHECK(disp); + OCL_TEST_CYCLE() oclBM(oclLeft, oclRight, oclDisp); } else if (RUN_PLAIN_IMPL) { Ptr bm = createStereoBM(n_disp, winSize); TEST_CYCLE() bm->compute(left_image, right_image, disp); - - SANITY_CHECK(disp); } else OCL_PERF_ELSE + + int value = 0; + SANITY_CHECK(value); } diff --git a/modules/ocl/perf/perf_canny.cpp b/modules/ocl/perf/perf_canny.cpp index 3a5c633f5..259684092 100644 --- a/modules/ocl/perf/perf_canny.cpp +++ b/modules/ocl/perf/perf_canny.cpp @@ -49,7 +49,7 @@ using namespace perf; ///////////// Canny //////////////////////// -PERF_TEST(CannyFixture, DISABLED_Canny) // TODO difference between implmentations +PERF_TEST(CannyFixture, Canny) { Mat img = imread(getDataPath("gpu/stereobm/aloe-L.png"), cv::IMREAD_GRAYSCALE), edges(img.size(), CV_8UC1); @@ -61,17 +61,16 @@ PERF_TEST(CannyFixture, DISABLED_Canny) // TODO difference 
between implmentation { ocl::oclMat oclImg(img), oclEdges(img.size(), CV_8UC1); - TEST_CYCLE() ocl::Canny(oclImg, oclEdges, 50.0, 100.0); + OCL_TEST_CYCLE() ocl::Canny(oclImg, oclEdges, 50.0, 100.0); oclEdges.download(edges); - - SANITY_CHECK(edges); } else if (RUN_PLAIN_IMPL) { TEST_CYCLE() Canny(img, edges, 50.0, 100.0); - - SANITY_CHECK(edges); } else OCL_PERF_ELSE + + int value = 0; + SANITY_CHECK(value); } diff --git a/modules/ocl/perf/perf_color.cpp b/modules/ocl/perf/perf_color.cpp index e66d9c071..b66fc2b0a 100644 --- a/modules/ocl/perf/perf_color.cpp +++ b/modules/ocl/perf/perf_color.cpp @@ -62,7 +62,7 @@ PERF_TEST_P(cvtColorFixture, cvtColor, OCL_TYPICAL_MAT_SIZES) { ocl::oclMat oclSrc(src), oclDst(src.size(), CV_8UC4); - TEST_CYCLE() ocl::cvtColor(oclSrc, oclDst, COLOR_RGBA2GRAY, 4); + OCL_TEST_CYCLE() ocl::cvtColor(oclSrc, oclDst, COLOR_RGBA2GRAY, 4); oclDst.download(dst); SANITY_CHECK(dst); diff --git a/modules/ocl/perf/perf_fft.cpp b/modules/ocl/perf/perf_fft.cpp index ae0291c3e..840f009a3 100644 --- a/modules/ocl/perf/perf_fft.cpp +++ b/modules/ocl/perf/perf_fft.cpp @@ -66,7 +66,7 @@ PERF_TEST_P(dftFixture, DISABLED_dft, OCL_TYPICAL_MAT_SIZES) // TODO not impleme { ocl::oclMat oclSrc(src), oclDst; - TEST_CYCLE() cv::ocl::dft(oclSrc, oclDst); + OCL_TEST_CYCLE() cv::ocl::dft(oclSrc, oclDst); oclDst.download(dst); diff --git a/modules/ocl/perf/perf_filters.cpp b/modules/ocl/perf/perf_filters.cpp index 588c0569e..28c290096 100644 --- a/modules/ocl/perf/perf_filters.cpp +++ b/modules/ocl/perf/perf_filters.cpp @@ -71,7 +71,7 @@ PERF_TEST_P(BlurFixture, Blur, { ocl::oclMat oclSrc(src), oclDst(srcSize, type); - TEST_CYCLE() cv::ocl::blur(oclSrc, oclDst, ksize, Point(-1, -1), bordertype); + OCL_TEST_CYCLE() cv::ocl::blur(oclSrc, oclDst, ksize, Point(-1, -1), bordertype); oclDst.download(dst); @@ -109,7 +109,7 @@ PERF_TEST_P(LaplacianFixture, Laplacian, { ocl::oclMat oclSrc(src), oclDst(srcSize, type); - TEST_CYCLE() cv::ocl::Laplacian(oclSrc, oclDst, -1, 
ksize, 1); + OCL_TEST_CYCLE() cv::ocl::Laplacian(oclSrc, oclDst, -1, ksize, 1); oclDst.download(dst); @@ -148,7 +148,7 @@ PERF_TEST_P(ErodeFixture, Erode, { ocl::oclMat oclSrc(src), oclDst(srcSize, type), oclKer(ker); - TEST_CYCLE() cv::ocl::erode(oclSrc, oclDst, oclKer); + OCL_TEST_CYCLE() cv::ocl::erode(oclSrc, oclDst, oclKer); oclDst.download(dst); @@ -189,7 +189,7 @@ PERF_TEST_P(SobelFixture, Sobel, { ocl::oclMat oclSrc(src), oclDst(srcSize, type); - TEST_CYCLE() cv::ocl::Sobel(oclSrc, oclDst, -1, dx, dy); + OCL_TEST_CYCLE() cv::ocl::Sobel(oclSrc, oclDst, -1, dx, dy); oclDst.download(dst); @@ -230,7 +230,7 @@ PERF_TEST_P(ScharrFixture, Scharr, { ocl::oclMat oclSrc(src), oclDst(srcSize, type); - TEST_CYCLE() cv::ocl::Scharr(oclSrc, oclDst, -1, dx, dy); + OCL_TEST_CYCLE() cv::ocl::Scharr(oclSrc, oclDst, -1, dx, dy); oclDst.download(dst); @@ -267,7 +267,7 @@ PERF_TEST_P(GaussianBlurFixture, GaussianBlur, { ocl::oclMat oclSrc(src), oclDst(srcSize, type); - TEST_CYCLE() cv::ocl::GaussianBlur(oclSrc, oclDst, Size(ksize, ksize), 0); + OCL_TEST_CYCLE() cv::ocl::GaussianBlur(oclSrc, oclDst, Size(ksize, ksize), 0); oclDst.download(dst); @@ -306,7 +306,7 @@ PERF_TEST_P(filter2DFixture, filter2D, { ocl::oclMat oclSrc(src), oclDst(srcSize, type), oclKernel(kernel); - TEST_CYCLE() cv::ocl::filter2D(oclSrc, oclDst, -1, oclKernel); + OCL_TEST_CYCLE() cv::ocl::filter2D(oclSrc, oclDst, -1, oclKernel); oclDst.download(dst); diff --git a/modules/ocl/perf/perf_gemm.cpp b/modules/ocl/perf/perf_gemm.cpp index fb68b92f7..aaa1dac23 100644 --- a/modules/ocl/perf/perf_gemm.cpp +++ b/modules/ocl/perf/perf_gemm.cpp @@ -51,13 +51,14 @@ using namespace perf; typedef TestBaseWithParam gemmFixture; -PERF_TEST_P(gemmFixture, DISABLED_gemm, OCL_TYPICAL_MAT_SIZES) // TODO not implemented +PERF_TEST_P(gemmFixture, DISABLED_gemm, + ::testing::Values(OCL_SIZE_1000, OCL_SIZE_2000)) // TODO not implemented { const Size srcSize = GetParam(); Mat src1(srcSize, CV_32FC1), src2(srcSize, CV_32FC1), 
src3(srcSize, CV_32FC1), dst(srcSize, CV_32FC1); - declare.in(src1, src2, src3).out(dst); + declare.in(src1, src2, src3).out(dst).time(srcSize == OCL_SIZE_2000 ? 65 : 8); randu(src1, -10.0f, 10.0f); randu(src2, -10.0f, 10.0f); randu(src3, -10.0f, 10.0f); @@ -67,7 +68,7 @@ PERF_TEST_P(gemmFixture, DISABLED_gemm, OCL_TYPICAL_MAT_SIZES) // TODO not imple ocl::oclMat oclSrc1(src1), oclSrc2(src2), oclSrc3(src3), oclDst(srcSize, CV_32FC1); - TEST_CYCLE() cv::ocl::gemm(oclSrc1, oclSrc2, 1.0, oclSrc3, 1.0, oclDst); + OCL_TEST_CYCLE() cv::ocl::gemm(oclSrc1, oclSrc2, 1.0, oclSrc3, 1.0, oclDst); oclDst.download(dst); diff --git a/modules/ocl/perf/perf_gftt.cpp b/modules/ocl/perf/perf_gftt.cpp index 7fe16c208..8a29adc0c 100644 --- a/modules/ocl/perf/perf_gftt.cpp +++ b/modules/ocl/perf/perf_gftt.cpp @@ -77,7 +77,7 @@ PERF_TEST_P(GoodFeaturesToTrackFixture, GoodFeaturesToTrack, ocl::oclMat oclFrame(frame), pts_oclmat; ocl::GoodFeaturesToTrackDetector_OCL detector(maxCorners, qualityLevel, minDistance); - TEST_CYCLE() detector(oclFrame, pts_oclmat); + OCL_TEST_CYCLE() detector(oclFrame, pts_oclmat); detector.downloadPoints(pts_oclmat, pts_gold); diff --git a/modules/ocl/perf/perf_haar.cpp b/modules/ocl/perf/perf_haar.cpp index df619953e..9c258fe25 100644 --- a/modules/ocl/perf/perf_haar.cpp +++ b/modules/ocl/perf/perf_haar.cpp @@ -78,7 +78,7 @@ PERF_TEST(HaarFixture, Haar) ASSERT_TRUE(faceCascade.load(getDataPath("gpu/haarcascade/haarcascade_frontalface_alt.xml"))) << "can't load haarcascade_frontalface_alt.xml"; - TEST_CYCLE() faceCascade.detectMultiScale(oclImg, faces, + OCL_TEST_CYCLE() faceCascade.detectMultiScale(oclImg, faces, 1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30)); SANITY_CHECK(faces, 4 + 1e-4); diff --git a/modules/ocl/perf/perf_hog.cpp b/modules/ocl/perf/perf_hog.cpp index 2288215f4..15846d831 100644 --- a/modules/ocl/perf/perf_hog.cpp +++ b/modules/ocl/perf/perf_hog.cpp @@ -72,7 +72,7 @@ PERF_TEST(HOGFixture, HOG) 
ocl_hog.setSVMDetector(ocl_hog.getDefaultPeopleDetector()); ocl::oclMat oclSrc(src); - TEST_CYCLE() ocl_hog.detectMultiScale(oclSrc, found_locations); + OCL_TEST_CYCLE() ocl_hog.detectMultiScale(oclSrc, found_locations); SANITY_CHECK(found_locations, 1 + DBL_EPSILON); } diff --git a/modules/ocl/perf/perf_imgproc.cpp b/modules/ocl/perf/perf_imgproc.cpp index 6b8f4ab34..cb1f8efa4 100644 --- a/modules/ocl/perf/perf_imgproc.cpp +++ b/modules/ocl/perf/perf_imgproc.cpp @@ -64,7 +64,7 @@ PERF_TEST_P(equalizeHistFixture, equalizeHist, OCL_TYPICAL_MAT_SIZES) { ocl::oclMat oclSrc(src), oclDst(srcSize, src.type()); - TEST_CYCLE() cv::ocl::equalizeHist(oclSrc, oclDst); + OCL_TEST_CYCLE() cv::ocl::equalizeHist(oclSrc, oclDst); oclDst.download(dst); @@ -101,7 +101,7 @@ PERF_TEST_P(CopyMakeBorderFixture, CopyMakeBorder, { ocl::oclMat oclSrc(src), oclDst(dstSize, type); - TEST_CYCLE() cv::ocl::copyMakeBorder(oclSrc, oclDst, 7, 5, 5, 7, borderType, cv::Scalar(1.0)); + OCL_TEST_CYCLE() cv::ocl::copyMakeBorder(oclSrc, oclDst, 7, 5, 5, 7, borderType, cv::Scalar(1.0)); oclDst.download(dst); @@ -141,7 +141,7 @@ PERF_TEST_P(cornerMinEigenValFixture, cornerMinEigenVal, { ocl::oclMat oclSrc(src), oclDst(srcSize, CV_32FC1); - TEST_CYCLE() cv::ocl::cornerMinEigenVal(oclSrc, oclDst, blockSize, apertureSize, borderType); + OCL_TEST_CYCLE() cv::ocl::cornerMinEigenVal(oclSrc, oclDst, blockSize, apertureSize, borderType); oclDst.download(dst); @@ -178,7 +178,7 @@ PERF_TEST_P(cornerHarrisFixture, cornerHarris, { ocl::oclMat oclSrc(src), oclDst(srcSize, CV_32FC1); - TEST_CYCLE() cv::ocl::cornerHarris(oclSrc, oclDst, 5, 7, 0.1, borderType); + OCL_TEST_CYCLE() cv::ocl::cornerHarris(oclSrc, oclDst, 5, 7, 0.1, borderType); oclDst.download(dst); @@ -209,7 +209,7 @@ PERF_TEST_P(integralFixture, DISABLED_integral, OCL_TYPICAL_MAT_SIZES) // TODO d { ocl::oclMat oclSrc(src), oclDst; - TEST_CYCLE() cv::ocl::integral(oclSrc, oclDst); + OCL_TEST_CYCLE() cv::ocl::integral(oclSrc, oclDst); oclDst.download(dst); 
@@ -252,7 +252,7 @@ PERF_TEST_P(WarpAffineFixture, WarpAffine, { ocl::oclMat oclSrc(src), oclDst(srcSize, type); - TEST_CYCLE() cv::ocl::warpAffine(oclSrc, oclDst, M, srcSize, interpolation); + OCL_TEST_CYCLE() cv::ocl::warpAffine(oclSrc, oclDst, M, srcSize, interpolation); oclDst.download(dst); @@ -297,7 +297,7 @@ PERF_TEST_P(WarpPerspectiveFixture, WarpPerspective, { ocl::oclMat oclSrc(src), oclDst(srcSize, type); - TEST_CYCLE() cv::ocl::warpPerspective(oclSrc, oclDst, M, srcSize, interpolation); + OCL_TEST_CYCLE() cv::ocl::warpPerspective(oclSrc, oclDst, M, srcSize, interpolation); oclDst.download(dst); @@ -342,7 +342,7 @@ PERF_TEST_P(resizeFixture, resize, { ocl::oclMat oclSrc(src), oclDst(dstSize, type); - TEST_CYCLE() cv::ocl::resize(oclSrc, oclDst, Size(), scale, scale, interType); + OCL_TEST_CYCLE() cv::ocl::resize(oclSrc, oclDst, Size(), scale, scale, interType); oclDst.download(dst); @@ -381,7 +381,7 @@ PERF_TEST_P(ThreshFixture, threshold, { ocl::oclMat oclSrc(src), oclDst(srcSize, CV_8U); - TEST_CYCLE() cv::ocl::threshold(oclSrc, oclDst, 50.0, 0.0, threshType); + OCL_TEST_CYCLE() cv::ocl::threshold(oclSrc, oclDst, 50.0, 0.0, threshType); oclDst.download(dst); @@ -611,7 +611,7 @@ PERF_TEST_P(meanShiftFilteringFixture, meanShiftFiltering, { ocl::oclMat oclSrc(src), oclDst(srcSize, CV_8UC4); - TEST_CYCLE() ocl::meanShiftFiltering(oclSrc, oclDst, sp, sr, crit); + OCL_TEST_CYCLE() ocl::meanShiftFiltering(oclSrc, oclDst, sp, sr, crit); oclDst.download(dst); @@ -706,7 +706,7 @@ PERF_TEST_P(meanShiftProcFixture, meanShiftProc, ocl::oclMat oclSrc(src), oclDst1(srcSize, CV_8UC4), oclDst2(srcSize, CV_16SC2); - TEST_CYCLE() ocl::meanShiftProc(oclSrc, oclDst1, oclDst2, 5, 6, crit); + OCL_TEST_CYCLE() ocl::meanShiftProc(oclSrc, oclDst1, oclDst2, 5, 6, crit); oclDst1.download(dst1); oclDst2.download(dst2); @@ -763,7 +763,7 @@ PERF_TEST_P(remapFixture, remap, ocl::oclMat oclSrc(src), oclDst(srcSize, type); ocl::oclMat oclXMap(xmap), oclYMap(ymap); - TEST_CYCLE() 
cv::ocl::remap(oclSrc, oclDst, oclXMap, oclYMap, interpolation, borderMode); + OCL_TEST_CYCLE() cv::ocl::remap(oclSrc, oclDst, oclXMap, oclYMap, interpolation, borderMode); oclDst.download(dst); @@ -800,7 +800,7 @@ PERF_TEST_P(CLAHEFixture, CLAHE, OCL_TYPICAL_MAT_SIZES) ocl::oclMat oclSrc(src), oclDst; cv::Ptr oclClahe = cv::ocl::createCLAHE(clipLimit); - TEST_CYCLE() oclClahe->apply(oclSrc, oclDst); + OCL_TEST_CYCLE() oclClahe->apply(oclSrc, oclDst); oclDst.download(dst); @@ -845,7 +845,7 @@ PERF_TEST_P(columnSumFixture, columnSum, OCL_TYPICAL_MAT_SIZES) { ocl::oclMat oclSrc(src), oclDst(srcSize, CV_32FC1); - TEST_CYCLE() cv::ocl::columnSum(oclSrc, oclDst); + OCL_TEST_CYCLE() cv::ocl::columnSum(oclSrc, oclDst); oclDst.download(dst); diff --git a/modules/ocl/perf/perf_match_template.cpp b/modules/ocl/perf/perf_match_template.cpp index d9f8f449a..869e01e60 100644 --- a/modules/ocl/perf/perf_match_template.cpp +++ b/modules/ocl/perf/perf_match_template.cpp @@ -72,7 +72,7 @@ PERF_TEST_P(CV_TM_CCORRFixture, matchTemplate, { ocl::oclMat oclSrc(src), oclTempl(templ), oclDst(dstSize, CV_32F); - TEST_CYCLE() cv::ocl::matchTemplate(oclSrc, oclTempl, oclDst, TM_CCORR); + OCL_TEST_CYCLE() cv::ocl::matchTemplate(oclSrc, oclTempl, oclDst, TM_CCORR); oclDst.download(dst); @@ -104,7 +104,7 @@ PERF_TEST_P(CV_TM_CCORR_NORMEDFixture, matchTemplate, OCL_TYPICAL_MAT_SIZES) { ocl::oclMat oclSrc(src), oclTempl(templ), oclDst(dstSize, CV_8UC1); - TEST_CYCLE() cv::ocl::matchTemplate(oclSrc, oclTempl, oclDst, TM_CCORR_NORMED); + OCL_TEST_CYCLE() cv::ocl::matchTemplate(oclSrc, oclTempl, oclDst, TM_CCORR_NORMED); oclDst.download(dst); diff --git a/modules/ocl/perf/perf_matrix_operation.cpp b/modules/ocl/perf/perf_matrix_operation.cpp index 1fe4616ac..ad1327503 100644 --- a/modules/ocl/perf/perf_matrix_operation.cpp +++ b/modules/ocl/perf/perf_matrix_operation.cpp @@ -70,7 +70,7 @@ PERF_TEST_P(ConvertToFixture, ConvertTo, { ocl::oclMat oclSrc(src), oclDst(srcSize, dstType); - TEST_CYCLE() 
oclSrc.convertTo(oclDst, dstType); + OCL_TEST_CYCLE() oclSrc.convertTo(oclDst, dstType); oclDst.download(dst); @@ -105,7 +105,7 @@ PERF_TEST_P(copyToFixture, copyTo, { ocl::oclMat oclSrc(src), oclDst(srcSize, type); - TEST_CYCLE() oclSrc.copyTo(oclDst); + OCL_TEST_CYCLE() oclSrc.copyTo(oclDst); oclDst.download(dst); @@ -141,7 +141,7 @@ PERF_TEST_P(setToFixture, setTo, { ocl::oclMat oclSrc(srcSize, type); - TEST_CYCLE() oclSrc.setTo(val); + OCL_TEST_CYCLE() oclSrc.setTo(val); oclSrc.download(src); SANITY_CHECK(src); diff --git a/modules/ocl/perf/perf_moments.cpp b/modules/ocl/perf/perf_moments.cpp index 200a27db8..6ecc76651 100644 --- a/modules/ocl/perf/perf_moments.cpp +++ b/modules/ocl/perf/perf_moments.cpp @@ -72,7 +72,7 @@ PERF_TEST_P(MomentsFixture, DISABLED_Moments, { ocl::oclMat oclSrc(src); - TEST_CYCLE() mom = cv::ocl::ocl_moments(oclSrc, binaryImage); // TODO Use oclSrc + OCL_TEST_CYCLE() mom = cv::ocl::ocl_moments(oclSrc, binaryImage); // TODO Use oclSrc cv::HuMoments(mom, dst); SANITY_CHECK(dst); diff --git a/modules/ocl/perf/perf_norm.cpp b/modules/ocl/perf/perf_norm.cpp index 736645d90..363bcd2b3 100644 --- a/modules/ocl/perf/perf_norm.cpp +++ b/modules/ocl/perf/perf_norm.cpp @@ -68,7 +68,7 @@ PERF_TEST_P(normFixture, DISABLED_norm, OCL_TYPICAL_MAT_SIZES) // TODO doesn't w { ocl::oclMat oclSrc1(src1), oclSrc2(src2); - TEST_CYCLE() value = cv::ocl::norm(oclSrc1, oclSrc2, NORM_INF); + OCL_TEST_CYCLE() value = cv::ocl::norm(oclSrc1, oclSrc2, NORM_INF); SANITY_CHECK(value); } diff --git a/modules/ocl/perf/perf_opticalflow.cpp b/modules/ocl/perf/perf_opticalflow.cpp index 34651d1af..861307526 100644 --- a/modules/ocl/perf/perf_opticalflow.cpp +++ b/modules/ocl/perf/perf_opticalflow.cpp @@ -52,25 +52,13 @@ using std::tr1::get; using std::tr1::tuple; using std::tr1::make_tuple; -template -static vector & MatToVector(const ocl::oclMat & oclSrc, vector & instance) -{ - Mat src; - oclSrc.download(src); - - for (int i = 0; i < src.cols; ++i) - 
instance.push_back(src.at(0, i)); - - return instance; -} - CV_ENUM(LoadMode, IMREAD_GRAYSCALE, IMREAD_COLOR) typedef tuple > PyrLKOpticalFlowParamType; typedef TestBaseWithParam PyrLKOpticalFlowFixture; PERF_TEST_P(PyrLKOpticalFlowFixture, - DISABLED_PyrLKOpticalFlow, + PyrLKOpticalFlow, ::testing::Combine( ::testing::Values(1000, 2000, 4000), ::testing::Values( @@ -79,8 +67,8 @@ PERF_TEST_P(PyrLKOpticalFlowFixture, string("gpu/opticalflow/rubberwhale1.png"), string("gpu/opticalflow/rubberwhale2.png"), LoadMode(IMREAD_COLOR) - ) - , make_tuple + ), + make_tuple ( string("gpu/stereobm/aloe-L.png"), string("gpu/stereobm/aloe-R.png"), @@ -88,7 +76,7 @@ PERF_TEST_P(PyrLKOpticalFlowFixture, ) ) ) - ) // TODO to big difference between implementations + ) { PyrLKOpticalFlowParamType params = GetParam(); tuple fileParam = get<1>(params); @@ -98,6 +86,8 @@ PERF_TEST_P(PyrLKOpticalFlowFixture, Mat frame0 = imread(getDataPath(fileName0), openMode); Mat frame1 = imread(getDataPath(fileName1), openMode); + declare.in(frame0, frame1); + ASSERT_FALSE(frame0.empty()) << "can't load " << fileName0; ASSERT_FALSE(frame1.empty()) << "can't load " << fileName1; @@ -111,36 +101,28 @@ PERF_TEST_P(PyrLKOpticalFlowFixture, vector status; vector err; goodFeaturesToTrack(grayFrame, pts, pointsCount, 0.01, 0.0); + Mat ptsMat(1, static_cast(pts.size()), CV_32FC2, (void *)&pts[0]); if (RUN_PLAIN_IMPL) { TEST_CYCLE() cv::calcOpticalFlowPyrLK(frame0, frame1, pts, nextPts, status, err); - - SANITY_CHECK(nextPts); - SANITY_CHECK(status); - SANITY_CHECK(err); } else if (RUN_OCL_IMPL) { ocl::PyrLKOpticalFlow oclPyrLK; ocl::oclMat oclFrame0(frame0), oclFrame1(frame1); - ocl::oclMat oclPts(1, static_cast(pts.size()), CV_32FC2, (void *)&pts[0]); + ocl::oclMat oclPts(ptsMat); ocl::oclMat oclNextPts, oclStatus, oclErr; - TEST_CYCLE() + OCL_TEST_CYCLE() oclPyrLK.sparse(oclFrame0, oclFrame1, oclPts, oclNextPts, oclStatus, &oclErr); - - MatToVector(oclNextPts, nextPts); - MatToVector(oclStatus, status); - 
MatToVector(oclErr, err); - - SANITY_CHECK(nextPts); - SANITY_CHECK(status); - SANITY_CHECK(err); } else OCL_PERF_ELSE + + int value = 0; + SANITY_CHECK(value); } PERF_TEST(tvl1flowFixture, tvl1flow) @@ -175,7 +157,7 @@ PERF_TEST(tvl1flowFixture, tvl1flow) ocl::oclMat oclFrame0(frame0), oclFrame1(frame1), oclFlow1(srcSize, CV_32FC1), oclFlow2(srcSize, CV_32FC1); - TEST_CYCLE() oclAlg(oclFrame0, oclFrame1, oclFlow1, oclFlow2); + OCL_TEST_CYCLE() oclAlg(oclFrame0, oclFrame1, oclFlow1, oclFlow2); oclAlg.collectGarbage(); @@ -259,7 +241,7 @@ PERF_TEST_P(FarnebackOpticalFlowFixture, FarnebackOpticalFlow, farn.flags |= OPTFLOW_USE_INITIAL_FLOW; } - TEST_CYCLE() + OCL_TEST_CYCLE() farn(oclFrame0, oclFrame1, oclFlowx, oclFlowy); oclFlowx.download(flowx); diff --git a/modules/ocl/perf/perf_precomp.cpp b/modules/ocl/perf/perf_precomp.cpp deleted file mode 100644 index 74f3f0f33..000000000 --- a/modules/ocl/perf/perf_precomp.cpp +++ /dev/null @@ -1,43 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2010-2012, Multicoreware, Inc., all rights reserved. -// Copyright (C) 2010-2012, Advanced Micro Devices, Inc., all rights reserved. -// Third party copyrights are property of their respective owners. - -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. 
-// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other oclMaterials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors as is and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. 
-// -//M*/ - -#include "perf_precomp.hpp" diff --git a/modules/ocl/perf/perf_precomp.hpp b/modules/ocl/perf/perf_precomp.hpp index 2904af916..ad908a75f 100644 --- a/modules/ocl/perf/perf_precomp.hpp +++ b/modules/ocl/perf/perf_precomp.hpp @@ -111,4 +111,8 @@ using namespace cv; CV_TEST_FAIL_NO_IMPL(); #endif +#define OCL_TEST_CYCLE_N(n) for(declare.iterations(n); startTimer(), next(); ocl::finish(), stopTimer()) +#define OCL_TEST_CYCLE() for(; startTimer(), next(); ocl::finish(), stopTimer()) +#define OCL_TEST_CYCLE_MULTIRUN(runsNum) for(declare.runs(runsNum); startTimer(), next(); stopTimer()) for(int r = 0; r < runsNum; ocl::finish(), ++r) + #endif diff --git a/modules/ocl/perf/perf_pyramid.cpp b/modules/ocl/perf/perf_pyramid.cpp index c7f949d2c..19c728bb7 100644 --- a/modules/ocl/perf/perf_pyramid.cpp +++ b/modules/ocl/perf/perf_pyramid.cpp @@ -70,7 +70,7 @@ PERF_TEST_P(pyrDownFixture, pyrDown, { ocl::oclMat oclSrc(src), oclDst(dstSize, type); - TEST_CYCLE() ocl::pyrDown(oclSrc, oclDst); + OCL_TEST_CYCLE() ocl::pyrDown(oclSrc, oclDst); oclDst.download(dst); @@ -107,7 +107,7 @@ PERF_TEST_P(pyrUpFixture, pyrUp, { ocl::oclMat oclSrc(src), oclDst(dstSize, type); - TEST_CYCLE() ocl::pyrDown(oclSrc, oclDst); + OCL_TEST_CYCLE() ocl::pyrDown(oclSrc, oclDst); oclDst.download(dst); diff --git a/modules/ocl/perf/perf_split_merge.cpp b/modules/ocl/perf/perf_split_merge.cpp index 48d64c100..3821a8e16 100644 --- a/modules/ocl/perf/perf_split_merge.cpp +++ b/modules/ocl/perf/perf_split_merge.cpp @@ -78,7 +78,7 @@ PERF_TEST_P(MergeFixture, Merge, for (vector::size_type i = 0, end = src.size(); i < end; ++i) oclSrc[i] = src[i]; - TEST_CYCLE() cv::ocl::merge(oclSrc, oclDst); + OCL_TEST_CYCLE() cv::ocl::merge(oclSrc, oclDst); oclDst.download(dst); @@ -114,7 +114,7 @@ PERF_TEST_P(SplitFixture, Split, ocl::oclMat oclSrc(src); vector oclDst(channels, ocl::oclMat(srcSize, CV_MAKE_TYPE(depth, 1))); - TEST_CYCLE() cv::ocl::split(oclSrc, oclDst); + OCL_TEST_CYCLE() cv::ocl::split(oclSrc, 
oclDst); ASSERT_EQ(3, channels); Mat dst0, dst1, dst2; diff --git a/modules/ocl/src/opencl/tvl1flow.cl b/modules/ocl/src/opencl/tvl1flow.cl index 4f8249602..095b339f8 100644 --- a/modules/ocl/src/opencl/tvl1flow.cl +++ b/modules/ocl/src/opencl/tvl1flow.cl @@ -341,7 +341,8 @@ __kernel void estimateUKernel(__global const float* I1wx, int I1wx_col, int I1wx int u1_offset_x, int u1_offset_y, int u2_offset_x, - int u2_offset_y) + int u2_offset_y, + char calc_error) { //const int x = blockIdx.x * blockDim.x + threadIdx.x; @@ -399,9 +400,12 @@ __kernel void estimateUKernel(__global const float* I1wx, int I1wx_col, int I1wx u1[(y + u1_offset_y) * u1_step + x + u1_offset_x] = u1NewVal; u2[(y + u2_offset_y) * u2_step + x + u2_offset_x] = u2NewVal; - const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal); - const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal); - error[y * I1wx_step + x] = n1 + n2; + if(calc_error) + { + const float n1 = (u1OldVal - u1NewVal) * (u1OldVal - u1NewVal); + const float n2 = (u2OldVal - u2NewVal) * (u2OldVal - u2NewVal); + error[y * I1wx_step + x] = n1 + n2; + } } } diff --git a/modules/ocl/src/precomp.hpp b/modules/ocl/src/precomp.hpp index 270442889..cf8743fcc 100644 --- a/modules/ocl/src/precomp.hpp +++ b/modules/ocl/src/precomp.hpp @@ -70,8 +70,6 @@ #include "opencv2/core/utility.hpp" #include "opencv2/core/private.hpp" -//#include "opencv2/highgui.hpp" - #define __ATI__ #if defined (HAVE_OPENCL) diff --git a/modules/ocl/src/tvl1flow.cpp b/modules/ocl/src/tvl1flow.cpp index c2e85b6ae..daf3a2295 100644 --- a/modules/ocl/src/tvl1flow.cpp +++ b/modules/ocl/src/tvl1flow.cpp @@ -172,7 +172,7 @@ namespace ocl_tvl1flow void estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad, oclMat &rho_c, oclMat &p11, oclMat &p12, oclMat &p21, oclMat &p22, oclMat &u1, - oclMat &u2, oclMat &error, float l_t, float theta); + oclMat &u2, oclMat &error, float l_t, float theta, char calc_error); void estimateDualVariables(oclMat &u1, oclMat &u2, oclMat 
&p11, oclMat &p12, oclMat &p21, oclMat &p22, float taut); @@ -229,18 +229,29 @@ void cv::ocl::OpticalFlowDual_TVL1_OCL::procOneScale(const oclMat &I0, const ocl warpBackward(I0, I1, I1x, I1y, u1, u2, I1w, I1wx, I1wy, grad, rho_c); double error = numeric_limits::max(); + double prev_error = 0; for (int n = 0; error > scaledEpsilon && n < iterations; ++n) { + // some tweaks to make sum operation less frequently + char calc_error = (n & 0x1) && (prev_error < scaledEpsilon); estimateU(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, - u1, u2, diff, l_t, static_cast(theta)); - - error = ocl::sum(diff)[0]; - + u1, u2, diff, l_t, static_cast(theta), calc_error); + if(calc_error) + { + error = ocl::sum(diff)[0]; + prev_error = error; + } + else + { + error = numeric_limits::max(); + prev_error -= scaledEpsilon; + } estimateDualVariables(u1, u2, p11, p12, p21, p22, taut); } } + } void cv::ocl::OpticalFlowDual_TVL1_OCL::collectGarbage() @@ -348,7 +359,7 @@ void ocl_tvl1flow::estimateDualVariables(oclMat &u1, oclMat &u2, oclMat &p11, oc void ocl_tvl1flow::estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad, oclMat &rho_c, oclMat &p11, oclMat &p12, oclMat &p21, oclMat &p22, oclMat &u1, - oclMat &u2, oclMat &error, float l_t, float theta) + oclMat &u2, oclMat &error, float l_t, float theta, char calc_error) { Context* clCxt = I1wx.clCxt; @@ -401,6 +412,7 @@ void ocl_tvl1flow::estimateU(oclMat &I1wx, oclMat &I1wy, oclMat &grad, args.push_back( make_pair( sizeof(cl_int), (void*)&u1_offset_y)); args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_x)); args.push_back( make_pair( sizeof(cl_int), (void*)&u2_offset_y)); + args.push_back( make_pair( sizeof(cl_char), (void*)&calc_error)); openCLExecuteKernel(clCxt, &tvl1flow, kernelName, globalThread, localThread, args, -1, -1); } diff --git a/modules/optim/src/precomp.cpp b/modules/optim/src/precomp.cpp deleted file mode 100644 index 3e0ec42de..000000000 --- a/modules/optim/src/precomp.cpp +++ /dev/null @@ -1,44 +0,0 @@ 
-/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// Intel License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000, Intel Corporation, all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and/or other materials provided with the distribution. -// -// * The name of Intel Corporation may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. 
-// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#include "precomp.hpp" - -/* End of file. */ diff --git a/modules/optim/test/test_precomp.cpp b/modules/optim/test/test_precomp.cpp deleted file mode 100644 index 5956e13e3..000000000 --- a/modules/optim/test/test_precomp.cpp +++ /dev/null @@ -1 +0,0 @@ -#include "test_precomp.hpp" diff --git a/modules/softcascade/perf/perf_precomp.cpp b/modules/softcascade/perf/perf_precomp.cpp deleted file mode 100644 index cc728b050..000000000 --- a/modules/softcascade/perf/perf_precomp.cpp +++ /dev/null @@ -1,43 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2008-2013, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. 
-// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and / or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#include "perf_precomp.hpp" diff --git a/modules/softcascade/src/precomp.cpp b/modules/softcascade/src/precomp.cpp deleted file mode 100644 index 62c236d08..000000000 --- a/modules/softcascade/src/precomp.cpp +++ /dev/null @@ -1,43 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. 
-// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2008-2013, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and / or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. -// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. 
-// -//M*/ - -#include "precomp.hpp" diff --git a/modules/softcascade/test/test_precomp.cpp b/modules/softcascade/test/test_precomp.cpp deleted file mode 100644 index 278cdd03c..000000000 --- a/modules/softcascade/test/test_precomp.cpp +++ /dev/null @@ -1,43 +0,0 @@ -/*M/////////////////////////////////////////////////////////////////////////////////////// -// -// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. -// -// By downloading, copying, installing or using the software you agree to this license. -// If you do not agree to this license, do not download, install, -// copy or use the software. -// -// -// License Agreement -// For Open Source Computer Vision Library -// -// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. -// Copyright (C) 2008-2013, Willow Garage Inc., all rights reserved. -// Third party copyrights are property of their respective owners. -// -// Redistribution and use in source and binary forms, with or without modification, -// are permitted provided that the following conditions are met: -// -// * Redistribution's of source code must retain the above copyright notice, -// this list of conditions and the following disclaimer. -// -// * Redistribution's in binary form must reproduce the above copyright notice, -// this list of conditions and the following disclaimer in the documentation -// and / or other materials provided with the distribution. -// -// * The name of the copyright holders may not be used to endorse or promote products -// derived from this software without specific prior written permission. -// -// This software is provided by the copyright holders and contributors "as is" and -// any express or implied warranties, including, but not limited to, the implied -// warranties of merchantability and fitness for a particular purpose are disclaimed. 
-// In no event shall the Intel Corporation or contributors be liable for any direct, -// indirect, incidental, special, exemplary, or consequential damages -// (including, but not limited to, procurement of substitute goods or services; -// loss of use, data, or profits; or business interruption) however caused -// and on any theory of liability, whether in contract, strict liability, -// or tort (including negligence or otherwise) arising in any way out of -// the use of this software, even if advised of the possibility of such damage. -// -//M*/ - -#include "test_precomp.hpp" diff --git a/platforms/android/service/engine/AndroidManifest.xml b/platforms/android/service/engine/AndroidManifest.xml index 8eb06689c..dc992b3a6 100644 --- a/platforms/android/service/engine/AndroidManifest.xml +++ b/platforms/android/service/engine/AndroidManifest.xml @@ -1,8 +1,8 @@ + android:versionCode="210@ANDROID_PLATFORM_VERSION_CODE@" + android:versionName="2.10" > diff --git a/platforms/android/service/engine/CMakeLists.txt b/platforms/android/service/engine/CMakeLists.txt index 852a028ca..b1cac9383 100644 --- a/platforms/android/service/engine/CMakeLists.txt +++ b/platforms/android/service/engine/CMakeLists.txt @@ -26,19 +26,32 @@ endif() configure_file("${CMAKE_CURRENT_SOURCE_DIR}/${ANDROID_MANIFEST_FILE}" "${OpenCV_BINARY_DIR}/platforms/android/service/engine/.build/${ANDROID_MANIFEST_FILE}" @ONLY) -link_directories("${ANDROID_SOURCE_TREE}/out/target/product/generic/system/lib" "${ANDROID_SOURCE_TREE}/out/target/product/${ANDROID_PRODUCT}/system/lib" "${ANDROID_SOURCE_TREE}/bin/${ANDROID_ARCH_NAME}") +link_directories( + "${ANDROID_SOURCE_TREE}/out/target/product/generic/system/lib" + "${ANDROID_SOURCE_TREE}/out/target/product/${ANDROID_PRODUCT}/system/lib" + "${ANDROID_SOURCE_TREE}/bin/${ANDROID_ARCH_NAME}") + +file(GLOB engine_files "jni/BinderComponent/*.cpp" "jni/BinderComponent/*.h" "jni/include/*.h") +set(engine_libs "z" "binder" "log" "utils") + +if (TEGRA_DETECTOR) + 
if (ANDROID_NATIVE_API_LEVEL GREATER 8) + add_definitions(-DUSE_TEGRA_HW_DETECTOR) + list(APPEND engine_libs ${TEGRA_DETECTOR} GLESv2 EGL) + else() + message(FATAL_ERROR "Tegra detector required native api level 9 or above") + endif() +endif() # -D__SUPPORT_ARMEABI_FEATURES key is also available add_definitions(-DPLATFORM_ANDROID -D__SUPPORT_ARMEABI_V7A_FEATURES -D__SUPPORT_TEGRA3 -D__SUPPORT_MIPS) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti -fno-exceptions") - set(CMAKE_SHARED_LINKER_FLAGS "${CMAKE_SHARED_LINKER_FLAGS} -Wl,-allow-shlib-undefined") -file(GLOB engine_files "jni/BinderComponent/*.cpp" "jni/BinderComponent/*.h" "jni/include/*.h") -include_directories(jni/BinderComponent jni/include) +include_directories("jni/BinderComponent" "jni/include") include_directories(SYSTEM "${ANDROID_SOURCE_TREE}/frameworks/base/include" "${ANDROID_SOURCE_TREE}/system/core/include") add_library(${engine} SHARED ${engine_files}) -target_link_libraries(${engine} z binder log utils) +target_link_libraries(${engine} ${engine_libs}) set_target_properties(${engine} PROPERTIES OUTPUT_NAME ${engine} @@ -51,7 +64,15 @@ add_custom_command(TARGET ${engine} POST_BUILD COMMAND ${CMAKE_STRIP} --strip-un file(GLOB engine_jni_files "jni/JNIWrapper/*.cpp" "jni/JNIWrapper/*.h" "jni/include/*.h") list(APPEND engine_jni_files jni/NativeService/CommonPackageManager.cpp jni/NativeService/PackageInfo.cpp) -include_directories(jni/include jni/JNIWrapper jni/NativeService jni/BinderComponent "${ANDROID_SOURCE_TREE}/frameworks/base/include" "${ANDROID_SOURCE_TREE}/system/core/include" "${ANDROID_SOURCE_TREE}/frameworks/base/core/jni") +include_directories( + jni/include jni/JNIWrapper + jni/NativeService + jni/BinderComponent + "${ANDROID_SOURCE_TREE}/frameworks/base/include" + "${ANDROID_SOURCE_TREE}/system/core/include" + "${ANDROID_SOURCE_TREE}/frameworks/base/core/jni" + ) + add_library(${engine}_jni SHARED ${engine_jni_files}) target_link_libraries(${engine}_jni z binder log utils 
android_runtime ${engine}) diff --git a/platforms/android/service/engine/jni/BinderComponent/HardwareDetector.cpp b/platforms/android/service/engine/jni/BinderComponent/HardwareDetector.cpp index 15f70f08a..d0d8514b7 100644 --- a/platforms/android/service/engine/jni/BinderComponent/HardwareDetector.cpp +++ b/platforms/android/service/engine/jni/BinderComponent/HardwareDetector.cpp @@ -13,7 +13,7 @@ int GetCpuID() map cpu_info = GetCpuInfo(); map::const_iterator it; - #if defined(__i386__) +#if defined(__i386__) LOGD("Using X86 HW detector"); result |= ARCH_X86; it = cpu_info.find("flags"); @@ -161,8 +161,11 @@ int GetProcessorCount() int DetectKnownPlatforms() { +#if defined(__arm__) && defined(USE_TEGRA_HW_DETECTOR) int tegra_status = DetectTegra(); - +#else + int tegra_status = NOT_TEGRA; +#endif // All Tegra platforms since Tegra3 if (2 < tegra_status) { diff --git a/platforms/android/service/engine/jni/BinderComponent/TegraDetector.cpp b/platforms/android/service/engine/jni/BinderComponent/TegraDetector.cpp deleted file mode 100644 index 6564c707f..000000000 --- a/platforms/android/service/engine/jni/BinderComponent/TegraDetector.cpp +++ /dev/null @@ -1,61 +0,0 @@ -#include "TegraDetector.h" -#include -#include - -#define KERNEL_CONFIG "/proc/config.gz" -#define KERNEL_CONFIG_MAX_LINE_WIDTH 512 -#define KERNEL_CONFIG_TEGRA_MAGIC "CONFIG_ARCH_TEGRA=y" -#define KERNEL_CONFIG_TEGRA2_MAGIC "CONFIG_ARCH_TEGRA_2x_SOC=y" -#define KERNEL_CONFIG_TEGRA3_MAGIC "CONFIG_ARCH_TEGRA_3x_SOC=y" -#define KERNEL_CONFIG_TEGRA4_MAGIC "CONFIG_ARCH_TEGRA_11x_SOC=y" -#define MAX_DATA_LEN 4096 - -int DetectTegra() -{ - int result = TEGRA_NOT_TEGRA; - gzFile kernelConfig = gzopen(KERNEL_CONFIG, "r"); - if (kernelConfig != 0) - { - char tmpbuf[KERNEL_CONFIG_MAX_LINE_WIDTH]; - const char *tegra_config = KERNEL_CONFIG_TEGRA_MAGIC; - const char *tegra2_config = KERNEL_CONFIG_TEGRA2_MAGIC; - const char *tegra3_config = KERNEL_CONFIG_TEGRA3_MAGIC; - const char *tegra4_config = 
KERNEL_CONFIG_TEGRA4_MAGIC; - int len = strlen(tegra_config); - int len2 = strlen(tegra2_config); - int len3 = strlen(tegra3_config); - int len4 = strlen(tegra4_config); - while (0 != gzgets(kernelConfig, tmpbuf, KERNEL_CONFIG_MAX_LINE_WIDTH)) - { - if (0 == strncmp(tmpbuf, tegra_config, len)) - { - result = 1; - } - - if (0 == strncmp(tmpbuf, tegra2_config, len2)) - { - result = 2; - break; - } - - if (0 == strncmp(tmpbuf, tegra3_config, len3)) - { - result = 3; - break; - } - - if (0 == strncmp(tmpbuf, tegra4_config, len4)) - { - result = 4; - break; - } - } - gzclose(kernelConfig); - } - else - { - result = TEGRA_DETECTOR_ERROR; - } - - return result; -} diff --git a/platforms/android/service/engine/jni/BinderComponent/TegraDetector.h b/platforms/android/service/engine/jni/BinderComponent/TegraDetector.h index e2b0441ec..4ca930b75 100644 --- a/platforms/android/service/engine/jni/BinderComponent/TegraDetector.h +++ b/platforms/android/service/engine/jni/BinderComponent/TegraDetector.h @@ -2,7 +2,12 @@ #define __TEGRA_DETECTOR_H__ #define TEGRA_DETECTOR_ERROR -2 -#define TEGRA_NOT_TEGRA -1 +#define NOT_TEGRA -1 +#define TEGRA2 2 +#define TEGRA3 3 +#define TEGRA4i 4 +#define TEGRA4 5 +#define TEGRA5 6 int DetectTegra(); diff --git a/platforms/android/service/engine/src/org/opencv/engine/HardwareDetector.java b/platforms/android/service/engine/src/org/opencv/engine/HardwareDetector.java index 7fc7e1ae8..dc82ec30c 100644 --- a/platforms/android/service/engine/src/org/opencv/engine/HardwareDetector.java +++ b/platforms/android/service/engine/src/org/opencv/engine/HardwareDetector.java @@ -30,11 +30,12 @@ public class HardwareDetector // GPU Acceleration options public static final int FEATURES_HAS_GPU = 0x010000; - public static final int PLATFORM_TEGRA = 1; - public static final int PLATFORM_TEGRA2 = 2; - public static final int PLATFORM_TEGRA3 = 3; - public static final int PLATFORM_TEGRA4 = 4; - + public static final int PLATFORM_TEGRA = 1; + public static final 
int PLATFORM_TEGRA2 = 2; + public static final int PLATFORM_TEGRA3 = 3; + public static final int PLATFORM_TEGRA4i = 4; + public static final int PLATFORM_TEGRA4 = 5; + public static final int PLATFORM_TEGRA5 = 6; public static final int PLATFORM_UNKNOWN = 0; diff --git a/platforms/android/service/engine/src/org/opencv/engine/manager/ManagerActivity.java b/platforms/android/service/engine/src/org/opencv/engine/manager/ManagerActivity.java index 3c1aac994..e22f7b529 100644 --- a/platforms/android/service/engine/src/org/opencv/engine/manager/ManagerActivity.java +++ b/platforms/android/service/engine/src/org/opencv/engine/manager/ManagerActivity.java @@ -107,6 +107,10 @@ public class ManagerActivity extends Activity { HardwarePlatformView.setText("Tegra 3"); } + else if (HardwareDetector.PLATFORM_TEGRA4i == Platfrom) + { + HardwarePlatformView.setText("Tegra 4i"); + } else { HardwarePlatformView.setText("Tegra 4"); diff --git a/platforms/scripts/cmake_winrt.cmd b/platforms/scripts/cmake_winrt.cmd index ef5c27656..392781ceb 100644 --- a/platforms/scripts/cmake_winrt.cmd +++ b/platforms/scripts/cmake_winrt.cmd @@ -6,4 +6,4 @@ set msvc_path=C:\Program Files\Microsoft Visual Studio 11.0 call "%msvc_path%\Common7\Tools\VsDevCmd.bat" call "%msvc_path%\VC\bin\x86_arm\vcvarsx86_arm.bat" -cmake.exe -GNinja -DCMAKE_BUILD_TYPE=Release -DENABLE_WINRT_MODE=ON -DWITH_FFMPEG=OFF -DWITH_MSMF=OFF -DWITH_DSHOW=OFF -DWITH_VFW=OFF -DWITH_TIFF=OFF -DWITH_OPENEXR=OFF -DWITH_CUDA=OFF -DBUILD_opencv_gpu=OFF -DBUILD_opencv_python=OFF -DBUILD_opencv_java=OFF -DCMAKE_TOOLCHAIN_FILE=..\winrt\arm.winrt.toolchain.cmake %* ..\.. +cmake.exe -GNinja -DCMAKE_BUILD_TYPE=Release -DENABLE_WINRT_MODE=ON -DWITH_FFMPEG=OFF -DWITH_MSMF=OFF -DWITH_DSHOW=OFF -DWITH_VFW=OFF -DWITH_OPENEXR=OFF -DWITH_CUDA=OFF -DBUILD_opencv_gpu=OFF -DBUILD_opencv_python=OFF -DBUILD_opencv_java=OFF -DCMAKE_TOOLCHAIN_FILE=..\winrt\arm.winrt.toolchain.cmake %* ..\.. 
diff --git a/samples/android/image-manipulations/src/org/opencv/samples/imagemanipulations/ImageManipulationsActivity.java b/samples/android/image-manipulations/src/org/opencv/samples/imagemanipulations/ImageManipulationsActivity.java index 88ade8430..38f1d5959 100644 --- a/samples/android/image-manipulations/src/org/opencv/samples/imagemanipulations/ImageManipulationsActivity.java +++ b/samples/android/image-manipulations/src/org/opencv/samples/imagemanipulations/ImageManipulationsActivity.java @@ -48,17 +48,12 @@ public class ImageManipulationsActivity extends Activity implements CvCameraView private CameraBridgeViewBase mOpenCvCameraView; private Size mSize0; - private Size mSizeRgba; - private Size mSizeRgbaInner; - private Mat mRgba; - private Mat mGray; private Mat mIntermediateMat; - private Mat mHist; private Mat mMat0; private MatOfInt mChannels[]; private MatOfInt mHistSize; - private int mHistSizeNum; + private int mHistSizeNum = 25; private MatOfFloat mRanges; private Scalar mColorsRGB[]; private Scalar mColorsHue[]; @@ -66,10 +61,6 @@ public class ImageManipulationsActivity extends Activity implements CvCameraView private Point mP1; private Point mP2; private float mBuff[]; - private Mat mRgbaInnerWindow; - private Mat mGrayInnerWindow; - private Mat mZoomWindow; - private Mat mZoomCorner; private Mat mSepiaKernel; public static int viewMode = VIEW_MODE_RGBA; @@ -166,13 +157,9 @@ public class ImageManipulationsActivity extends Activity implements CvCameraView } public void onCameraViewStarted(int width, int height) { - mGray = new Mat(); - mRgba = new Mat(); mIntermediateMat = new Mat(); mSize0 = new Size(); - mHist = new Mat(); mChannels = new MatOfInt[] { new MatOfInt(0), new MatOfInt(1), new MatOfInt(2) }; - mHistSizeNum = 25; mBuff = new float[mHistSizeNum]; mHistSize = new MatOfInt(mHistSizeNum); mRanges = new MatOfFloat(0f, 256f); @@ -197,14 +184,22 @@ public class ImageManipulationsActivity extends Activity implements CvCameraView 
mSepiaKernel.put(3, 0, /* A */0.000f, 0.000f, 0.000f, 1f); } - private void CreateAuxiliaryMats() { - if (mRgba.empty()) - return; + public void onCameraViewStopped() { + // Explicitly deallocate Mats + if (mIntermediateMat != null) + mIntermediateMat.release(); - mSizeRgba = mRgba.size(); + mIntermediateMat = null; + } - int rows = (int) mSizeRgba.height; - int cols = (int) mSizeRgba.width; + public Mat onCameraFrame(CvCameraViewFrame inputFrame) { + Mat rgba = inputFrame.rgba(); + Size sizeRgba = rgba.size(); + + Mat rgbaInnerWindow; + + int rows = (int) sizeRgba.height; + int cols = (int) sizeRgba.width; int left = cols / 8; int top = rows / 8; @@ -212,151 +207,107 @@ public class ImageManipulationsActivity extends Activity implements CvCameraView int width = cols * 3 / 4; int height = rows * 3 / 4; - if (mRgbaInnerWindow == null) - mRgbaInnerWindow = mRgba.submat(top, top + height, left, left + width); - mSizeRgbaInner = mRgbaInnerWindow.size(); - - if (mGrayInnerWindow == null && !mGray.empty()) - mGrayInnerWindow = mGray.submat(top, top + height, left, left + width); - - if (mZoomCorner == null) - mZoomCorner = mRgba.submat(0, rows / 2 - rows / 10, 0, cols / 2 - cols / 10); - - if (mZoomWindow == null) - mZoomWindow = mRgba.submat(rows / 2 - 9 * rows / 100, rows / 2 + 9 * rows / 100, cols / 2 - 9 * cols / 100, cols / 2 + 9 * cols / 100); - } - - public void onCameraViewStopped() { - // Explicitly deallocate Mats - if (mZoomWindow != null) - mZoomWindow.release(); - if (mZoomCorner != null) - mZoomCorner.release(); - if (mGrayInnerWindow != null) - mGrayInnerWindow.release(); - if (mRgbaInnerWindow != null) - mRgbaInnerWindow.release(); - if (mRgba != null) - mRgba.release(); - if (mGray != null) - mGray.release(); - if (mIntermediateMat != null) - mIntermediateMat.release(); - - mRgba = null; - mGray = null; - mIntermediateMat = null; - mRgbaInnerWindow = null; - mGrayInnerWindow = null; - mZoomCorner = null; - mZoomWindow = null; - } - - public Mat 
onCameraFrame(CvCameraViewFrame inputFrame) { - mRgba = inputFrame.rgba(); - switch (ImageManipulationsActivity.viewMode) { case ImageManipulationsActivity.VIEW_MODE_RGBA: break; case ImageManipulationsActivity.VIEW_MODE_HIST: - if ((mSizeRgba == null) || (mRgba.cols() != mSizeRgba.width) || (mRgba.height() != mSizeRgba.height)) - CreateAuxiliaryMats(); - int thikness = (int) (mSizeRgba.width / (mHistSizeNum + 10) / 5); + Mat hist = new Mat(); + int thikness = (int) (sizeRgba.width / (mHistSizeNum + 10) / 5); if(thikness > 5) thikness = 5; - int offset = (int) ((mSizeRgba.width - (5*mHistSizeNum + 4*10)*thikness)/2); + int offset = (int) ((sizeRgba.width - (5*mHistSizeNum + 4*10)*thikness)/2); // RGB for(int c=0; c<3; c++) { - Imgproc.calcHist(Arrays.asList(mRgba), mChannels[c], mMat0, mHist, mHistSize, mRanges); - Core.normalize(mHist, mHist, mSizeRgba.height/2, 0, Core.NORM_INF); - mHist.get(0, 0, mBuff); + Imgproc.calcHist(Arrays.asList(rgba), mChannels[c], mMat0, hist, mHistSize, mRanges); + Core.normalize(hist, hist, sizeRgba.height/2, 0, Core.NORM_INF); + hist.get(0, 0, mBuff); for(int h=0; h +#else + #include + #include #endif #include -#include "cvconfig.h" +#include + #include "opencv2/core.hpp" #include "opencv2/highgui.hpp" +#include "opencv2/imgproc.hpp" +#include "opencv2/contrib.hpp" #include "opencv2/cudastereo.hpp" -#ifdef HAVE_TBB -# include "tbb/tbb_stddef.h" -# if TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202 -# include "tbb/tbb.h" -# include "tbb/task.h" -# undef min -# undef max -# else -# undef HAVE_TBB -# endif -#endif - -#if !defined(HAVE_CUDA) || !defined(HAVE_TBB) - -int main() -{ -#if !defined(HAVE_CUDA) - std::cout << "CUDA support is required (CMake key 'WITH_CUDA' must be true).\n"; -#endif - -#if !defined(HAVE_TBB) - std::cout << "TBB support is required (CMake key 'WITH_TBB' must be true).\n"; -#endif - - return 0; -} - -#else - using namespace std; using namespace cv; using namespace cv::cuda; -struct Worker { void operator()(int 
device_id) const; }; +/////////////////////////////////////////////////////////// +// Thread +// OS-specific wrappers for multi-threading -// GPUs data -GpuMat d_left[2]; -GpuMat d_right[2]; -Ptr bm[2]; -GpuMat d_result[2]; - -static void printHelp() +#ifdef WIN32 +class Thread { - std::cout << "Usage: stereo_multi_gpu --left --right \n"; + struct UserData + { + void (*func)(void* userData); + void* param; + }; + + static DWORD WINAPI WinThreadFunction(LPVOID lpParam) + { + UserData* userData = static_cast(lpParam); + + userData->func(userData->param); + + return 0; + } + + UserData userData_; + HANDLE thread_; + DWORD threadId_; + +public: + Thread(void (*func)(void* userData), void* userData) + { + userData_.func = func; + userData_.param = userData; + + thread_ = CreateThread( + NULL, // default security attributes + 0, // use default stack size + WinThreadFunction, // thread function name + &userData_, // argument to thread function + 0, // use default creation flags + &threadId_); // returns the thread identifier + } + + ~Thread() + { + CloseHandle(thread_); + } + + void wait() + { + WaitForSingleObject(thread_, INFINITE); + } +}; +#else +class Thread +{ + struct UserData + { + void (*func)(void* userData); + void* param; + }; + + static void* PThreadFunction(void* lpParam) + { + UserData* userData = static_cast(lpParam); + + userData->func(userData->param); + + return 0; + } + + pthread_t thread_; + UserData userData_; + +public: + Thread(void (*func)(void* userData), void* userData) + { + userData_.func = func; + userData_.param = userData; + + pthread_create(&thread_, NULL, PThreadFunction, &userData_); + } + + ~Thread() + { + pthread_detach(thread_); + } + + void wait() + { + pthread_join(thread_, NULL); + } +}; +#endif + +/////////////////////////////////////////////////////////// +// StereoSingleGpu +// Run Stereo algorithm on single GPU + +class StereoSingleGpu +{ +public: + explicit StereoSingleGpu(int deviceId = 0); + ~StereoSingleGpu(); + + void 
compute(const Mat& leftFrame, const Mat& rightFrame, Mat& disparity); + +private: + int deviceId_; + GpuMat d_leftFrame; + GpuMat d_rightFrame; + GpuMat d_disparity; + Ptr d_alg; +}; + +StereoSingleGpu::StereoSingleGpu(int deviceId) : deviceId_(deviceId) +{ + cuda::setDevice(deviceId_); + d_alg = cuda::createStereoBM(256); } +StereoSingleGpu::~StereoSingleGpu() +{ + cuda::setDevice(deviceId_); + d_leftFrame.release(); + d_rightFrame.release(); + d_disparity.release(); + d_alg.release(); +} + +void StereoSingleGpu::compute(const Mat& leftFrame, const Mat& rightFrame, Mat& disparity) +{ + cuda::setDevice(deviceId_); + d_leftFrame.upload(leftFrame); + d_rightFrame.upload(rightFrame); + d_alg->compute(d_leftFrame, d_rightFrame, d_disparity); + d_disparity.download(disparity); +} + +/////////////////////////////////////////////////////////// +// StereoMultiGpuThread +// Run Stereo algorithm on two GPUs using different host threads + +class StereoMultiGpuThread +{ +public: + StereoMultiGpuThread(); + ~StereoMultiGpuThread(); + + void compute(const Mat& leftFrame, const Mat& rightFrame, Mat& disparity); + +private: + GpuMat d_leftFrames[2]; + GpuMat d_rightFrames[2]; + GpuMat d_disparities[2]; + Ptr d_algs[2]; + + struct StereoLaunchData + { + int deviceId; + Mat leftFrame; + Mat rightFrame; + Mat disparity; + GpuMat* d_leftFrame; + GpuMat* d_rightFrame; + GpuMat* d_disparity; + Ptr d_alg; + }; + + static void launchGpuStereoAlg(void* userData); +}; + +StereoMultiGpuThread::StereoMultiGpuThread() +{ + cuda::setDevice(0); + d_algs[0] = cuda::createStereoBM(256); + + cuda::setDevice(1); + d_algs[1] = cuda::createStereoBM(256); +} + +StereoMultiGpuThread::~StereoMultiGpuThread() +{ + cuda::setDevice(0); + d_leftFrames[0].release(); + d_rightFrames[0].release(); + d_disparities[0].release(); + d_algs[0].release(); + + cuda::setDevice(1); + d_leftFrames[1].release(); + d_rightFrames[1].release(); + d_disparities[1].release(); + d_algs[1].release(); +} + +void 
StereoMultiGpuThread::compute(const Mat& leftFrame, const Mat& rightFrame, Mat& disparity) +{ + disparity.create(leftFrame.size(), CV_8UC1); + + // Split input data onto two parts for each GPUs. + // We add small border for each part, + // because original algorithm doesn't calculate disparity on image borders. + // With such padding we will get output in the middle of final result. + + StereoLaunchData launchDatas[2]; + + launchDatas[0].deviceId = 0; + launchDatas[0].leftFrame = leftFrame.rowRange(0, leftFrame.rows / 2 + 32); + launchDatas[0].rightFrame = rightFrame.rowRange(0, rightFrame.rows / 2 + 32); + launchDatas[0].disparity = disparity.rowRange(0, leftFrame.rows / 2); + launchDatas[0].d_leftFrame = &d_leftFrames[0]; + launchDatas[0].d_rightFrame = &d_rightFrames[0]; + launchDatas[0].d_disparity = &d_disparities[0]; + launchDatas[0].d_alg = d_algs[0]; + + launchDatas[1].deviceId = 1; + launchDatas[1].leftFrame = leftFrame.rowRange(leftFrame.rows / 2 - 32, leftFrame.rows); + launchDatas[1].rightFrame = rightFrame.rowRange(leftFrame.rows / 2 - 32, leftFrame.rows); + launchDatas[1].disparity = disparity.rowRange(leftFrame.rows / 2, leftFrame.rows); + launchDatas[1].d_leftFrame = &d_leftFrames[1]; + launchDatas[1].d_rightFrame = &d_rightFrames[1]; + launchDatas[1].d_disparity = &d_disparities[1]; + launchDatas[1].d_alg = d_algs[1]; + + Thread thread0(launchGpuStereoAlg, &launchDatas[0]); + Thread thread1(launchGpuStereoAlg, &launchDatas[1]); + + thread0.wait(); + thread1.wait(); +} + +void StereoMultiGpuThread::launchGpuStereoAlg(void* userData) +{ + StereoLaunchData* data = static_cast(userData); + + cuda::setDevice(data->deviceId); + data->d_leftFrame->upload(data->leftFrame); + data->d_rightFrame->upload(data->rightFrame); + data->d_alg->compute(*data->d_leftFrame, *data->d_rightFrame, *data->d_disparity); + + if (data->deviceId == 0) + data->d_disparity->rowRange(0, data->d_disparity->rows - 32).download(data->disparity); + else + 
data->d_disparity->rowRange(32, data->d_disparity->rows).download(data->disparity); +} + +/////////////////////////////////////////////////////////// +// StereoMultiGpuStream +// Run Stereo algorithm on two GPUs from single host thread using async API + +class StereoMultiGpuStream +{ +public: + StereoMultiGpuStream(); + ~StereoMultiGpuStream(); + + void compute(const CudaMem& leftFrame, const CudaMem& rightFrame, CudaMem& disparity); + +private: + GpuMat d_leftFrames[2]; + GpuMat d_rightFrames[2]; + GpuMat d_disparities[2]; + Ptr d_algs[2]; + Ptr streams[2]; +}; + +StereoMultiGpuStream::StereoMultiGpuStream() +{ + cuda::setDevice(0); + d_algs[0] = cuda::createStereoBM(256); + streams[0] = new Stream; + + cuda::setDevice(1); + d_algs[1] = cuda::createStereoBM(256); + streams[1] = new Stream; +} + +StereoMultiGpuStream::~StereoMultiGpuStream() +{ + cuda::setDevice(0); + d_leftFrames[0].release(); + d_rightFrames[0].release(); + d_disparities[0].release(); + d_algs[0].release(); + streams[0].release(); + + cuda::setDevice(1); + d_leftFrames[1].release(); + d_rightFrames[1].release(); + d_disparities[1].release(); + d_algs[1].release(); + streams[1].release(); +} + +void StereoMultiGpuStream::compute(const CudaMem& leftFrame, const CudaMem& rightFrame, CudaMem& disparity) +{ + disparity.create(leftFrame.size(), CV_8UC1); + + // Split input data onto two parts for each GPUs. + // We add small border for each part, + // because original algorithm doesn't calculate disparity on image borders. + // With such padding we will get output in the middle of final result. 
+ + Mat leftFrameHdr = leftFrame.createMatHeader(); + Mat rightFrameHdr = rightFrame.createMatHeader(); + Mat disparityHdr = disparity.createMatHeader(); + Mat disparityPart0 = disparityHdr.rowRange(0, leftFrame.rows / 2); + Mat disparityPart1 = disparityHdr.rowRange(leftFrame.rows / 2, leftFrame.rows); + + cuda::setDevice(0); + d_leftFrames[0].upload(leftFrameHdr.rowRange(0, leftFrame.rows / 2 + 32), *streams[0]); + d_rightFrames[0].upload(rightFrameHdr.rowRange(0, leftFrame.rows / 2 + 32), *streams[0]); + d_algs[0]->compute(d_leftFrames[0], d_rightFrames[0], d_disparities[0], *streams[0]); + d_disparities[0].rowRange(0, leftFrame.rows / 2).download(disparityPart0, *streams[0]); + + cuda::setDevice(1); + d_leftFrames[1].upload(leftFrameHdr.rowRange(leftFrame.rows / 2 - 32, leftFrame.rows), *streams[1]); + d_rightFrames[1].upload(rightFrameHdr.rowRange(leftFrame.rows / 2 - 32, leftFrame.rows), *streams[1]); + d_algs[1]->compute(d_leftFrames[1], d_rightFrames[1], d_disparities[1], *streams[1]); + d_disparities[1].rowRange(32, d_disparities[1].rows).download(disparityPart1, *streams[1]); + + cuda::setDevice(0); + streams[0]->waitForCompletion(); + + cuda::setDevice(1); + streams[1]->waitForCompletion(); +} + +/////////////////////////////////////////////////////////// +// main + int main(int argc, char** argv) { - if (argc < 5) + if (argc != 3) { - printHelp(); + cerr << "Usage: stereo_multi_gpu " << endl; return -1; } - int num_devices = getCudaEnabledDeviceCount(); - if (num_devices < 2) + const int numDevices = getCudaEnabledDeviceCount(); + if (numDevices != 2) { - std::cout << "Two or more GPUs are required\n"; + cerr << "Two GPUs are required" << endl; return -1; } - for (int i = 0; i < num_devices; ++i) - { - cv::cuda::printShortCudaDeviceInfo(i); - DeviceInfo dev_info(i); - if (!dev_info.isCompatible()) + for (int i = 0; i < numDevices; ++i) + { + DeviceInfo devInfo(i); + if (!devInfo.isCompatible()) { - std::cout << "CUDA module isn't built for GPU #" << i 
<< " (" - << dev_info.name() << ", CC " << dev_info.majorVersion() - << dev_info.minorVersion() << "\n"; + cerr << "CUDA module was't built for GPU #" << i << " (" + << devInfo.name() << ", CC " << devInfo.majorVersion() + << devInfo.minorVersion() << endl; return -1; } + + printShortCudaDeviceInfo(i); } - // Load input data - Mat left, right; - for (int i = 1; i < argc; ++i) + VideoCapture leftVideo(argv[1]); + VideoCapture rightVideo(argv[2]); + + if (!leftVideo.isOpened()) { - if (string(argv[i]) == "--left") + cerr << "Can't open " << argv[1] << " video file" << endl; + return -1; + } + + if (!rightVideo.isOpened()) + { + cerr << "Can't open " << argv[2] << " video file" << endl; + return -1; + } + + cout << endl; + cout << "This sample demonstrates working on one piece of data using two GPUs." << endl; + cout << "It splits input into two parts and processes them separately on different GPUs." << endl; + cout << endl; + + Mat leftFrame, rightFrame; + CudaMem leftGrayFrame, rightGrayFrame; + + StereoSingleGpu gpu0Alg(0); + StereoSingleGpu gpu1Alg(1); + StereoMultiGpuThread multiThreadAlg; + StereoMultiGpuStream multiStreamAlg; + + Mat disparityGpu0; + Mat disparityGpu1; + Mat disparityMultiThread; + CudaMem disparityMultiStream; + + Mat disparityGpu0Show; + Mat disparityGpu1Show; + Mat disparityMultiThreadShow; + Mat disparityMultiStreamShow; + + TickMeter tm; + + cout << "-------------------------------------------------------------------" << endl; + cout << "| Frame | GPU 0 ms | GPU 1 ms | Multi Thread ms | Multi Stream ms |" << endl; + cout << "-------------------------------------------------------------------" << endl; + + for (int i = 0;; ++i) + { + leftVideo >> leftFrame; + rightVideo >> rightFrame; + + if (leftFrame.empty() || rightFrame.empty()) + break; + + if (leftFrame.size() != rightFrame.size()) { - left = imread(argv[++i], cv::IMREAD_GRAYSCALE); - CV_Assert(!left.empty()); - } - else if (string(argv[i]) == "--right") - { - right = 
imread(argv[++i], cv::IMREAD_GRAYSCALE); - CV_Assert(!right.empty()); - } - else if (string(argv[i]) == "--help") - { - printHelp(); + cerr << "Frames have different sizes" << endl; return -1; } + + leftGrayFrame.create(leftFrame.size(), CV_8UC1); + rightGrayFrame.create(leftFrame.size(), CV_8UC1); + + cvtColor(leftFrame, leftGrayFrame.createMatHeader(), COLOR_BGR2GRAY); + cvtColor(rightFrame, rightGrayFrame.createMatHeader(), COLOR_BGR2GRAY); + + tm.reset(); tm.start(); + gpu0Alg.compute(leftGrayFrame.createMatHeader(), rightGrayFrame.createMatHeader(), + disparityGpu0); + tm.stop(); + + const double gpu0Time = tm.getTimeMilli(); + + tm.reset(); tm.start(); + gpu1Alg.compute(leftGrayFrame.createMatHeader(), rightGrayFrame.createMatHeader(), + disparityGpu1); + tm.stop(); + + const double gpu1Time = tm.getTimeMilli(); + + tm.reset(); tm.start(); + multiThreadAlg.compute(leftGrayFrame.createMatHeader(), rightGrayFrame.createMatHeader(), + disparityMultiThread); + tm.stop(); + + const double multiThreadTime = tm.getTimeMilli(); + + tm.reset(); tm.start(); + multiStreamAlg.compute(leftGrayFrame, rightGrayFrame, disparityMultiStream); + tm.stop(); + + const double multiStreamTime = tm.getTimeMilli(); + + cout << "| " << setw(5) << i << " | " + << setw(8) << setprecision(1) << fixed << gpu0Time << " | " + << setw(8) << setprecision(1) << fixed << gpu1Time << " | " + << setw(15) << setprecision(1) << fixed << multiThreadTime << " | " + << setw(15) << setprecision(1) << fixed << multiStreamTime << " |" << endl; + + resize(disparityGpu0, disparityGpu0Show, Size(1024, 768), 0, 0, INTER_AREA); + resize(disparityGpu1, disparityGpu1Show, Size(1024, 768), 0, 0, INTER_AREA); + resize(disparityMultiThread, disparityMultiThreadShow, Size(1024, 768), 0, 0, INTER_AREA); + resize(disparityMultiStream.createMatHeader(), disparityMultiStreamShow, Size(1024, 768), 0, 0, INTER_AREA); + + imshow("disparityGpu0", disparityGpu0Show); + imshow("disparityGpu1", disparityGpu1Show); + 
imshow("disparityMultiThread", disparityMultiThreadShow); + imshow("disparityMultiStream", disparityMultiStreamShow); + + const int key = waitKey(30) & 0xff; + if (key == 27) + break; } - // Split source images for processing on the GPU #0 - setDevice(0); - d_left[0].upload(left.rowRange(0, left.rows / 2)); - d_right[0].upload(right.rowRange(0, right.rows / 2)); - bm[0] = cuda::createStereoBM(); + cout << "-------------------------------------------------------------------" << endl; - // Split source images for processing on the GPU #1 - setDevice(1); - d_left[1].upload(left.rowRange(left.rows / 2, left.rows)); - d_right[1].upload(right.rowRange(right.rows / 2, right.rows)); - bm[1] = cuda::createStereoBM(); - - // Execute calculation in two threads using two GPUs - int devices[] = {0, 1}; - tbb::parallel_do(devices, devices + 2, Worker()); - - // Release the first GPU resources - setDevice(0); - imshow("GPU #0 result", Mat(d_result[0])); - d_left[0].release(); - d_right[0].release(); - d_result[0].release(); - bm[0].release(); - - // Release the second GPU resources - setDevice(1); - imshow("GPU #1 result", Mat(d_result[1])); - d_left[1].release(); - d_right[1].release(); - d_result[1].release(); - bm[1].release(); - - waitKey(); return 0; } - - -void Worker::operator()(int device_id) const -{ - setDevice(device_id); - - bm[device_id]->compute(d_left[device_id], d_right[device_id], d_result[device_id]); - - std::cout << "GPU #" << device_id << " (" << DeviceInfo().name() - << "): finished\n"; -} - -#endif diff --git a/samples/winrt/ImageManipulations/assets/windows-sdk.png b/samples/winrt/ImageManipulations/assets/windows-sdk.png index 67268021d..7f753f7b6 100644 Binary files a/samples/winrt/ImageManipulations/assets/windows-sdk.png and b/samples/winrt/ImageManipulations/assets/windows-sdk.png differ