From 5877debb6f6b2599c7f2cc9e1ce5ad9d4931cd9b Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Tue, 22 Mar 2016 16:52:23 +0300 Subject: [PATCH 1/7] HAL resize, warpAffine, warpPerspective interface - added HAL documentation support - added documentation to HAL replacement interface - updated several HAL functions in imgproc module --- doc/CMakeLists.txt | 8 +- modules/core/include/opencv2/core/cvdef.h | 61 ---- modules/core/include/opencv2/core/hal/hal.hpp | 14 +- .../core/include/opencv2/core/hal/interface.h | 121 +++++-- modules/core/src/arithm.cpp | 44 +-- modules/core/src/arithm_core.hpp | 4 +- modules/core/src/hal_replacement.hpp | 329 +++++++++++++----- modules/imgproc/include/opencv2/imgproc.hpp | 5 + .../include/opencv2/imgproc/hal/hal.hpp | 23 +- .../include/opencv2/imgproc/hal/interface.h | 26 ++ modules/imgproc/src/hal_replacement.hpp | 298 +++++++++++++++- modules/imgproc/src/imgwarp.cpp | 228 +++++++----- modules/imgproc/src/morph.cpp | 16 +- 13 files changed, 866 insertions(+), 311 deletions(-) create mode 100644 modules/imgproc/include/opencv2/imgproc/hal/interface.h diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt index c8ee7630e..ef579fb97 100644 --- a/doc/CMakeLists.txt +++ b/doc/CMakeLists.txt @@ -42,6 +42,7 @@ if(BUILD_DOCS AND DOXYGEN_FOUND) set(paths_bib) set(paths_sample) set(paths_tutorial) + set(paths_hal_interface) set(refs_main) set(refs_extra) set(deps) @@ -87,6 +88,11 @@ if(BUILD_DOCS AND DOXYGEN_FOUND) file(APPEND "${tutorial_contrib_root}" "- ${m}. 
@subpage ${tutorial_id}\n") endforeach() endif() + # HAL replacement file + set(replacement_header "${OPENCV_MODULE_opencv_${m}_LOCATION}/src/hal_replacement.hpp") + if(EXISTS "${replacement_header}") + list(APPEND paths_hal_interface "${replacement_header}") + endif() # BiBTeX file set(bib_file "${docs_dir}/${m}.bib") @@ -131,7 +137,7 @@ if(BUILD_DOCS AND DOXYGEN_FOUND) set(example_path "${CMAKE_SOURCE_DIR}/samples") # set export variables - string(REPLACE ";" " \\\n" CMAKE_DOXYGEN_INPUT_LIST "${rootfile} ; ${faqfile} ; ${paths_include} ; ${paths_doc} ; ${tutorial_path} ; ${tutorial_py_path} ; ${paths_tutorial} ; ${tutorial_contrib_root}") + string(REPLACE ";" " \\\n" CMAKE_DOXYGEN_INPUT_LIST "${rootfile} ; ${faqfile} ; ${paths_include} ; ${paths_hal_interface} ; ${paths_doc} ; ${tutorial_path} ; ${tutorial_py_path} ; ${paths_tutorial} ; ${tutorial_contrib_root}") string(REPLACE ";" " \\\n" CMAKE_DOXYGEN_IMAGE_PATH "${paths_doc} ; ${tutorial_path} ; ${tutorial_py_path} ; ${paths_tutorial}") # TODO: remove paths_doc from EXAMPLE_PATH after face module tutorials/samples moved to separate folders string(REPLACE ";" " \\\n" CMAKE_DOXYGEN_EXAMPLE_PATH "${example_path} ; ${paths_doc} ; ${paths_sample}") diff --git a/modules/core/include/opencv2/core/cvdef.h b/modules/core/include/opencv2/core/cvdef.h index af2abfbb2..c00591419 100644 --- a/modules/core/include/opencv2/core/cvdef.h +++ b/modules/core/include/opencv2/core/cvdef.h @@ -357,67 +357,6 @@ Cv64suf; * Matrix type (Mat) * \****************************************************************************************/ -#define CV_CN_MAX 512 -#define CV_CN_SHIFT 3 -#define CV_DEPTH_MAX (1 << CV_CN_SHIFT) - -#define CV_8U 0 -#define CV_8S 1 -#define CV_16U 2 -#define CV_16S 3 -#define CV_32S 4 -#define CV_32F 5 -#define CV_64F 6 -#define CV_USRTYPE1 7 - -#define CV_MAT_DEPTH_MASK (CV_DEPTH_MAX - 1) -#define CV_MAT_DEPTH(flags) ((flags) & CV_MAT_DEPTH_MASK) - -#define CV_MAKETYPE(depth,cn) (CV_MAT_DEPTH(depth) + (((cn)-1) 
<< CV_CN_SHIFT)) -#define CV_MAKE_TYPE CV_MAKETYPE - -#define CV_8UC1 CV_MAKETYPE(CV_8U,1) -#define CV_8UC2 CV_MAKETYPE(CV_8U,2) -#define CV_8UC3 CV_MAKETYPE(CV_8U,3) -#define CV_8UC4 CV_MAKETYPE(CV_8U,4) -#define CV_8UC(n) CV_MAKETYPE(CV_8U,(n)) - -#define CV_8SC1 CV_MAKETYPE(CV_8S,1) -#define CV_8SC2 CV_MAKETYPE(CV_8S,2) -#define CV_8SC3 CV_MAKETYPE(CV_8S,3) -#define CV_8SC4 CV_MAKETYPE(CV_8S,4) -#define CV_8SC(n) CV_MAKETYPE(CV_8S,(n)) - -#define CV_16UC1 CV_MAKETYPE(CV_16U,1) -#define CV_16UC2 CV_MAKETYPE(CV_16U,2) -#define CV_16UC3 CV_MAKETYPE(CV_16U,3) -#define CV_16UC4 CV_MAKETYPE(CV_16U,4) -#define CV_16UC(n) CV_MAKETYPE(CV_16U,(n)) - -#define CV_16SC1 CV_MAKETYPE(CV_16S,1) -#define CV_16SC2 CV_MAKETYPE(CV_16S,2) -#define CV_16SC3 CV_MAKETYPE(CV_16S,3) -#define CV_16SC4 CV_MAKETYPE(CV_16S,4) -#define CV_16SC(n) CV_MAKETYPE(CV_16S,(n)) - -#define CV_32SC1 CV_MAKETYPE(CV_32S,1) -#define CV_32SC2 CV_MAKETYPE(CV_32S,2) -#define CV_32SC3 CV_MAKETYPE(CV_32S,3) -#define CV_32SC4 CV_MAKETYPE(CV_32S,4) -#define CV_32SC(n) CV_MAKETYPE(CV_32S,(n)) - -#define CV_32FC1 CV_MAKETYPE(CV_32F,1) -#define CV_32FC2 CV_MAKETYPE(CV_32F,2) -#define CV_32FC3 CV_MAKETYPE(CV_32F,3) -#define CV_32FC4 CV_MAKETYPE(CV_32F,4) -#define CV_32FC(n) CV_MAKETYPE(CV_32F,(n)) - -#define CV_64FC1 CV_MAKETYPE(CV_64F,1) -#define CV_64FC2 CV_MAKETYPE(CV_64F,2) -#define CV_64FC3 CV_MAKETYPE(CV_64F,3) -#define CV_64FC4 CV_MAKETYPE(CV_64F,4) -#define CV_64FC(n) CV_MAKETYPE(CV_64F,(n)) - #define CV_MAT_CN_MASK ((CV_CN_MAX - 1) << CV_CN_SHIFT) #define CV_MAT_CN(flags) ((((flags) & CV_MAT_CN_MASK) >> CV_CN_SHIFT) + 1) #define CV_MAT_TYPE_MASK (CV_DEPTH_MAX*CV_CN_MAX - 1) diff --git a/modules/core/include/opencv2/core/hal/hal.hpp b/modules/core/include/opencv2/core/hal/hal.hpp index 118913eb7..64af09ab8 100644 --- a/modules/core/include/opencv2/core/hal/hal.hpp +++ b/modules/core/include/opencv2/core/hal/hal.hpp @@ -171,13 +171,13 @@ CV_EXPORTS void div32s( const int* src1, size_t step1, const int* src2, 
size_t s CV_EXPORTS void div32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scale); CV_EXPORTS void div64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scale); -CV_EXPORTS void recip8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale); -CV_EXPORTS void recip8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale); -CV_EXPORTS void recip16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale); -CV_EXPORTS void recip16s( const short* src1, size_t step1, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scale); -CV_EXPORTS void recip32s( const int* src1, size_t step1, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scale); -CV_EXPORTS void recip32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scale); -CV_EXPORTS void recip64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void recip8u( const uchar *, size_t, const uchar * src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void recip8s( const schar *, size_t, const schar * src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void recip16u( const ushort *, size_t, const ushort * src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void recip16s( const short *, size_t, const short * src2, size_t step2, short* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS 
void recip32s( const int *, size_t, const int * src2, size_t step2, int* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void recip32f( const float *, size_t, const float * src2, size_t step2, float* dst, size_t step, int width, int height, void* scale); +CV_EXPORTS void recip64f( const double *, size_t, const double * src2, size_t step2, double* dst, size_t step, int width, int height, void* scale); CV_EXPORTS void addWeighted8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* _scalars ); CV_EXPORTS void addWeighted8s( const schar* src1, size_t step1, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scalars ); diff --git a/modules/core/include/opencv2/core/hal/interface.h b/modules/core/include/opencv2/core/hal/interface.h index 51f760610..ba3f55c33 100644 --- a/modules/core/include/opencv2/core/hal/interface.h +++ b/modules/core/include/opencv2/core/hal/interface.h @@ -1,19 +1,16 @@ -#ifndef _HAL_INTERFACE_HPP_INCLUDED_ -#define _HAL_INTERFACE_HPP_INCLUDED_ +#ifndef OPENCV_CORE_HAL_INTERFACE_H +#define OPENCV_CORE_HAL_INTERFACE_H //! @addtogroup core_hal_interface //! @{ +//! @name Return codes +//! @{ #define CV_HAL_ERROR_OK 0 #define CV_HAL_ERROR_NOT_IMPLEMENTED 1 #define CV_HAL_ERROR_UNKNOWN -1 +//! @} -#define CV_HAL_CMP_EQ 0 -#define CV_HAL_CMP_GT 1 -#define CV_HAL_CMP_GE 2 -#define CV_HAL_CMP_LT 3 -#define CV_HAL_CMP_LE 4 -#define CV_HAL_CMP_NE 5 #ifdef __cplusplus #include @@ -21,18 +18,17 @@ #include #endif -/* primitive types */ -/* - schar - signed 1 byte integer - uchar - unsigned 1 byte integer - short - signed 2 byte integer - ushort - unsigned 2 byte integer - int - signed 4 byte integer - uint - unsigned 4 byte integer - int64 - signed 8 byte integer - uint64 - unsigned 8 byte integer -*/ - +//! @name Data types +//! primitive types +//! - schar - signed 1 byte integer +//! - uchar - unsigned 1 byte integer +//! 
- short - signed 2 byte integer +//! - ushort - unsigned 2 byte integer +//! - int - signed 4 byte integer +//! - uint - unsigned 4 byte integer +//! - int64 - signed 8 byte integer +//! - uint64 - unsigned 8 byte integer +//! @{ #if !defined _MSC_VER && !defined __BORLANDC__ # if defined __cplusplus && __cplusplus >= 201103L && !defined __APPLE__ # include @@ -64,6 +60,91 @@ typedef signed char schar; # define CV_BIG_UINT(n) n##ULL #endif +#define CV_CN_MAX 512 +#define CV_CN_SHIFT 3 +#define CV_DEPTH_MAX (1 << CV_CN_SHIFT) + +#define CV_8U 0 +#define CV_8S 1 +#define CV_16U 2 +#define CV_16S 3 +#define CV_32S 4 +#define CV_32F 5 +#define CV_64F 6 +#define CV_USRTYPE1 7 + +#define CV_MAT_DEPTH_MASK (CV_DEPTH_MAX - 1) +#define CV_MAT_DEPTH(flags) ((flags) & CV_MAT_DEPTH_MASK) + +#define CV_MAKETYPE(depth,cn) (CV_MAT_DEPTH(depth) + (((cn)-1) << CV_CN_SHIFT)) +#define CV_MAKE_TYPE CV_MAKETYPE + +#define CV_8UC1 CV_MAKETYPE(CV_8U,1) +#define CV_8UC2 CV_MAKETYPE(CV_8U,2) +#define CV_8UC3 CV_MAKETYPE(CV_8U,3) +#define CV_8UC4 CV_MAKETYPE(CV_8U,4) +#define CV_8UC(n) CV_MAKETYPE(CV_8U,(n)) + +#define CV_8SC1 CV_MAKETYPE(CV_8S,1) +#define CV_8SC2 CV_MAKETYPE(CV_8S,2) +#define CV_8SC3 CV_MAKETYPE(CV_8S,3) +#define CV_8SC4 CV_MAKETYPE(CV_8S,4) +#define CV_8SC(n) CV_MAKETYPE(CV_8S,(n)) + +#define CV_16UC1 CV_MAKETYPE(CV_16U,1) +#define CV_16UC2 CV_MAKETYPE(CV_16U,2) +#define CV_16UC3 CV_MAKETYPE(CV_16U,3) +#define CV_16UC4 CV_MAKETYPE(CV_16U,4) +#define CV_16UC(n) CV_MAKETYPE(CV_16U,(n)) + +#define CV_16SC1 CV_MAKETYPE(CV_16S,1) +#define CV_16SC2 CV_MAKETYPE(CV_16S,2) +#define CV_16SC3 CV_MAKETYPE(CV_16S,3) +#define CV_16SC4 CV_MAKETYPE(CV_16S,4) +#define CV_16SC(n) CV_MAKETYPE(CV_16S,(n)) + +#define CV_32SC1 CV_MAKETYPE(CV_32S,1) +#define CV_32SC2 CV_MAKETYPE(CV_32S,2) +#define CV_32SC3 CV_MAKETYPE(CV_32S,3) +#define CV_32SC4 CV_MAKETYPE(CV_32S,4) +#define CV_32SC(n) CV_MAKETYPE(CV_32S,(n)) + +#define CV_32FC1 CV_MAKETYPE(CV_32F,1) +#define CV_32FC2 CV_MAKETYPE(CV_32F,2) 
+#define CV_32FC3 CV_MAKETYPE(CV_32F,3) +#define CV_32FC4 CV_MAKETYPE(CV_32F,4) +#define CV_32FC(n) CV_MAKETYPE(CV_32F,(n)) + +#define CV_64FC1 CV_MAKETYPE(CV_64F,1) +#define CV_64FC2 CV_MAKETYPE(CV_64F,2) +#define CV_64FC3 CV_MAKETYPE(CV_64F,3) +#define CV_64FC4 CV_MAKETYPE(CV_64F,4) +#define CV_64FC(n) CV_MAKETYPE(CV_64F,(n)) +//! @} + +//! @name Comparison operation +//! @sa cv::CmpTypes +//! @{ +#define CV_HAL_CMP_EQ 0 +#define CV_HAL_CMP_GT 1 +#define CV_HAL_CMP_GE 2 +#define CV_HAL_CMP_LT 3 +#define CV_HAL_CMP_LE 4 +#define CV_HAL_CMP_NE 5 +//! @} + +//! @name Border processing modes +//! @sa cv::BorderTypes +//! @{ +#define CV_HAL_BORDER_CONSTANT 0 +#define CV_HAL_BORDER_REPLICATE 1 +#define CV_HAL_BORDER_REFLECT 2 +#define CV_HAL_BORDER_WRAP 3 +#define CV_HAL_BORDER_REFLECT_101 4 +#define CV_HAL_BORDER_TRANSPARENT 5 +#define CV_HAL_BORDER_ISOLATED 16 +//! @} + //! @} #endif diff --git a/modules/core/src/arithm.cpp b/modules/core/src/arithm.cpp index c3acca054..8ef3370bd 100644 --- a/modules/core/src/arithm.cpp +++ b/modules/core/src/arithm.cpp @@ -3123,7 +3123,7 @@ void div8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, if( src1 ) div_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); else - recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); + recip_i(src2, step2, dst, step, width, height, *(const double*)scale); } void div8s( const schar* src1, size_t step1, const schar* src2, size_t step2, @@ -3172,53 +3172,53 @@ void div64f( const double* src1, size_t step1, const double* src2, size_t step2, // Reciprocial //======================================= -void recip8u( const uchar* src1, size_t step1, const uchar* src2, size_t step2, +void recip8u( const uchar*, size_t, const uchar* src2, size_t step2, uchar* dst, size_t step, int width, int height, void* scale) { - CALL_HAL(recip8u, cv_hal_recip8u, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) - 
recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); + CALL_HAL(recip8u, cv_hal_recip8u, src2, step2, dst, step, width, height, *(const double*)scale) + recip_i(src2, step2, dst, step, width, height, *(const double*)scale); } -void recip8s( const schar* src1, size_t step1, const schar* src2, size_t step2, +void recip8s( const schar*, size_t, const schar* src2, size_t step2, schar* dst, size_t step, int width, int height, void* scale) { - CALL_HAL(recip8s, cv_hal_recip8s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) - recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); + CALL_HAL(recip8s, cv_hal_recip8s, src2, step2, dst, step, width, height, *(const double*)scale) + recip_i(src2, step2, dst, step, width, height, *(const double*)scale); } -void recip16u( const ushort* src1, size_t step1, const ushort* src2, size_t step2, +void recip16u( const ushort*, size_t, const ushort* src2, size_t step2, ushort* dst, size_t step, int width, int height, void* scale) { - CALL_HAL(recip16u, cv_hal_recip16u, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) - recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); + CALL_HAL(recip16u, cv_hal_recip16u, src2, step2, dst, step, width, height, *(const double*)scale) + recip_i(src2, step2, dst, step, width, height, *(const double*)scale); } -void recip16s( const short* src1, size_t step1, const short* src2, size_t step2, +void recip16s( const short*, size_t, const short* src2, size_t step2, short* dst, size_t step, int width, int height, void* scale) { - CALL_HAL(recip16s, cv_hal_recip16s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) - recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); + CALL_HAL(recip16s, cv_hal_recip16s, src2, step2, dst, step, width, height, *(const double*)scale) + recip_i(src2, step2, dst, step, width, height, 
*(const double*)scale); } -void recip32s( const int* src1, size_t step1, const int* src2, size_t step2, +void recip32s( const int*, size_t, const int* src2, size_t step2, int* dst, size_t step, int width, int height, void* scale) { - CALL_HAL(recip32s, cv_hal_recip32s, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) - recip_i(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); + CALL_HAL(recip32s, cv_hal_recip32s, src2, step2, dst, step, width, height, *(const double*)scale) + recip_i(src2, step2, dst, step, width, height, *(const double*)scale); } -void recip32f( const float* src1, size_t step1, const float* src2, size_t step2, +void recip32f( const float*, size_t, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scale) { - CALL_HAL(recip32f, cv_hal_recip32f, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) - recip_f(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); + CALL_HAL(recip32f, cv_hal_recip32f, src2, step2, dst, step, width, height, *(const double*)scale) + recip_f(src2, step2, dst, step, width, height, *(const double*)scale); } -void recip64f( const double* src1, size_t step1, const double* src2, size_t step2, +void recip64f( const double*, size_t, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scale) { - CALL_HAL(recip64f, cv_hal_recip64f, src1, step1, src2, step2, dst, step, width, height, *(const double*)scale) - recip_f(src1, step1, src2, step2, dst, step, width, height, *(const double*)scale); + CALL_HAL(recip64f, cv_hal_recip64f, src2, step2, dst, step, width, height, *(const double*)scale) + recip_f(src2, step2, dst, step, width, height, *(const double*)scale); } //======================================= diff --git a/modules/core/src/arithm_core.hpp b/modules/core/src/arithm_core.hpp index 4790586eb..b92d47a81 100644 --- a/modules/core/src/arithm_core.hpp +++ 
b/modules/core/src/arithm_core.hpp @@ -528,7 +528,7 @@ div_f( const T* src1, size_t step1, const T* src2, size_t step2, } template static void -recip_i( const T*, size_t, const T* src2, size_t step2, +recip_i( const T* src2, size_t step2, T* dst, size_t step, int width, int height, double scale ) { step2 /= sizeof(src2[0]); @@ -549,7 +549,7 @@ recip_i( const T*, size_t, const T* src2, size_t step2, } template static void -recip_f( const T*, size_t, const T* src2, size_t step2, +recip_f( const T* src2, size_t step2, T* dst, size_t step, int width, int height, double scale ) { T scale_f = (T)scale; diff --git a/modules/core/src/hal_replacement.hpp b/modules/core/src/hal_replacement.hpp index 65866f8bf..69345ca4a 100644 --- a/modules/core/src/hal_replacement.hpp +++ b/modules/core/src/hal_replacement.hpp @@ -42,51 +42,119 @@ // //M*/ -#ifndef __OPENCV_CORE_HAL_REPLACEMENT_HPP__ -#define __OPENCV_CORE_HAL_REPLACEMENT_HPP__ +#ifndef OPENCV_CORE_HAL_REPLACEMENT_HPP +#define OPENCV_CORE_HAL_REPLACEMENT_HPP #include "opencv2/core/hal/interface.h" -inline int hal_ni_add8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_add8s(const schar*, size_t, const schar*, size_t, schar*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_add16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_add16s(const short*, size_t, const short*, size_t, short*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_add32s(const int*, size_t, const int*, size_t, int*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_add32f(const float*, size_t, const float*, size_t, float*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_add64f(const double*, size_t, const double*, size_t, double*, size_t, int, int) { return 
CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_sub8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_sub8s(const schar*, size_t, const schar*, size_t, schar*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_sub16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_sub16s(const short*, size_t, const short*, size_t, short*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_sub32s(const int*, size_t, const int*, size_t, int*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_sub32f(const float*, size_t, const float*, size_t, float*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_sub64f(const double*, size_t, const double*, size_t, double*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_max8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_max8s(const schar*, size_t, const schar*, size_t, schar*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_max16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_max16s(const short*, size_t, const short*, size_t, short*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_max32s(const int*, size_t, const int*, size_t, int*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_max32f(const float*, size_t, const float*, size_t, float*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_max64f(const double*, size_t, const double*, size_t, double*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_min8u(const uchar*, size_t, const 
uchar*, size_t, uchar*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_min8s(const schar*, size_t, const schar*, size_t, schar*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_min16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_min16s(const short*, size_t, const short*, size_t, short*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_min32s(const int*, size_t, const int*, size_t, int*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_min32f(const float*, size_t, const float*, size_t, float*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_min64f(const double*, size_t, const double*, size_t, double*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_absdiff8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_absdiff8s(const schar*, size_t, const schar*, size_t, schar*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_absdiff16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_absdiff16s(const short*, size_t, const short*, size_t, short*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_absdiff32s(const int*, size_t, const int*, size_t, int*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_absdiff32f(const float*, size_t, const float*, size_t, float*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_absdiff64f(const double*, size_t, const double*, size_t, double*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_and8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int) { return 
CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_or8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_xor8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_not8u(const uchar*, size_t, uchar*, size_t, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +#if defined __GNUC__ +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wunused-parameter" +#elif defined _MSC_VER +# pragma warning( push ) +# pragma warning( disable: 4100 ) +#endif +//! @addtogroup core_hal_interface +//! @note Define your functions to override default implementations: +//! @code +//! #undef cv_hal_add8u +//! #define cv_hal_add8u my_add8u +//! @endcode +//! @{ + +/** +Add: _dst[i] = src1[i] + src2[i]_ @n +Sub: _dst[i] = src1[i] - src2[i]_ +@param src1_data,src1_step first source image data and step +@param src2_data,src2_step second source image data and step +@param dst_data,dst_step destination image data and step +@param width,height dimensions of the images +*/ +//! @addtogroup core_hal_interface_addsub Element-wise add and subtract +//! 
@{ +inline int hal_ni_add8u(const uchar *src1_data, size_t src1_step, const uchar *src2_data, size_t src2_step, uchar *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_add8s(const schar *src1_data, size_t src1_step, const schar *src2_data, size_t src2_step, schar *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_add16u(const ushort *src1_data, size_t src1_step, const ushort *src2_data, size_t src2_step, ushort *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_add16s(const short *src1_data, size_t src1_step, const short *src2_data, size_t src2_step, short *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_add32s(const int *src1_data, size_t src1_step, const int *src2_data, size_t src2_step, int *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_add32f(const float *src1_data, size_t src1_step, const float *src2_data, size_t src2_step, float *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_add64f(const double *src1_data, size_t src1_step, const double *src2_data, size_t src2_step, double *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } + +inline int hal_ni_sub8u(const uchar *src1_data, size_t src1_step, const uchar *src2_data, size_t src2_step, uchar *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_sub8s(const schar *src1_data, size_t src1_step, const schar *src2_data, size_t src2_step, schar *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_sub16u(const ushort *src1_data, size_t src1_step, const ushort *src2_data, size_t src2_step, ushort 
*dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_sub16s(const short *src1_data, size_t src1_step, const short *src2_data, size_t src2_step, short *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_sub32s(const int *src1_data, size_t src1_step, const int *src2_data, size_t src2_step, int *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_sub32f(const float *src1_data, size_t src1_step, const float *src2_data, size_t src2_step, float *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_sub64f(const double *src1_data, size_t src1_step, const double *src2_data, size_t src2_step, double *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +//! @} + +/** +Minimum: _dst[i] = min(src1[i], src2[i])_ @n +Maximum: _dst[i] = max(src1[i], src2[i])_ +@param src1_data,src1_step first source image data and step +@param src2_data,src2_step second source image data and step +@param dst_data,dst_step destination image data and step +@param width,height dimensions of the images +*/ +//! @addtogroup core_hal_interface_minmax Element-wise minimum or maximum +//! 
@{ +inline int hal_ni_max8u(const uchar *src1_data, size_t src1_step, const uchar *src2_data, size_t src2_step, uchar *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_max8s(const schar *src1_data, size_t src1_step, const schar *src2_data, size_t src2_step, schar *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_max16u(const ushort *src1_data, size_t src1_step, const ushort *src2_data, size_t src2_step, ushort *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_max16s(const short *src1_data, size_t src1_step, const short *src2_data, size_t src2_step, short *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_max32s(const int *src1_data, size_t src1_step, const int *src2_data, size_t src2_step, int *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_max32f(const float *src1_data, size_t src1_step, const float *src2_data, size_t src2_step, float *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_max64f(const double *src1_data, size_t src1_step, const double *src2_data, size_t src2_step, double *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } + +inline int hal_ni_min8u(const uchar *src1_data, size_t src1_step, const uchar *src2_data, size_t src2_step, uchar *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_min8s(const schar *src1_data, size_t src1_step, const schar *src2_data, size_t src2_step, schar *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_min16u(const ushort *src1_data, size_t src1_step, const ushort *src2_data, size_t src2_step, ushort 
*dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_min16s(const short *src1_data, size_t src1_step, const short *src2_data, size_t src2_step, short *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_min32s(const int *src1_data, size_t src1_step, const int *src2_data, size_t src2_step, int *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_min32f(const float *src1_data, size_t src1_step, const float *src2_data, size_t src2_step, float *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_min64f(const double *src1_data, size_t src1_step, const double *src2_data, size_t src2_step, double *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +//! @} + +/** +Absolute difference: _dst[i] = | src1[i] - src2[i] |_ +@param src1_data,src1_step first source image data and step +@param src2_data,src2_step second source image data and step +@param dst_data,dst_step destination image data and step +@param width,height dimensions of the images +@param scale additional multiplier +*/ +//! @addtogroup core_hal_interface_absdiff Element-wise absolute difference +//! 
@{ +inline int hal_ni_absdiff8u(const uchar *src1_data, size_t src1_step, const uchar *src2_data, size_t src2_step, uchar *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_absdiff8s(const schar *src1_data, size_t src1_step, const schar *src2_data, size_t src2_step, schar *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_absdiff16u(const ushort *src1_data, size_t src1_step, const ushort *src2_data, size_t src2_step, ushort *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_absdiff16s(const short *src1_data, size_t src1_step, const short *src2_data, size_t src2_step, short *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_absdiff32s(const int *src1_data, size_t src1_step, const int *src2_data, size_t src2_step, int *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_absdiff32f(const float *src1_data, size_t src1_step, const float *src2_data, size_t src2_step, float *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_absdiff64f(const double *src1_data, size_t src1_step, const double *src2_data, size_t src2_step, double *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +//! @} + +/** +Bitwise AND: _dst[i] = src1[i] & src2[i]_ @n +Bitwise OR: _dst[i] = src1[i] | src2[i]_ @n +Bitwise XOR: _dst[i] = src1[i] ^ src2[i]_ @n +Bitwise NOT: _dst[i] = ~src[i]_ +@param src1_data,src1_step first source image data and step +@param src2_data,src2_step second source image data and step +@param dst_data,dst_step destination image data and step +@param width,height dimensions of the images + */ +//! @addtogroup core_hal_interface_logical Bitwise logical operations +//! 
@{ +inline int hal_ni_and8u(const uchar *src1_data, size_t src1_step, const uchar *src2_data, size_t src2_step, uchar *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_or8u(const uchar *src1_data, size_t src1_step, const uchar *src2_data, size_t src2_step, uchar *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_xor8u(const uchar *src1_data, size_t src1_step, const uchar *src2_data, size_t src2_step, uchar *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_not8u(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step, int width, int height) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +//! @} + +//! @cond IGNORED #define cv_hal_add8u hal_ni_add8u #define cv_hal_add8s hal_ni_add8s #define cv_hal_add16u hal_ni_add16u @@ -126,15 +194,28 @@ inline int hal_ni_not8u(const uchar*, size_t, uchar*, size_t, int, int) { return #define cv_hal_or8u hal_ni_or8u #define cv_hal_xor8u hal_ni_xor8u #define cv_hal_not8u hal_ni_not8u +//! 
@endcond -inline int hal_ni_cmp8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_cmp8s(const schar*, size_t, const schar*, size_t, uchar*, size_t, int, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_cmp16u(const ushort*, size_t, const ushort*, size_t, uchar*, size_t, int, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_cmp16s(const short*, size_t, const short*, size_t, uchar*, size_t, int, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_cmp32s(const int*, size_t, const int*, size_t, uchar*, size_t, int, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_cmp32f(const float*, size_t, const float*, size_t, uchar*, size_t, int, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_cmp64f(const double*, size_t, const double*, size_t, uchar*, size_t, int, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** +Compare: _dst[i] = src1[i] op src2[i]_ +@param src1_data,src1_step first source image data and step +@param src2_data,src2_step second source image data and step +@param dst_data,dst_step destination image data and step +@param width,height dimensions of the images +@param operation one of (CV_HAL_CMP_EQ, CV_HAL_CMP_GT, ...) +*/ +//! @addtogroup core_hal_interface_compare Element-wise compare +//! 
@{ +inline int hal_ni_cmp8u(const uchar *src1_data, size_t src1_step, const uchar *src2_data, size_t src2_step, uchar *dst_data, size_t dst_step, int width, int height, int operation) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_cmp8s(const schar *src1_data, size_t src1_step, const schar *src2_data, size_t src2_step, uchar *dst_data, size_t dst_step, int width, int height, int operation) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_cmp16u(const ushort *src1_data, size_t src1_step, const ushort *src2_data, size_t src2_step, uchar *dst_data, size_t dst_step, int width, int height, int operation) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_cmp16s(const short *src1_data, size_t src1_step, const short *src2_data, size_t src2_step, uchar *dst_data, size_t dst_step, int width, int height, int operation) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_cmp32s(const int *src1_data, size_t src1_step, const int *src2_data, size_t src2_step, uchar *dst_data, size_t dst_step, int width, int height, int operation) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_cmp32f(const float *src1_data, size_t src1_step, const float *src2_data, size_t src2_step, uchar *dst_data, size_t dst_step, int width, int height, int operation) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_cmp64f(const double *src1_data, size_t src1_step, const double *src2_data, size_t src2_step, uchar *dst_data, size_t dst_step, int width, int height, int operation) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +//! @} +//! @cond IGNORED #define cv_hal_cmp8u hal_ni_cmp8u #define cv_hal_cmp8s hal_ni_cmp8s #define cv_hal_cmp16u hal_ni_cmp16u @@ -142,29 +223,65 @@ inline int hal_ni_cmp64f(const double*, size_t, const double*, size_t, uchar*, s #define cv_hal_cmp32s hal_ni_cmp32s #define cv_hal_cmp32f hal_ni_cmp32f #define cv_hal_cmp64f hal_ni_cmp64f +//! 
@endcond -inline int hal_ni_mul8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_mul8s(const schar*, size_t, const schar*, size_t, schar*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_mul16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_mul16s(const short*, size_t, const short*, size_t, short*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_mul32s(const int*, size_t, const int*, size_t, int*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_mul32f(const float*, size_t, const float*, size_t, float*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_mul64f(const double*, size_t, const double*, size_t, double*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_div8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_div8s(const schar*, size_t, const schar*, size_t, schar*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_div16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_div16s(const short*, size_t, const short*, size_t, short*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_div32s(const int*, size_t, const int*, size_t, int*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_div32f(const float*, size_t, const float*, size_t, float*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_div64f(const double*, size_t, const double*, size_t, double*, size_t, int, int, double) { 
return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_recip8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_recip8s(const schar*, size_t, const schar*, size_t, schar*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_recip16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_recip16s(const short*, size_t, const short*, size_t, short*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_recip32s(const int*, size_t, const int*, size_t, int*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_recip32f(const float*, size_t, const float*, size_t, float*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_recip64f(const double*, size_t, const double*, size_t, double*, size_t, int, int, double) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** +Multiply: _dst[i] = scale * src1[i] * src2[i]_ +@param src1_data,src1_step first source image data and step +@param src2_data,src2_step second source image data and step +@param dst_data,dst_step destination image data and step +@param width,height dimensions of the images +@param scale additional multiplier +*/ +//! @addtogroup core_hal_interface_multiply Element-wise multiply +//! 
@{ +inline int hal_ni_mul8u(const uchar *src1_data, size_t src1_step, const uchar *src2_data, size_t src2_step, uchar *dst_data, size_t dst_step, int width, int height, double scale) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_mul8s(const schar *src1_data, size_t src1_step, const schar *src2_data, size_t src2_step, schar *dst_data, size_t dst_step, int width, int height, double scale) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_mul16u(const ushort *src1_data, size_t src1_step, const ushort *src2_data, size_t src2_step, ushort *dst_data, size_t dst_step, int width, int height, double scale) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_mul16s(const short *src1_data, size_t src1_step, const short *src2_data, size_t src2_step, short *dst_data, size_t dst_step, int width, int height, double scale) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_mul32s(const int *src1_data, size_t src1_step, const int *src2_data, size_t src2_step, int *dst_data, size_t dst_step, int width, int height, double scale) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_mul32f(const float *src1_data, size_t src1_step, const float *src2_data, size_t src2_step, float *dst_data, size_t dst_step, int width, int height, double scale) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_mul64f(const double *src1_data, size_t src1_step, const double *src2_data, size_t src2_step, double *dst_data, size_t dst_step, int width, int height, double scale) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +//! @} +/** +Divide: _dst[i] = scale * src1[i] / src2[i]_ +@param src1_data,src1_step first source image data and step +@param src2_data,src2_step second source image data and step +@param dst_data,dst_step destination image data and step +@param width,height dimensions of the images +@param scale additional multiplier +*/ +//! @addtogroup core_hal_interface_divide Element-wise divide +//! 
@{ +inline int hal_ni_div8u(const uchar *src1_data, size_t src1_step, const uchar *src2_data, size_t src2_step, uchar *dst_data, size_t dst_step, int width, int height, double scale) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_div8s(const schar *src1_data, size_t src1_step, const schar *src2_data, size_t src2_step, schar *dst_data, size_t dst_step, int width, int height, double scale) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_div16u(const ushort *src1_data, size_t src1_step, const ushort *src2_data, size_t src2_step, ushort *dst_data, size_t dst_step, int width, int height, double scale) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_div16s(const short *src1_data, size_t src1_step, const short *src2_data, size_t src2_step, short *dst_data, size_t dst_step, int width, int height, double scale) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_div32s(const int *src1_data, size_t src1_step, const int *src2_data, size_t src2_step, int *dst_data, size_t dst_step, int width, int height, double scale) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_div32f(const float *src1_data, size_t src1_step, const float *src2_data, size_t src2_step, float *dst_data, size_t dst_step, int width, int height, double scale) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_div64f(const double *src1_data, size_t src1_step, const double *src2_data, size_t src2_step, double *dst_data, size_t dst_step, int width, int height, double scale) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +//! @} + +/** +Computes reciprocal: _dst[i] = scale / src[i]_ +@param src_data,src_step source image data and step +@param dst_data,dst_step destination image data and step +@param width,height dimensions of the images +@param scale additional multiplier + */ +//! @addtogroup core_hal_interface_reciprocial Element-wise reciprocal +//! 
@{ +inline int hal_ni_recip8u(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step, int width, int height, double scale) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_recip8s(const schar *src_data, size_t src_step, schar *dst_data, size_t dst_step, int width, int height, double scale) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_recip16u(const ushort *src_data, size_t src_step, ushort *dst_data, size_t dst_step, int width, int height, double scale) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_recip16s(const short *src_data, size_t src_step, short *dst_data, size_t dst_step, int width, int height, double scale) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_recip32s(const int *src_data, size_t src_step, int *dst_data, size_t dst_step, int width, int height, double scale) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_recip32f(const float *src_data, size_t src_step, float *dst_data, size_t dst_step, int width, int height, double scale) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_recip64f(const double *src_data, size_t src_step, double *dst_data, size_t dst_step, int width, int height, double scale) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +//! @} + +//! @cond IGNORED #define cv_hal_mul8u hal_ni_mul8u #define cv_hal_mul8s hal_ni_mul8s #define cv_hal_mul16u hal_ni_mul16u @@ -186,15 +303,28 @@ inline int hal_ni_recip64f(const double*, size_t, const double*, size_t, double* #define cv_hal_recip32s hal_ni_recip32s #define cv_hal_recip32f hal_ni_recip32f #define cv_hal_recip64f hal_ni_recip64f +//! 
@endcond -inline int hal_ni_addWeighted8u(const uchar*, size_t, const uchar*, size_t, uchar*, size_t, int, int, const double*) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_addWeighted8s(const schar*, size_t, const schar*, size_t, schar*, size_t, int, int, const double*) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_addWeighted16u(const ushort*, size_t, const ushort*, size_t, ushort*, size_t, int, int, const double*) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_addWeighted16s(const short*, size_t, const short*, size_t, short*, size_t, int, int, const double*) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_addWeighted32s(const int*, size_t, const int*, size_t, int*, size_t, int, int, const double*) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_addWeighted32f(const float*, size_t, const float*, size_t, float*, size_t, int, int, const double*) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_addWeighted64f(const double*, size_t, const double*, size_t, double*, size_t, int, int, const double*) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** +Computes weighted sum of two arrays using formula: _dst[i] = a * src1[i] + b * src2[i] + c_ +@param src1_data,src1_step first source image data and step +@param src2_data,src2_step second source image data and step +@param dst_data,dst_step destination image data and step +@param width,height dimensions of the images +@param scalars numbers _a_, _b_, and _c_ + */ +//! @addtogroup core_hal_interface_addWeighted Element-wise weighted sum +//! 
@{ +inline int hal_ni_addWeighted8u(const uchar *src1_data, size_t src1_step, const uchar *src2_data, size_t src2_step, uchar *dst_data, size_t dst_step, int width, int height, const double scalars[3]) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_addWeighted8s(const schar *src1_data, size_t src1_step, const schar *src2_data, size_t src2_step, schar *dst_data, size_t dst_step, int width, int height, const double scalars[3]) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_addWeighted16u(const ushort *src1_data, size_t src1_step, const ushort *src2_data, size_t src2_step, ushort *dst_data, size_t dst_step, int width, int height, const double scalars[3]) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_addWeighted16s(const short *src1_data, size_t src1_step, const short *src2_data, size_t src2_step, short *dst_data, size_t dst_step, int width, int height, const double scalars[3]) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_addWeighted32s(const int *src1_data, size_t src1_step, const int *src2_data, size_t src2_step, int *dst_data, size_t dst_step, int width, int height, const double scalars[3]) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_addWeighted32f(const float *src1_data, size_t src1_step, const float *src2_data, size_t src2_step, float *dst_data, size_t dst_step, int width, int height, const double scalars[3]) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_addWeighted64f(const double *src1_data, size_t src1_step, const double *src2_data, size_t src2_step, double *dst_data, size_t dst_step, int width, int height, const double scalars[3]) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +//! @} +//! 
@cond IGNORED #define cv_hal_addWeighted8u hal_ni_addWeighted8u #define cv_hal_addWeighted8s hal_ni_addWeighted8s #define cv_hal_addWeighted16u hal_ni_addWeighted16u @@ -202,26 +332,57 @@ inline int hal_ni_addWeighted64f(const double*, size_t, const double*, size_t, d #define cv_hal_addWeighted32s hal_ni_addWeighted32s #define cv_hal_addWeighted32f hal_ni_addWeighted32f #define cv_hal_addWeighted64f hal_ni_addWeighted64f +//! @endcond -inline int hal_ni_split8u(const uchar*, uchar**, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_split16u(const ushort*, ushort**, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_split32s(const int*, int**, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_split64s(const int64*, int64**, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** +@param src_data array of interleaved values (__len__ x __cn__ items) [ B, G, R, B, G, R, ...] +@param dst_data array of pointers to destination arrays (__cn__ items x __len__ items) [ [B, B, ...], [G, G, ...], [R, R, ...] ] +@param len number of elements +@param cn number of channels + */ +//! @addtogroup core_hal_interface_split Channel split +//! @{ +inline int hal_ni_split8u(const uchar *src_data, uchar **dst_data, int len, int cn) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_split16u(const ushort *src_data, ushort **dst_data, int len, int cn) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_split32s(const int *src_data, int **dst_data, int len, int cn) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_split64s(const int64 *src_data, int64 **dst_data, int len, int cn) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +//! @} +//! @cond IGNORED #define cv_hal_split8u hal_ni_split8u #define cv_hal_split16u hal_ni_split16u #define cv_hal_split32s hal_ni_split32s #define cv_hal_split64s hal_ni_split64s +//! 
@endcond -inline int hal_ni_merge8u(const uchar**, uchar*, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_merge16u(const ushort**, ushort*, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_merge32s(const int**, int*, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_merge64s(const int64**, int64*, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** +@param src_data array of pointers to source arrays (__cn__ items x __len__ items) [ [B, B, ...], [G, G, ...], [R, R, ...] ] +@param dst_data destination array of interleaved values (__len__ x __cn__ items) [ B, G, R, B, G, R, ...] +@param len number of elements +@param cn number of channels + */ +//! @addtogroup core_hal_interface_merge Channel merge +//! @{ +inline int hal_ni_merge8u(const uchar **src_data, uchar *dst_data, int len, int cn) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_merge16u(const ushort **src_data, ushort *dst_data, int len, int cn) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_merge32s(const int **src_data, int *dst_data, int len, int cn) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_merge64s(const int64 **src_data, int64 *dst_data, int len, int cn) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +//! @} +//! @cond IGNORED #define cv_hal_merge8u hal_ni_merge8u #define cv_hal_merge16u hal_ni_merge16u #define cv_hal_merge32s hal_ni_merge32s #define cv_hal_merge64s hal_ni_merge64s +//! @endcond + +//! 
@} + +#if defined __GNUC__ +# pragma GCC diagnostic pop +#elif defined _MSC_VER +# pragma warning( pop ) +#endif #include "custom_hal.hpp" diff --git a/modules/imgproc/include/opencv2/imgproc.hpp b/modules/imgproc/include/opencv2/imgproc.hpp index e1f9348b1..6d9ffb490 100644 --- a/modules/imgproc/include/opencv2/imgproc.hpp +++ b/modules/imgproc/include/opencv2/imgproc.hpp @@ -213,6 +213,11 @@ location of points on the plane, building special graphs (such as NNG,RNG), and @defgroup imgproc_feature Feature Detection @defgroup imgproc_object Object Detection @defgroup imgproc_c C API + @defgroup imgproc_hal Hardware Acceleration Layer + @{ + @defgroup imgproc_hal_functions Functions + @defgroup imgproc_hal_interface Interface + @} @} */ diff --git a/modules/imgproc/include/opencv2/imgproc/hal/hal.hpp b/modules/imgproc/include/opencv2/imgproc/hal/hal.hpp index eca34e7e7..6ed492bcb 100644 --- a/modules/imgproc/include/opencv2/imgproc/hal/hal.hpp +++ b/modules/imgproc/include/opencv2/imgproc/hal/hal.hpp @@ -7,7 +7,7 @@ namespace cv { namespace hal { -//! @addtogroup core_hal_functions +//! @addtogroup imgproc_hal_functions //! 
@{ struct CV_EXPORTS Filter2D @@ -45,9 +45,9 @@ struct CV_EXPORTS SepFilter2D }; -struct CV_EXPORTS MorphContext +struct CV_EXPORTS Morph { - static Ptr create(int op, int src_type, int dst_type, int max_width, int max_height, + static Ptr create(int op, int src_type, int dst_type, int max_width, int max_height, int kernel_type, uchar * kernel_data, size_t kernel_step, int kernel_width, int kernel_height, int anchor_x, int anchor_y, @@ -56,10 +56,25 @@ struct CV_EXPORTS MorphContext virtual void apply(uchar * src_data, size_t src_step, uchar * dst_data, size_t dst_step, int width, int height, int roi_width, int roi_height, int roi_x, int roi_y, int roi_width2, int roi_height2, int roi_x2, int roi_y2) = 0; - virtual ~MorphContext() {} + virtual ~Morph() {} }; +CV_EXPORTS void resize(int src_type, + const uchar * src_data, size_t src_step, int src_width, int src_height, + uchar * dst_data, size_t dst_step, int dst_width, int dst_height, + double inv_scale_x, double inv_scale_y, int interpolation); + +CV_EXPORTS void warpAffine(int src_type, + const uchar * src_data, size_t src_step, int src_width, int src_height, + uchar * dst_data, size_t dst_step, int dst_width, int dst_height, + const double M[6], int interpolation, int borderType, const double borderValue[4]); + +CV_EXPORTS void warpPerspectve(int src_type, + const uchar * src_data, size_t src_step, int src_width, int src_height, + uchar * dst_data, size_t dst_step, int dst_width, int dst_height, + const double M[9], int interpolation, int borderType, const double borderValue[4]); + //! @} }} diff --git a/modules/imgproc/include/opencv2/imgproc/hal/interface.h b/modules/imgproc/include/opencv2/imgproc/hal/interface.h new file mode 100644 index 000000000..9d2a3e5d5 --- /dev/null +++ b/modules/imgproc/include/opencv2/imgproc/hal/interface.h @@ -0,0 +1,26 @@ +#ifndef OPENCV_IMGPROC_HAL_INTERFACE_H +#define OPENCV_IMGPROC_HAL_INTERFACE_H + +//! @addtogroup imgproc_hal_interface +//! @{ + +//! 
@name Interpolation modes +//! @sa cv::InterpolationFlags +//! @{ +#define CV_HAL_INTER_NEAREST 0 +#define CV_HAL_INTER_LINEAR 1 +#define CV_HAL_INTER_CUBIC 2 +#define CV_HAL_INTER_AREA 3 +#define CV_HAL_INTER_LANCZOS4 4 +//! @} + +//! @name Morphology operations +//! @sa cv::MorphTypes +//! @{ +#define MORPH_ERODE 0 +#define MORPH_DILATE 1 +//! @} + +//! @} + +#endif diff --git a/modules/imgproc/src/hal_replacement.hpp b/modules/imgproc/src/hal_replacement.hpp index e043c4027..2b681f6cf 100644 --- a/modules/imgproc/src/hal_replacement.hpp +++ b/modules/imgproc/src/hal_replacement.hpp @@ -1,34 +1,312 @@ +/*M/////////////////////////////////////////////////////////////////////////////////////// +// +// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. +// +// By downloading, copying, installing or using the software you agree to this license. +// If you do not agree to this license, do not download, install, +// copy or use the software. +// +// +// License Agreement +// For Open Source Computer Vision Library +// +// Copyright (C) 2000-2008, Intel Corporation, all rights reserved. +// Copyright (C) 2009, Willow Garage Inc., all rights reserved. +// Copyright (C) 2013, OpenCV Foundation, all rights reserved. +// Copyright (C) 2015, Itseez Inc., all rights reserved. +// Third party copyrights are property of their respective owners. +// +// Redistribution and use in source and binary forms, with or without modification, +// are permitted provided that the following conditions are met: +// +// * Redistribution's of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// * Redistribution's in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. 
+// +// * The name of the copyright holders may not be used to endorse or promote products +// derived from this software without specific prior written permission. +// +// This software is provided by the copyright holders and contributors "as is" and +// any express or implied warranties, including, but not limited to, the implied +// warranties of merchantability and fitness for a particular purpose are disclaimed. +// In no event shall the Intel Corporation or contributors be liable for any direct, +// indirect, incidental, special, exemplary, or consequential damages +// (including, but not limited to, procurement of substitute goods or services; +// loss of use, data, or profits; or business interruption) however caused +// and on any theory of liability, whether in contract, strict liability, +// or tort (including negligence or otherwise) arising in any way out of +// the use of this software, even if advised of the possibility of such damage. +// +//M*/ + #ifndef OPENCV_IMGPROC_HAL_REPLACEMENT_HPP #define OPENCV_IMGPROC_HAL_REPLACEMENT_HPP #include "opencv2/core/hal/interface.h" +#if defined __GNUC__ +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wunused-parameter" +#elif defined _MSC_VER +# pragma warning( push ) +# pragma warning( disable: 4100 ) +#endif + +//! @addtogroup imgproc_hal_interface +//! @note Define your functions to override default implementations: +//! @code +//! #undef hal_add8u +//! #define hal_add8u my_add8u +//! @endcode +//! @{ + +/** +@brief Dummy structure storing filtering context + +Users can convert this pointer to any type they want. Initialisation and destruction should be made in Init and Free function implementations correspondingly. +Example: +@code{.cpp} +int my_hal_filterInit(cvhalFilter2D **context, ...) { + context = static_cast(new MyFilterData()); + //... 
init +} + +int my_hal_filterFree(cvhalFilter2D *context) { + MyFilterData *c = static_cast<MyFilterData*>(context); + delete c; +} +@endcode + */ struct cvhalFilter2D {}; -inline int hal_ni_filterInit(cvhalFilter2D **, uchar *, size_t, int, int, int, int, int, int, int, int, double, int, int, bool, bool) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_filter(cvhalFilter2D *, uchar *, size_t, uchar *, size_t, int, int, int, int, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_filterFree(cvhalFilter2D *) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** + @brief hal_filterInit + @param context double pointer to user-defined context + @param kernel_data pointer to kernel data + @param kernel_step kernel step + @param kernel_type kernel type (CV_8U, ...) + @param kernel_width kernel width + @param kernel_height kernel height + @param max_width max possible image width, can be used to allocate working buffers + @param max_height max possible image height + @param src_type source image type + @param dst_type destination image type + @param borderType border processing mode (CV_HAL_BORDER_REFLECT, ...) 
+ @param delta added to pixel values + @param anchor_x relative X position of center point within the kernel + @param anchor_y relative Y position of center point within the kernel + @param allowSubmatrix indicates whether the submatrices will be allowed as source image + @param allowInplace indicates whether the inplace operation will be possible + @sa cv::filter2D, cv::hal::Filter2D + */ +inline int hal_ni_filterInit(cvhalFilter2D **context, uchar *kernel_data, size_t kernel_step, int kernel_type, int kernel_width, int kernel_height, int max_width, int max_height, int src_type, int dst_type, int borderType, double delta, int anchor_x, int anchor_y, bool allowSubmatrix, bool allowInplace) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** + @brief hal_filter + @param context pointer to user-defined context + @param src_data source image data + @param src_step source image step + @param dst_data destination image data + @param dst_step destination image step + @param width images width + @param height images height + @param full_width full width of source image (outside the ROI) + @param full_height full height of source image (outside the ROI) + @param offset_x source image ROI offset X + @param offset_y source image ROI offset Y + @sa cv::filter2D, cv::hal::Filter2D + */ +inline int hal_ni_filter(cvhalFilter2D *context, uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step, int width, int height, int full_width, int full_height, int offset_x, int offset_y) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** + @brief hal_filterFree + @param context pointer to user-defined context + @sa cv::filter2D, cv::hal::Filter2D + */ +inline int hal_ni_filterFree(cvhalFilter2D *context) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +//! @cond IGNORED #define cv_hal_filterInit hal_ni_filterInit #define cv_hal_filter hal_ni_filter #define cv_hal_filterFree hal_ni_filterFree +//! 
@endcond -inline int hal_ni_sepFilterInit(cvhalFilter2D **, int, int, int, uchar *, size_t, int, int, uchar *, size_t, int, int, int, int, double, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_sepFilter(cvhalFilter2D *, uchar *, size_t, uchar*, size_t, int, int, int, int, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_sepFilterFree(cvhalFilter2D *) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** + @brief hal_sepFilterInit + @param context double pointer to user-defined context + @param src_type source image type + @param dst_type destination image type + @param kernel_type kernels type + @param kernelx_data pointer to x-kernel data + @param kernelx_step x-kernel step + @param kernelx_width x-kernel width + @param kernelx_height x-kernel height + @param kernely_data pointer to y-kernel data + @param kernely_step y-kernel step + @param kernely_width y-kernel width + @param kernely_height y-kernel height + @param anchor_x relative X position of center point within the kernel + @param anchor_y relative Y position of center point within the kernel + @param delta added to pixel values + @param borderType border processing mode (CV_HAL_BORDER_REFLECT, ...) 
+ @sa cv::sepFilter2D, cv::hal::SepFilter2D + */ +inline int hal_ni_sepFilterInit(cvhalFilter2D **context, int src_type, int dst_type, int kernel_type, uchar *kernelx_data, size_t kernelx_step, int kernelx_width, int kernelx_height, uchar *kernely_data, size_t kernely_step, int kernely_width, int kernely_height, int anchor_x, int anchor_y, double delta, int borderType) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** + @brief hal_sepFilter + @param context pointer to user-defined context + @param src_data source image data + @param src_step source image step + @param dst_data destination image data + @param dst_step destination image step + @param width images width + @param height images height + @param full_width full width of source image (outside the ROI) + @param full_height full height of source image (outside the ROI) + @param offset_x source image ROI offset X + @param offset_y source image ROI offset Y + @sa cv::sepFilter2D, cv::hal::SepFilter2D + */ +inline int hal_ni_sepFilter(cvhalFilter2D *context, uchar *src_data, size_t src_step, uchar* dst_data, size_t dst_step, int width, int height, int full_width, int full_height, int offset_x, int offset_y) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** + @brief hal_sepFilterFree + @param context pointer to user-defined context + @sa cv::sepFilter2D, cv::hal::SepFilter2D + */ +inline int hal_ni_sepFilterFree(cvhalFilter2D *context) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +//! @cond IGNORED #define cv_hal_sepFilterInit hal_ni_sepFilterInit #define cv_hal_sepFilter hal_ni_sepFilter #define cv_hal_sepFilterFree hal_ni_sepFilterFree +//! 
@endcond -inline int hal_ni_morphInit(cvhalFilter2D **, int, int, int, int, int, int, uchar *, size_t, int, int, int, int, int, const double[4], int, bool, bool) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_morph(cvhalFilter2D *, uchar *, size_t, uchar *, size_t, int, int, int, int, int, int, int, int, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_morphFree(cvhalFilter2D *) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** + @brief hal_morphInit + @param context double pointer to user-defined context + @param operation morphology operation CV_HAL_MORPH_ERODE or CV_HAL_MORPH_DILATE + @param src_type source image type + @param dst_type destination image type + @param max_width max possible image width, can be used to allocate working buffers + @param max_height max possible image height + @param kernel_type kernel type (CV_8U, ...) + @param kernel_data pointer to kernel data + @param kernel_step kernel step + @param kernel_width kernel width + @param kernel_height kernel height + @param anchor_x relative X position of center point within the kernel + @param anchor_y relative Y position of center point within the kernel + @param borderType border processing mode (CV_HAL_BORDER_REFLECT, ...) 
+ @param borderValue values to use for CV_HAL_BORDER_CONSTANT mode + @param iterations number of iterations + @param allowSubmatrix indicates whether the submatrices will be allowed as source image + @param allowInplace indicates whether the inplace operation will be possible + @sa cv::erode, cv::dilate, cv::morphologyEx, cv::hal::Morph + */ +inline int hal_ni_morphInit(cvhalFilter2D **context, int operation, int src_type, int dst_type, int max_width, int max_height, int kernel_type, uchar *kernel_data, size_t kernel_step, int kernel_width, int kernel_height, int anchor_x, int anchor_y, int borderType, const double borderValue[4], int iterations, bool allowSubmatrix, bool allowInplace) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** + @brief hal_morph + @param context pointer to user-defined context + @param src_data source image data + @param src_step source image step + @param dst_data destination image data + @param dst_step destination image step + @param width images width + @param height images height + @param src_full_width full width of source image (outside the ROI) + @param src_full_height full height of source image (outside the ROI) + @param src_roi_x source image ROI X offset + @param src_roi_y source image ROI Y offset + @param dst_full_width full width of destination image + @param dst_full_height full height of destination image + @param dst_roi_x destination image ROI X offset + @param dst_roi_y destination image ROI Y offset + @sa cv::erode, cv::dilate, cv::morphologyEx, cv::hal::Morph + */ +inline int hal_ni_morph(cvhalFilter2D *context, uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step, int width, int height, int src_full_width, int src_full_height, int src_roi_x, int src_roi_y, int dst_full_width, int dst_full_height, int dst_roi_x, int dst_roi_y) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** + @brief hal_morphFree + @param context pointer to user-defined context + @sa cv::erode, cv::dilate, cv::morphologyEx, cv::hal::Morph + */ 
+inline int hal_ni_morphFree(cvhalFilter2D *context) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +//! @cond IGNORED #define cv_hal_morphInit hal_ni_morphInit #define cv_hal_morph hal_ni_morph #define cv_hal_morphFree hal_ni_morphFree +//! @endcond + +/** + @brief hal_resize + @param src_type source and destination image type + @param src_data source image data + @param src_step source image step + @param src_width source image width + @param src_height source image height + @param dst_data destination image data + @param dst_step destination image step + @param dst_width destination image width + @param dst_height destination image height + @param inv_scale_x inversed scale X coefficient + @param inv_scale_y inversed scale Y coefficient + @param interpolation interpolation mode (CV_HAL_INTER_NEAREST, ...) + @sa cv::resize, cv::hal::resize + */ +inline int hal_ni_resize(int src_type, const uchar *src_data, size_t src_step, int src_width, int src_height, uchar *dst_data, size_t dst_step, int dst_width, int dst_height, double inv_scale_x, double inv_scale_y, int interpolation) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** + @brief hal_warpAffine + @param src_type source and destination image type + @param src_data source image data + @param src_step source image step + @param src_width source image width + @param src_height source image height + @param dst_data destination image data + @param dst_step destination image step + @param dst_width destination image width + @param dst_height destination image height + @param M 3x2 matrix with transform coefficients + @param interpolation interpolation mode (CV_HAL_INTER_NEAREST, ...) + @param borderType border processing mode (CV_HAL_BORDER_REFLECT, ...) 
+ @param borderValue values to use for CV_HAL_BORDER_CONSTANT mode + @sa cv::warpAffine, cv::hal::warpAffine + */ +inline int hal_ni_warpAffine(int src_type, const uchar *src_data, size_t src_step, int src_width, int src_height, uchar *dst_data, size_t dst_step, int dst_width, int dst_height, const double M[6], int interpolation, int borderType, const double borderValue[4]) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** + @brief hal_warpPerspectve + @param src_type source and destination image type + @param src_data source image data + @param src_step source image step + @param src_width source image width + @param src_height source image height + @param dst_data destination image data + @param dst_step destination image step + @param dst_width destination image width + @param dst_height destination image height + @param M 3x3 matrix with transform coefficients + @param interpolation interpolation mode (CV_HAL_INTER_NEAREST, ...) + @param borderType border processing mode (CV_HAL_BORDER_REFLECT, ...) + @param borderValue values to use for CV_HAL_BORDER_CONSTANT mode + @sa cv::warpPerspective, cv::hal::warpPerspective + */ +inline int hal_ni_warpPerspectve(int src_type, const uchar *src_data, size_t src_step, int src_width, int src_height, uchar *dst_data, size_t dst_step, int dst_width, int dst_height, const double M[9], int interpolation, int borderType, const double borderValue[4]) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } + +//! @cond IGNORED +#define cv_hal_resize hal_ni_resize +#define cv_hal_warpAffine hal_ni_warpAffine +#define cv_hal_warpPerspective hal_ni_warpPerspectve +//! @endcond + +//! 
@} + +#if defined __GNUC__ +# pragma GCC diagnostic pop +#elif defined _MSC_VER +# pragma warning( pop ) +#endif + #include "custom_hal.hpp" -#endif // OPENCV_IMGPROC_HAL_REPLACEMENT_HPP +#endif diff --git a/modules/imgproc/src/imgwarp.cpp b/modules/imgproc/src/imgwarp.cpp index a7f8eee44..d346965ed 100644 --- a/modules/imgproc/src/imgwarp.cpp +++ b/modules/imgproc/src/imgwarp.cpp @@ -49,6 +49,7 @@ #include "precomp.hpp" #include "opencl_kernels_imgproc.hpp" +#include "hal_replacement.hpp" using namespace cv; @@ -3091,8 +3092,8 @@ static bool ocl_resize( InputArray _src, OutputArray _dst, Size dsize, #endif #if IPP_VERSION_X100 >= 710 -static bool ipp_resize_mt( Mat src, Mat dst, - double inv_scale_x, double inv_scale_y, int interpolation) +static bool ipp_resize_mt(Mat & src, Mat & dst, + double inv_scale_x, double inv_scale_y, int interpolation) { int mode = -1; if (interpolation == INTER_LINEAR && src.rows >= 2 && src.cols >= 2) @@ -3113,15 +3114,24 @@ static bool ipp_resize_mt( Mat src, Mat dst, } #endif -} +//================================================================================================== +namespace hal { - -////////////////////////////////////////////////////////////////////////////////////////// - -void cv::resize( InputArray _src, OutputArray _dst, Size dsize, - double inv_scale_x, double inv_scale_y, int interpolation ) +void resize(int src_type, + const uchar * src_data, size_t src_step, int src_width, int src_height, + uchar * dst_data, size_t dst_step, int dst_width, int dst_height, + double inv_scale_x, double inv_scale_y, int interpolation) { + CV_Assert((dst_width * dst_height > 0) || (inv_scale_x > 0 && inv_scale_y > 0)); + if (inv_scale_x < DBL_EPSILON || inv_scale_y < DBL_EPSILON) + { + inv_scale_x = static_cast(dst_width) / src_width; + inv_scale_y = static_cast(dst_height) / src_height; + } + + CALL_HAL(resize, cv_hal_resize, src_type, src_data, src_step, src_width, src_height, dst_data, dst_step, dst_width, dst_height, 
inv_scale_x, inv_scale_y, interpolation); + static ResizeFunc linear_tab[] = { resizeGeneric_< @@ -3226,24 +3236,7 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize, resizeArea_, 0 }; - Size ssize = _src.size(); - - CV_Assert( ssize.area() > 0 ); - CV_Assert( dsize.area() > 0 || (inv_scale_x > 0 && inv_scale_y > 0) ); - if( dsize.area() == 0 ) - { - dsize = Size(saturate_cast(ssize.width*inv_scale_x), - saturate_cast(ssize.height*inv_scale_y)); - CV_Assert( dsize.area() > 0 ); - } - else - { - inv_scale_x = (double)dsize.width/ssize.width; - inv_scale_y = (double)dsize.height/ssize.height; - } - - - int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); + int depth = CV_MAT_DEPTH(src_type), cn = CV_MAT_CN(src_type); double scale_x = 1./inv_scale_x, scale_y = 1./inv_scale_y; int iscale_x = saturate_cast(scale_x); @@ -3252,42 +3245,30 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize, bool is_area_fast = std::abs(scale_x - iscale_x) < DBL_EPSILON && std::abs(scale_y - iscale_y) < DBL_EPSILON; + Size dsize = Size(saturate_cast(src_width*inv_scale_x), + saturate_cast(src_height*inv_scale_y)); + CV_Assert( dsize.area() > 0 ); - CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat() && _src.cols() > 10 && _src.rows() > 10, - ocl_resize(_src, _dst, dsize, inv_scale_x, inv_scale_y, interpolation)) - - Mat src = _src.getMat(); - _dst.create(dsize, src.type()); - Mat dst = _dst.getMat(); - - if (dsize == ssize) { - // Source and destination are of same size. Use simple copy. 
- src.copyTo(dst); - return; - } - -#ifdef HAVE_TEGRA_OPTIMIZATION - if (tegra::useTegra() && tegra::resize(src, dst, (float)inv_scale_x, (float)inv_scale_y, interpolation)) - return; -#endif + Mat src(Size(src_width, src_height), src_type, const_cast(src_data), src_step); + Mat dst(dsize, src_type, dst_data, dst_step); #ifdef HAVE_IPP int mode = -1; - if (interpolation == INTER_LINEAR && _src.rows() >= 2 && _src.cols() >= 2) + if (interpolation == INTER_LINEAR && src_height >= 2 && src_width >= 2) mode = INTER_LINEAR; - else if (interpolation == INTER_CUBIC && _src.rows() >= 4 && _src.cols() >= 4) + else if (interpolation == INTER_CUBIC && src_height >= 4 && src_width >= 4) mode = INTER_CUBIC; const double IPP_RESIZE_EPS = 1e-10; - double ex = fabs((double)dsize.width / _src.cols() - inv_scale_x) / inv_scale_x; - double ey = fabs((double)dsize.height / _src.rows() - inv_scale_y) / inv_scale_y; + double ex = fabs((double)dsize.width / src_width - inv_scale_x) / inv_scale_x; + double ey = fabs((double)dsize.height / src_height - inv_scale_y) / inv_scale_y; #endif CV_IPP_RUN(IPP_VERSION_X100 >= 710 && ((ex < IPP_RESIZE_EPS && ey < IPP_RESIZE_EPS && depth != CV_64F) || (ex == 0 && ey == 0 && depth == CV_64F)) && (interpolation == INTER_LINEAR || interpolation == INTER_CUBIC) && !(interpolation == INTER_LINEAR && is_area_fast && iscale_x == 2 && iscale_y == 2 && depth == CV_8U) && mode >= 0 && (cn == 1 || cn == 3 || cn == 4) && (depth == CV_16U || depth == CV_16S || depth == CV_32F || - (depth == CV_64F && mode == INTER_LINEAR)), ipp_resize_mt(src, dst, inv_scale_x, inv_scale_y, interpolation)) - + (depth == CV_64F && mode == INTER_LINEAR)), + ipp_resize_mt(src, dst, inv_scale_x, inv_scale_y, interpolation)) if( interpolation == INTER_NEAREST ) { @@ -3311,7 +3292,7 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize, if( is_area_fast ) { int area = iscale_x*iscale_y; - size_t srcstep = src.step / src.elemSize1(); + size_t srcstep = src_step / 
src.elemSize1(); AutoBuffer _ofs(area + dsize.width*cn); int* ofs = _ofs; int* xofs = ofs + area; @@ -3337,11 +3318,11 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize, ResizeAreaFunc func = area_tab[depth]; CV_Assert( func != 0 && cn <= 4 ); - AutoBuffer _xytab((ssize.width + ssize.height)*2); - DecimateAlpha* xtab = _xytab, *ytab = xtab + ssize.width*2; + AutoBuffer _xytab((src_width + src_height)*2); + DecimateAlpha* xtab = _xytab, *ytab = xtab + src_width*2; - int xtab_size = computeResizeAreaTab(ssize.width, dsize.width, cn, scale_x, xtab); - int ytab_size = computeResizeAreaTab(ssize.height, dsize.height, 1, scale_y, ytab); + int xtab_size = computeResizeAreaTab(src_width, dsize.width, cn, scale_x, xtab); + int ytab_size = computeResizeAreaTab(src_height, dsize.height, 1, scale_y, ytab); AutoBuffer _tabofs(dsize.height + 1); int* tabofs = _tabofs; @@ -3409,11 +3390,11 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize, fx = 0, sx = 0; } - if( sx + ksize2 >= ssize.width ) + if( sx + ksize2 >= src_width ) { xmax = std::min( xmax, dx ); - if( sx >= ssize.width-1 && (interpolation != INTER_CUBIC && interpolation != INTER_LANCZOS4)) - fx = 0, sx = ssize.width-1; + if( sx >= src_width-1 && (interpolation != INTER_CUBIC && interpolation != INTER_LANCZOS4)) + fx = 0, sx = src_width-1; } for( k = 0, sx *= cn; k < cn; k++ ) @@ -3486,6 +3467,46 @@ void cv::resize( InputArray _src, OutputArray _dst, Size dsize, fixpt ? 
(void*)ibeta : (void*)beta, xmin, xmax, ksize ); } +} // cv::hal:: +} // cv:: + +//================================================================================================== + +void cv::resize( InputArray _src, OutputArray _dst, Size dsize, + double inv_scale_x, double inv_scale_y, int interpolation ) +{ + Size ssize = _src.size(); + + CV_Assert( ssize.area() > 0 ); + CV_Assert( dsize.area() > 0 || (inv_scale_x > 0 && inv_scale_y > 0) ); + if( dsize.area() == 0 ) + { + dsize = Size(saturate_cast(ssize.width*inv_scale_x), + saturate_cast(ssize.height*inv_scale_y)); + CV_Assert( dsize.area() > 0 ); + } + else + { + inv_scale_x = (double)dsize.width/ssize.width; + inv_scale_y = (double)dsize.height/ssize.height; + } + + CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat() && _src.cols() > 10 && _src.rows() > 10, + ocl_resize(_src, _dst, dsize, inv_scale_x, inv_scale_y, interpolation)) + + Mat src = _src.getMat(); + _dst.create(dsize, src.type()); + Mat dst = _dst.getMat(); + + if (dsize == ssize) { + // Source and destination are of same size. Use simple copy. 
+ src.copyTo(dst); + return; + } + + hal::resize(src.type(), src.data, src.step, src.cols, src.rows, dst.data, dst.step, dst.cols, dst.rows, inv_scale_x, inv_scale_y, interpolation); +} + /****************************************************************************************\ * General warping (affine, perspective, remap) * @@ -5232,7 +5253,7 @@ class WarpAffineInvoker : { public: WarpAffineInvoker(const Mat &_src, Mat &_dst, int _interpolation, int _borderType, - const Scalar &_borderValue, int *_adelta, int *_bdelta, double *_M) : + const Scalar &_borderValue, int *_adelta, int *_bdelta, const double *_M) : ParallelLoopBody(), src(_src), dst(_dst), interpolation(_interpolation), borderType(_borderType), borderValue(_borderValue), adelta(_adelta), bdelta(_bdelta), M(_M) @@ -5410,7 +5431,7 @@ private: int interpolation, borderType; Scalar borderValue; int *adelta, *bdelta; - double *M; + const double *M; }; @@ -5569,8 +5590,40 @@ static bool ocl_warpTransform(InputArray _src, OutputArray _dst, InputArray _M0, #endif +namespace hal { + +void warpAffine(int src_type, + const uchar * src_data, size_t src_step, int src_width, int src_height, + uchar * dst_data, size_t dst_step, int dst_width, int dst_height, + const double M[6], int interpolation, int borderType, const double borderValue[4]) +{ + CALL_HAL(warpAffine, cv_hal_warpAffine, src_type, src_data, src_step, src_width, src_height, dst_data, dst_step, dst_width, dst_height, M, interpolation, borderType, borderValue); + + Mat src(Size(src_width, src_height), src_type, const_cast(src_data), src_step); + Mat dst(Size(dst_width, dst_height), src_type, dst_data, dst_step); + + int x; + AutoBuffer _abdelta(dst.cols*2); + int* adelta = &_abdelta[0], *bdelta = adelta + dst.cols; + const int AB_BITS = MAX(10, (int)INTER_BITS); + const int AB_SCALE = 1 << AB_BITS; + + for( x = 0; x < dst.cols; x++ ) + { + adelta[x] = saturate_cast(M[0]*x*AB_SCALE); + bdelta[x] = saturate_cast(M[3]*x*AB_SCALE); + } + + Range range(0, 
dst.rows); + WarpAffineInvoker invoker(src, dst, interpolation, borderType, + Scalar(borderValue[0], borderValue[1], borderValue[2], borderValue[3]), + adelta, bdelta, M); + parallel_for_(range, invoker, dst.total()/(double)(1<<16)); } +} // hal:: +} // cv:: + void cv::warpAffine( InputArray _src, OutputArray _dst, InputArray _M0, Size dsize, @@ -5596,11 +5649,6 @@ void cv::warpAffine( InputArray _src, OutputArray _dst, CV_Assert( (M0.type() == CV_32F || M0.type() == CV_64F) && M0.rows == 2 && M0.cols == 3 ); M0.convertTo(matM, matM.type()); -#ifdef HAVE_TEGRA_OPTIMIZATION - if( tegra::useTegra() && tegra::warpAffine(src, dst, M, flags, borderType, borderValue) ) - return; -#endif - if( !(flags & WARP_INVERSE_MAP) ) { double D = M[0]*M[4] - M[1]*M[3]; @@ -5613,12 +5661,6 @@ void cv::warpAffine( InputArray _src, OutputArray _dst, M[2] = b1; M[5] = b2; } - int x; - AutoBuffer _abdelta(dst.cols*2); - int* adelta = &_abdelta[0], *bdelta = adelta + dst.cols; - const int AB_BITS = MAX(10, (int)INTER_BITS); - const int AB_SCALE = 1 << AB_BITS; - #if defined (HAVE_IPP) && IPP_VERSION_X100 >= 810 && IPP_DISABLE_BLOCK CV_IPP_CHECK() { @@ -5683,16 +5725,8 @@ void cv::warpAffine( InputArray _src, OutputArray _dst, } #endif - for( x = 0; x < dst.cols; x++ ) - { - adelta[x] = saturate_cast(M[0]*x*AB_SCALE); - bdelta[x] = saturate_cast(M[3]*x*AB_SCALE); - } - - Range range(0, dst.rows); - WarpAffineInvoker invoker(src, dst, interpolation, borderType, - borderValue, adelta, bdelta, M); - parallel_for_(range, invoker, dst.total()/(double)(1<<16)); + hal::warpAffine(src.type(), src.data, src.step, src.cols, src.rows, dst.data, dst.step, dst.cols, dst.rows, + M, interpolation, borderType, borderValue.val); } @@ -5703,7 +5737,7 @@ class WarpPerspectiveInvoker : public ParallelLoopBody { public: - WarpPerspectiveInvoker(const Mat &_src, Mat &_dst, double *_M, int _interpolation, + WarpPerspectiveInvoker(const Mat &_src, Mat &_dst, const double *_M, int _interpolation, int _borderType, 
const Scalar &_borderValue) : ParallelLoopBody(), src(_src), dst(_dst), M(_M), interpolation(_interpolation), borderType(_borderType), borderValue(_borderValue) @@ -6037,12 +6071,11 @@ public: private: Mat src; Mat dst; - double* M; + const double* M; int interpolation, borderType; Scalar borderValue; }; - #if defined (HAVE_IPP) && IPP_VERSION_X100 >= 810 && IPP_DISABLE_BLOCK class IPPWarpPerspectiveInvoker : public ParallelLoopBody @@ -6095,8 +6128,26 @@ private: const IPPWarpPerspectiveInvoker& operator= (const IPPWarpPerspectiveInvoker&); }; #endif + +namespace hal { + +void warpPerspectve(int src_type, + const uchar * src_data, size_t src_step, int src_width, int src_height, + uchar * dst_data, size_t dst_step, int dst_width, int dst_height, + const double M[9], int interpolation, int borderType, const double borderValue[4]) +{ + CALL_HAL(warpPerspective, cv_hal_warpPerspective, src_type, src_data, src_step, src_width, src_height, dst_data, dst_step, dst_width, dst_height, M, interpolation, borderType, borderValue); + Mat src(Size(src_width, src_height), src_type, const_cast(src_data), src_step); + Mat dst(Size(dst_width, dst_height), src_type, dst_data, dst_step); + + Range range(0, dst.rows); + WarpPerspectiveInvoker invoker(src, dst, M, interpolation, borderType, Scalar(borderValue[0], borderValue[1], borderValue[2], borderValue[3])); + parallel_for_(range, invoker, dst.total()/(double)(1<<16)); } +} // hal:: +} // cv:: + void cv::warpPerspective( InputArray _src, OutputArray _dst, InputArray _M0, Size dsize, int flags, int borderType, const Scalar& borderValue ) { @@ -6122,12 +6173,6 @@ void cv::warpPerspective( InputArray _src, OutputArray _dst, InputArray _M0, CV_Assert( (M0.type() == CV_32F || M0.type() == CV_64F) && M0.rows == 3 && M0.cols == 3 ); M0.convertTo(matM, matM.type()); -#ifdef HAVE_TEGRA_OPTIMIZATION - if( tegra::useTegra() && tegra::warpPerspective(src, dst, M, flags, borderType, borderValue) ) - return; -#endif - - #if defined (HAVE_IPP) && 
IPP_VERSION_X100 >= 810 && IPP_DISABLE_BLOCK CV_IPP_CHECK() { @@ -6190,9 +6235,8 @@ void cv::warpPerspective( InputArray _src, OutputArray _dst, InputArray _M0, if( !(flags & WARP_INVERSE_MAP) ) invert(matM, matM); - Range range(0, dst.rows); - WarpPerspectiveInvoker invoker(src, dst, M, interpolation, borderType, borderValue); - parallel_for_(range, invoker, dst.total()/(double)(1<<16)); + hal::warpPerspectve(src.type(), src.data, src.step, src.cols, src.rows, dst.data, dst.step, dst.cols, dst.rows, + matM.ptr(), interpolation, borderType, borderValue.val); } diff --git a/modules/imgproc/src/morph.cpp b/modules/imgproc/src/morph.cpp index d70edd071..3b799803c 100644 --- a/modules/imgproc/src/morph.cpp +++ b/modules/imgproc/src/morph.cpp @@ -1079,7 +1079,7 @@ namespace cv // ===== 1. replacement implementation -struct ReplacementMorphImpl : public hal::MorphContext +struct ReplacementMorphImpl : public hal::Morph { cvhalFilter2D * ctx; bool isInitialized; @@ -1184,7 +1184,7 @@ INIT_TRAIT(CV_32FC4, 32f, 32f_C4R, 4, zero[4] = {0}) //-------------------------------------- -struct IppMorphBaseImpl : public hal::MorphContext +struct IppMorphBaseImpl : public hal::Morph { virtual bool init(int _op, int _src_type, int dst_type, int max_width, int max_height, int kernel_type, uchar * kernel_data, size_t kernel_step, int kernel_width, int kernel_height, @@ -1379,7 +1379,7 @@ static IppMorphBaseImpl * createIppImpl(int type) // ===== 3. 
Fallback implementation -struct OcvMorphImpl : public hal::MorphContext +struct OcvMorphImpl : public hal::Morph { Ptr f; int iterations; @@ -1425,7 +1425,7 @@ struct OcvMorphImpl : public hal::MorphContext namespace hal { -Ptr MorphContext ::create(int op, int src_type, int dst_type, int max_width, int max_height, +Ptr Morph ::create(int op, int src_type, int dst_type, int max_width, int max_height, int kernel_type, uchar * kernel_data, size_t kernel_step, int kernel_width, int kernel_height, int anchor_x, int anchor_y, int borderType, const double borderValue[4], @@ -1438,7 +1438,7 @@ Ptr MorphContext ::create(int op, int src_type, int dst_type, int anchor_x, anchor_y, borderType, borderValue, iterations, isSubmatrix, allowInplace)) { - return Ptr(impl); + return Ptr(impl); } delete impl; } @@ -1453,7 +1453,7 @@ Ptr MorphContext ::create(int op, int src_type, int dst_type, int anchor_x, anchor_y, borderType, borderValue, iterations, isSubmatrix, allowInplace)) { - return Ptr(impl); + return Ptr(impl); } delete impl; } @@ -1465,7 +1465,7 @@ Ptr MorphContext ::create(int op, int src_type, int dst_type, int kernel_type, kernel_data, kernel_step, kernel_width, kernel_height, anchor_x, anchor_y, borderType, borderValue, iterations, isSubmatrix, allowInplace); - return Ptr(impl); + return Ptr(impl); } } @@ -1858,7 +1858,7 @@ static void morphOp( int op, InputArray _src, OutputArray _dst, Size d_wsz(dst.cols, dst.rows); dst.locateROI(d_wsz, d_ofs); - Ptr ctx = hal::MorphContext::create(op, src.type(), dst.type(), src.cols, src.rows, + Ptr ctx = hal::Morph::create(op, src.type(), dst.type(), src.cols, src.rows, kernel.type(), kernel.data, kernel.step, kernel.cols, kernel.rows, anchor.x, anchor.y, borderType, borderValue.val, iterations, src.isSubmatrix(), src.data == dst.data); From 008abd28fd27742d574372acdb5e6839ffb5316e Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Wed, 13 Jan 2016 17:23:57 +0300 Subject: [PATCH 2/7] Extracted HAL interfaces for DFT/DCT, added 
new test --- modules/core/include/opencv2/core/hal/hal.hpp | 19 + .../core/include/opencv2/core/hal/interface.h | 10 + modules/core/src/dxt.cpp | 1923 +++++++++++------ modules/core/src/hal_replacement.hpp | 25 + modules/core/test/test_dxt.cpp | 76 + modules/imgproc/src/templmatch.cpp | 36 +- 6 files changed, 1405 insertions(+), 684 deletions(-) diff --git a/modules/core/include/opencv2/core/hal/hal.hpp b/modules/core/include/opencv2/core/hal/hal.hpp index 64af09ab8..52a5f99b3 100644 --- a/modules/core/include/opencv2/core/hal/hal.hpp +++ b/modules/core/include/opencv2/core/hal/hal.hpp @@ -187,6 +187,25 @@ CV_EXPORTS void addWeighted32s( const int* src1, size_t step1, const int* src2, CV_EXPORTS void addWeighted32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scalars ); CV_EXPORTS void addWeighted64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scalars ); +struct DftContext +{ + void * impl; + bool useReplacement; + DftContext() : impl(0), useReplacement(false) {} +}; + +CV_EXPORTS void dftInit2D(DftContext & c, int _width, int _height, int _depth, int _src_channels, int _dst_channels, int flags, int _nonzero_rows = 0); +CV_EXPORTS void dftRun2D(const DftContext & c, const void * src, int src_step, void * dst, int dst_step); +CV_EXPORTS void dftFree2D(DftContext & c); + +CV_EXPORTS void dftInit(DftContext & c, int len, int count, int depth, int flags, bool * useBuffer = 0); +CV_EXPORTS void dftRun(const DftContext & c, const void * src, void * dst); +CV_EXPORTS void dftFree(DftContext & c); + +CV_EXPORTS void dctInit(DftContext & c, int width, int height, int depth, int flags); +CV_EXPORTS void dctRun(const DftContext & c, const void * src, int src_step, void * dst, int dst_step); +CV_EXPORTS void dctFree(DftContext & c); + //! 
@} core_hal //============================================================================= diff --git a/modules/core/include/opencv2/core/hal/interface.h b/modules/core/include/opencv2/core/hal/interface.h index ba3f55c33..0da68f18c 100644 --- a/modules/core/include/opencv2/core/hal/interface.h +++ b/modules/core/include/opencv2/core/hal/interface.h @@ -12,6 +12,16 @@ //! @} +#define CV_HAL_DFT_INVERSE 1 +#define CV_HAL_DFT_SCALE 2 +#define CV_HAL_DFT_ROWS 4 +#define CV_HAL_DFT_COMPLEX_OUTPUT 16 +#define CV_HAL_DFT_REAL_OUTPUT 32 +#define CV_HAL_DFT_TWO_STAGE 64 +#define CV_HAL_DFT_STAGE_COLS 128 +#define CV_HAL_DFT_IS_CONTINUOUS 512 +#define CV_HAL_DFT_IS_INPLACE 1024 + #ifdef __cplusplus #include #else diff --git a/modules/core/src/dxt.cpp b/modules/core/src/dxt.cpp index 691b29746..1265091bc 100644 --- a/modules/core/src/dxt.cpp +++ b/modules/core/src/dxt.cpp @@ -173,7 +173,7 @@ DFTFactorize( int n, int* factors ) } static void -DFTInit( int n0, int nf, int* factors, int* itab, int elem_size, void* _wave, int inv_itab ) +DFTInit( int n0, int nf, const int* factors, int* itab, int elem_size, void* _wave, int inv_itab ) { int digits[34], radix[34]; int n = factors[0], m = 0; @@ -519,19 +519,59 @@ static IppStatus ippsDFTInv_PackToR( const double* src, double* dst, } #endif -enum { DFT_NO_PERMUTE=256, DFT_COMPLEX_INPUT_OR_OUTPUT=512 }; +struct OcvDftOptions; + +typedef void (*DFTFunc)(const OcvDftOptions & c, const void* src, void* dst); + +struct OcvDftOptions { + int nf; + int *factors; + double scale; + + int* itab; + void* wave; + int tab_size; + int n; + + bool isInverse; + bool noPermute; + bool isComplex; + + bool haveSSE3; + + DFTFunc dft_func; + bool useIpp; + +#ifdef USE_IPP_DFT + uchar* ipp_spec; + uchar* ipp_work; +#endif + + OcvDftOptions() + { + nf = 0; + factors = 0; + scale = 0; + itab = 0; + wave = 0; + tab_size = 0; + n = 0; + isInverse = false; + noPermute = false; + isComplex = false; + useIpp = false; +#ifdef USE_IPP_DFT + ipp_spec = 0; + 
ipp_work = 0; +#endif + dft_func = 0; + haveSSE3 = checkHardwareSupport(CV_CPU_SSE3); + } +}; // mixed-radix complex discrete Fourier transform: double-precision version template static void -DFT( const Complex* src, Complex* dst, int n, - int nf, const int* factors, const int* itab, - const Complex* wave, int tab_size, - const void* -#ifdef USE_IPP_DFT - spec -#endif - , Complex* buf, - int flags, double _scale ) +DFT(const OcvDftOptions & c, const Complex* src, Complex* dst) { static const T sin_120 = (T)0.86602540378443864676372317075294; static const T fft5_2 = (T)0.559016994374947424102293417182819; @@ -539,20 +579,23 @@ DFT( const Complex* src, Complex* dst, int n, static const T fft5_4 = (T)-1.538841768587626701285145288018455; static const T fft5_5 = (T)0.363271264002680442947733378740309; - int n0 = n, f_idx, nx; - int inv = flags & DFT_INVERSE; - int dw0 = tab_size, dw; + const Complex* wave = (Complex*)c.wave; + const int * itab = c.itab; + + int n = c.n; + int f_idx, nx; + int inv = c.isInverse; + int dw0 = c.tab_size, dw; int i, j, k; Complex t; - T scale = (T)_scale; - int tab_step; + T scale = (T)c.scale; -#ifdef USE_IPP_DFT - if( spec ) + if( c.useIpp ) { +#ifdef USE_IPP_DFT if( !inv ) { - if (ippsDFTFwd_CToC( src, dst, spec, (uchar*)buf ) >= 0) + if (ippsDFTFwd_CToC( src, dst, c.ipp_spec, c.ipp_work ) >= 0) { CV_IMPL_ADD(CV_IMPL_IPP); return; @@ -560,22 +603,22 @@ DFT( const Complex* src, Complex* dst, int n, } else { - if (ippsDFTInv_CToC( src, dst, spec, (uchar*)buf ) >= 0) + if (ippsDFTInv_CToC( src, dst, c.ipp_spec, c.ipp_work ) >= 0) { CV_IMPL_ADD(CV_IMPL_IPP); return; } } setIppErrorStatus(); - } #endif + } - tab_step = tab_size == n ? 1 : tab_size == n*2 ? 2 : tab_size/n; + int tab_step = c.tab_size == n ? 1 : c.tab_size == n*2 ? 2 : c.tab_size/n; // 0. 
shuffle data if( dst != src ) { - assert( (flags & DFT_NO_PERMUTE) == 0 ); + assert( !c.noPermute ); if( !inv ) { for( i = 0; i <= n - 2; i += 2, itab += 2*tab_step ) @@ -609,10 +652,10 @@ DFT( const Complex* src, Complex* dst, int n, } else { - if( (flags & DFT_NO_PERMUTE) == 0 ) + if( !c.noPermute ) { - CV_Assert( factors[0] == factors[nf-1] ); - if( nf == 1 ) + CV_Assert( c.factors[0] == c.factors[c.nf-1] ); + if( c.nf == 1 ) { if( (n & 3) == 0 ) { @@ -662,22 +705,22 @@ DFT( const Complex* src, Complex* dst, int n, n = 1; // 1. power-2 transforms - if( (factors[0] & 1) == 0 ) + if( (c.factors[0] & 1) == 0 ) { - if( factors[0] >= 4 && checkHardwareSupport(CV_CPU_SSE3)) + if( c.factors[0] >= 4 && c.haveSSE3) { DFT_VecR4 vr4; - n = vr4(dst, factors[0], n0, dw0, wave); + n = vr4(dst, c.factors[0], c.n, dw0, wave); } // radix-4 transform - for( ; n*4 <= factors[0]; ) + for( ; n*4 <= c.factors[0]; ) { nx = n; n *= 4; dw0 /= 4; - for( i = 0; i < n0; i += n ) + for( i = 0; i < c.n; i += n ) { Complex *v0, *v1; T r0, i0, r1, i1, r2, i2, r3, i3, r4, i4; @@ -729,14 +772,14 @@ DFT( const Complex* src, Complex* dst, int n, } } - for( ; n < factors[0]; ) + for( ; n < c.factors[0]; ) { // do the remaining radix-2 transform nx = n; n *= 2; dw0 /= 2; - for( i = 0; i < n0; i += n ) + for( i = 0; i < c.n; i += n ) { Complex* v = dst + i; T r0 = v[0].re + v[nx].re; @@ -761,9 +804,9 @@ DFT( const Complex* src, Complex* dst, int n, } // 2. all the other transforms - for( f_idx = (factors[0]&1) ? 0 : 1; f_idx < nf; f_idx++ ) + for( f_idx = (c.factors[0]&1) ? 
0 : 1; f_idx < c.nf; f_idx++ ) { - int factor = factors[f_idx]; + int factor = c.factors[f_idx]; nx = n; n *= factor; dw0 /= factor; @@ -771,7 +814,7 @@ DFT( const Complex* src, Complex* dst, int n, if( factor == 3 ) { // radix-3 - for( i = 0; i < n0; i += n ) + for( i = 0; i < c.n; i += n ) { Complex* v = dst + i; @@ -807,7 +850,7 @@ DFT( const Complex* src, Complex* dst, int n, else if( factor == 5 ) { // radix-5 - for( i = 0; i < n0; i += n ) + for( i = 0; i < c.n; i += n ) { for( j = 0, dw = 0; j < nx; j++, dw += dw0 ) { @@ -863,11 +906,12 @@ DFT( const Complex* src, Complex* dst, int n, { // radix-"factor" - an odd number int p, q, factor2 = (factor - 1)/2; - int d, dd, dw_f = tab_size/factor; + int d, dd, dw_f = c.tab_size/factor; + AutoBuffer > buf(factor2 * 2); Complex* a = buf; - Complex* b = buf + factor2; + Complex* b = a + factor2; - for( i = 0; i < n0; i += n ) + for( i = 0; i < c.n; i += n ) { for( j = 0, dw = 0; j < nx; j++, dw += dw0 ) { @@ -931,7 +975,7 @@ DFT( const Complex* src, Complex* dst, int n, s1.im += r1 - i1; s0.im += r1 + i1; d += dd; - d -= -(d >= tab_size) & tab_size; + d -= -(d >= c.tab_size) & c.tab_size; } v[k] = s0; @@ -948,7 +992,7 @@ DFT( const Complex* src, Complex* dst, int n, if( inv ) im_scale = -im_scale; - for( i = 0; i < n0; i++ ) + for( i = 0; i < c.n; i++ ) { T t0 = dst[i].re*re_scale; T t1 = dst[i].im*im_scale; @@ -958,7 +1002,7 @@ DFT( const Complex* src, Complex* dst, int n, } else if( inv ) { - for( i = 0; i <= n0 - 2; i += 2 ) + for( i = 0; i <= c.n - 2; i += 2 ) { T t0 = -dst[i].im; T t1 = -dst[i+1].im; @@ -966,8 +1010,8 @@ DFT( const Complex* src, Complex* dst, int n, dst[i+1].im = t1; } - if( i < n0 ) - dst[n0-1].im = -dst[n0-1].im; + if( i < c.n ) + dst[c.n-1].im = -dst[c.n-1].im; } } @@ -977,23 +1021,18 @@ DFT( const Complex* src, Complex* dst, int n, re(0), re(1), im(1), ... , re(n/2-1), im((n+1)/2-1) [, re((n+1)/2)] OR ... 
re(0), 0, re(1), im(1), ..., re(n/2-1), im((n+1)/2-1) [, re((n+1)/2), 0] */ template static void -RealDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab, - const Complex* wave, int tab_size, const void* -#ifdef USE_IPP_DFT - spec -#endif - , - Complex* buf, int flags, double _scale ) +RealDFT(const OcvDftOptions & c, const T* src, T* dst) { - int complex_output = (flags & DFT_COMPLEX_INPUT_OR_OUTPUT) != 0; - T scale = (T)_scale; - int j, n2 = n >> 1; + int n = c.n; + int complex_output = c.isComplex; + T scale = (T)c.scale; + int j; dst += complex_output; -#ifdef USE_IPP_DFT - if( spec ) + if( c.useIpp ) { - if (ippsDFTFwd_RToPack( src, dst, spec, (uchar*)buf ) >=0) +#ifdef USE_IPP_DFT + if (ippsDFTFwd_RToPack( src, dst, c.ipp_spec, c.ipp_work ) >=0) { if( complex_output ) { @@ -1006,9 +1045,9 @@ RealDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab, return; } setIppErrorStatus(); - } #endif - assert( tab_size == n ); + } + assert( c.tab_size == n ); if( n == 1 ) { @@ -1028,15 +1067,19 @@ RealDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab, _dst[0].im = 0; for( j = 1; j < n; j += 2 ) { - T t0 = src[itab[j]]*scale; - T t1 = src[itab[j+1]]*scale; + T t0 = src[c.itab[j]]*scale; + T t1 = src[c.itab[j+1]]*scale; _dst[j].re = t0; _dst[j].im = 0; _dst[j+1].re = t1; _dst[j+1].im = 0; } - DFT( _dst, _dst, n, nf, factors, itab, wave, - tab_size, 0, buf, DFT_NO_PERMUTE, 1 ); + OcvDftOptions sub_c = c; + sub_c.isComplex = false; + sub_c.isInverse = false; + sub_c.noPermute = true; + sub_c.scale = 1.; + DFT(sub_c, _dst, _dst); if( !complex_output ) dst[1] = dst[0]; } @@ -1045,12 +1088,22 @@ RealDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab, T t0, t; T h1_re, h1_im, h2_re, h2_im; T scale2 = scale*(T)0.5; - factors[0] >>= 1; + int n2 = n >> 1; - DFT( (Complex*)src, (Complex*)dst, n2, nf - (factors[0] == 1), - factors + (factors[0] == 1), - itab, wave, tab_size, 0, buf, 0, 1 ); - 
factors[0] <<= 1; + c.factors[0] >>= 1; + + OcvDftOptions sub_c = c; + sub_c.factors += (c.factors[0] == 1); + sub_c.nf -= (c.factors[0] == 1); + sub_c.isComplex = false; + sub_c.isInverse = false; + sub_c.noPermute = false; + sub_c.scale = 1.; + sub_c.n = n2; + + DFT(sub_c, (Complex*)src, (Complex*)dst); + + c.factors[0] <<= 1; t = dst[0] - dst[1]; dst[0] = (dst[0] + dst[1])*scale; @@ -1060,6 +1113,8 @@ RealDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab, t = dst[n-1]; dst[n-1] = dst[1]; + const Complex *wave = (const Complex*)c.wave; + for( j = 2, wave++; j < n2; j += 2, wave++ ) { /* calc odd */ @@ -1103,22 +1158,16 @@ RealDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab, re[0], re[1], im[1], ... , re[n/2-1], im[n/2-1], re[n/2] OR re(0), 0, re(1), im(1), ..., re(n/2-1), im((n+1)/2-1) [, re((n+1)/2), 0] */ template static void -CCSIDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab, - const Complex* wave, int tab_size, - const void* -#ifdef USE_IPP_DFT - spec -#endif - , Complex* buf, - int flags, double _scale ) +CCSIDFT(const OcvDftOptions & c, const T* src, T* dst) { - int complex_input = (flags & DFT_COMPLEX_INPUT_OR_OUTPUT) != 0; - int j, k, n2 = (n+1) >> 1; - T scale = (T)_scale; + int n = c.n; + int complex_input = c.isComplex; + int j, k; + T scale = (T)c.scale; T save_s1 = 0.; T t0, t1, t2, t3, t; - assert( tab_size == n ); + assert( c.tab_size == n ); if( complex_input ) { @@ -1127,10 +1176,10 @@ CCSIDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab, ((T*)src)[1] = src[0]; src++; } -#ifdef USE_IPP_DFT - if( spec ) + if( c.useIpp ) { - if (ippsDFTInv_PackToR( src, dst, spec, (uchar*)buf ) >=0) +#ifdef USE_IPP_DFT + if (ippsDFTInv_PackToR( src, dst, c.ipp_spec, c.ipp_work ) >=0) { if( complex_input ) ((T*)src)[0] = (T)save_s1; @@ -1139,8 +1188,8 @@ CCSIDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab, } setIppErrorStatus(); - } #endif + } if( n 
== 1 ) { dst[0] = (T)(src[0]*scale); @@ -1158,16 +1207,25 @@ CCSIDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab, _dst[0].re = src[0]; _dst[0].im = 0; + + int n2 = (n+1) >> 1; + for( j = 1; j < n2; j++ ) { - int k0 = itab[j], k1 = itab[n-j]; + int k0 = c.itab[j], k1 = c.itab[n-j]; t0 = _src[j].re; t1 = _src[j].im; _dst[k0].re = t0; _dst[k0].im = -t1; _dst[k1].re = t0; _dst[k1].im = t1; } - DFT( _dst, _dst, n, nf, factors, itab, wave, - tab_size, 0, buf, DFT_NO_PERMUTE, 1. ); + OcvDftOptions sub_c = c; + sub_c.isComplex = false; + sub_c.isInverse = false; + sub_c.noPermute = true; + sub_c.scale = 1.; + sub_c.n = n; + + DFT(sub_c, _dst, _dst); dst[0] *= scale; for( j = 1; j < n; j += 2 ) { @@ -1180,7 +1238,7 @@ CCSIDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab, else { int inplace = src == dst; - const Complex* w = wave; + const Complex* w = (const Complex*)c.wave; t = src[1]; t0 = (src[0] + src[n-1]); @@ -1188,6 +1246,8 @@ CCSIDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab, dst[0] = t0; dst[1] = t1; + int n2 = (n+1) >> 1; + for( j = 2, w++; j < n2; j += 2, w++ ) { T h1_re, h1_im, h2_re, h2_im; @@ -1218,10 +1278,10 @@ CCSIDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab, else { int j2 = j >> 1; - k = itab[j2]; + k = c.itab[j2]; dst[k] = t0; dst[k+1] = t1; - k = itab[n2-j2]; + k = c.itab[n2-j2]; dst[k] = t2; dst[k+1]= t3; } @@ -1239,19 +1299,26 @@ CCSIDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab, } else { - k = itab[n2]; + k = c.itab[n2]; dst[k*2] = t0; dst[k*2+1] = t1; } } - factors[0] >>= 1; - DFT( (Complex*)dst, (Complex*)dst, n2, - nf - (factors[0] == 1), - factors + (factors[0] == 1), itab, - wave, tab_size, 0, buf, - inplace ? 0 : DFT_NO_PERMUTE, 1. 
); - factors[0] <<= 1; + c.factors[0] >>= 1; + + OcvDftOptions sub_c = c; + sub_c.factors += (c.factors[0] == 1); + sub_c.nf -= (c.factors[0] == 1); + sub_c.isComplex = false; + sub_c.isInverse = false; + sub_c.noPermute = !inplace; + sub_c.scale = 1.; + sub_c.n = n2; + + DFT(sub_c, (Complex*)dst, (Complex*)dst); + + c.factors[0] <<= 1; for( j = 0; j < n; j += 2 ) { @@ -1436,57 +1503,35 @@ ExpandCCS( uchar* _ptr, int n, int elem_size ) } } - -typedef void (*DFTFunc)( - const void* src, void* dst, int n, int nf, int* factors, - const int* itab, const void* wave, int tab_size, - const void* spec, void* buf, int inv, double scale ); - -static void DFT_32f( const Complexf* src, Complexf* dst, int n, - int nf, const int* factors, const int* itab, - const Complexf* wave, int tab_size, - const void* spec, Complexf* buf, - int flags, double scale ) +static void DFT_32f(const OcvDftOptions & c, const Complexf* src, Complexf* dst) { - DFT(src, dst, n, nf, factors, itab, wave, tab_size, spec, buf, flags, scale); + DFT(c, src, dst); } -static void DFT_64f( const Complexd* src, Complexd* dst, int n, - int nf, const int* factors, const int* itab, - const Complexd* wave, int tab_size, - const void* spec, Complexd* buf, - int flags, double scale ) +static void DFT_64f(const OcvDftOptions & c, const Complexd* src, Complexd* dst) { - DFT(src, dst, n, nf, factors, itab, wave, tab_size, spec, buf, flags, scale); + DFT(c, src, dst); } -static void RealDFT_32f( const float* src, float* dst, int n, int nf, int* factors, - const int* itab, const Complexf* wave, int tab_size, const void* spec, - Complexf* buf, int flags, double scale ) +static void RealDFT_32f(const OcvDftOptions & c, const float* src, float* dst) { - RealDFT( src, dst, n, nf, factors, itab, wave, tab_size, spec, buf, flags, scale); + RealDFT(c, src, dst); } -static void RealDFT_64f( const double* src, double* dst, int n, int nf, int* factors, - const int* itab, const Complexd* wave, int tab_size, const void* spec, - 
Complexd* buf, int flags, double scale ) +static void RealDFT_64f(const OcvDftOptions & c, const double* src, double* dst) { - RealDFT( src, dst, n, nf, factors, itab, wave, tab_size, spec, buf, flags, scale); + RealDFT(c, src, dst); } -static void CCSIDFT_32f( const float* src, float* dst, int n, int nf, int* factors, - const int* itab, const Complexf* wave, int tab_size, const void* spec, - Complexf* buf, int flags, double scale ) +static void CCSIDFT_32f(const OcvDftOptions & c, const float* src, float* dst) { - CCSIDFT( src, dst, n, nf, factors, itab, wave, tab_size, spec, buf, flags, scale); + CCSIDFT(c, src, dst); } -static void CCSIDFT_64f( const double* src, double* dst, int n, int nf, int* factors, - const int* itab, const Complexd* wave, int tab_size, const void* spec, - Complexd* buf, int flags, double scale ) +static void CCSIDFT_64f(const OcvDftOptions & c, const double* src, double* dst) { - CCSIDFT( src, dst, n, nf, factors, itab, wave, tab_size, spec, buf, flags, scale); + CCSIDFT(c, src, dst); } } @@ -1508,8 +1553,11 @@ class Dft_C_IPPLoop_Invoker : public ParallelLoopBody { public: - Dft_C_IPPLoop_Invoker(const Mat& _src, Mat& _dst, const Dft& _ippidft, int _norm_flag, bool *_ok) : - ParallelLoopBody(), src(_src), dst(_dst), ippidft(_ippidft), norm_flag(_norm_flag), ok(_ok) + Dft_C_IPPLoop_Invoker(uchar * _src, int _src_step, uchar * _dst, int _dst_step, int _width, + const Dft& _ippidft, int _norm_flag, bool *_ok) : + ParallelLoopBody(), + src(_src), src_step(_src_step), dst(_dst), dst_step(_dst_step), width(_width), + ippidft(_ippidft), norm_flag(_norm_flag), ok(_ok) { *ok = true; } @@ -1523,7 +1571,7 @@ public: int sizeSpec=0; int sizeInit=0; - IppiSize srcRoiSize = {src.cols, 1}; + IppiSize srcRoiSize = {width, 1}; status = ippiDFTGetSize_C_32fc(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer ); if ( status < 0 ) @@ -1555,7 +1603,8 @@ public: } for( int i = range.start; i < range.end; ++i) - if(!ippidft(src.ptr(i), 
(int)src.step,dst.ptr(i), (int)dst.step, pDFTSpec, (Ipp8u*)pBuffer)) + if(!ippidft((Ipp32fc*)(src + src_step * i), src_step, (Ipp32fc*)(dst + dst_step * i), dst_step, + pDFTSpec, (Ipp8u*)pBuffer)) { *ok = false; } @@ -1568,8 +1617,11 @@ public: } private: - const Mat& src; - Mat& dst; + uchar * src; + int src_step; + uchar * dst; + int dst_step; + int width; const Dft& ippidft; int norm_flag; bool *ok; @@ -1582,8 +1634,11 @@ class Dft_R_IPPLoop_Invoker : public ParallelLoopBody { public: - Dft_R_IPPLoop_Invoker(const Mat& _src, Mat& _dst, const Dft& _ippidft, int _norm_flag, bool *_ok) : - ParallelLoopBody(), src(_src), dst(_dst), ippidft(_ippidft), norm_flag(_norm_flag), ok(_ok) + Dft_R_IPPLoop_Invoker(uchar * _src, int _src_step, uchar * _dst, int _dst_step, int _width, + const Dft& _ippidft, int _norm_flag, bool *_ok) : + ParallelLoopBody(), + src(_src), src_step(_src_step), dst(_dst), dst_step(_dst_step), width(_width), + ippidft(_ippidft), norm_flag(_norm_flag), ok(_ok) { *ok = true; } @@ -1597,7 +1652,7 @@ public: int sizeSpec=0; int sizeInit=0; - IppiSize srcRoiSize = {src.cols, 1}; + IppiSize srcRoiSize = {width, 1}; status = ippiDFTGetSize_R_32f(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer ); if ( status < 0 ) @@ -1629,7 +1684,8 @@ public: } for( int i = range.start; i < range.end; ++i) - if(!ippidft(src.ptr(i), (int)src.step,dst.ptr(i), (int)dst.step, pDFTSpec, (Ipp8u*)pBuffer)) + if(!ippidft((float*)(src + src_step * i), src_step, (float*)(dst + dst_step * i), dst_step, + pDFTSpec, (Ipp8u*)pBuffer)) { *ok = false; } @@ -1642,8 +1698,11 @@ public: } private: - const Mat& src; - Mat& dst; + uchar * src; + int src_step; + uchar * dst; + int dst_step; + int width; const Dft& ippidft; int norm_flag; bool *ok; @@ -1652,18 +1711,18 @@ private: }; template -bool Dft_C_IPPLoop(const Mat& src, Mat& dst, const Dft& ippidft, int norm_flag) +bool Dft_C_IPPLoop(uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, 
const Dft& ippidft, int norm_flag) { bool ok; - parallel_for_(Range(0, src.rows), Dft_C_IPPLoop_Invoker(src, dst, ippidft, norm_flag, &ok), src.total()/(double)(1<<16) ); + parallel_for_(Range(0, height), Dft_C_IPPLoop_Invoker(src, src_step, dst, dst_step, width, ippidft, norm_flag, &ok), (width * height)/(double)(1<<16) ); return ok; } template -bool Dft_R_IPPLoop(const Mat& src, Mat& dst, const Dft& ippidft, int norm_flag) +bool Dft_R_IPPLoop(uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, const Dft& ippidft, int norm_flag) { bool ok; - parallel_for_(Range(0, src.rows), Dft_R_IPPLoop_Invoker(src, dst, ippidft, norm_flag, &ok), src.total()/(double)(1<<16) ); + parallel_for_(Range(0, height), Dft_R_IPPLoop_Invoker(src, src_step, dst, dst_step, width, ippidft, norm_flag, &ok), (width * height)/(double)(1<<16) ); return ok; } @@ -1691,7 +1750,7 @@ private: ippiDFT_R_Func func; }; -static bool ippi_DFT_C_32F(const Mat& src, Mat& dst, bool inv, int norm_flag) +static bool ippi_DFT_C_32F(uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, bool inv, int norm_flag) { IppStatus status; Ipp8u* pBuffer = 0; @@ -1700,7 +1759,7 @@ static bool ippi_DFT_C_32F(const Mat& src, Mat& dst, bool inv, int norm_flag) int sizeSpec=0; int sizeInit=0; - IppiSize srcRoiSize = {src.cols, src.rows}; + IppiSize srcRoiSize = {width, height}; status = ippiDFTGetSize_C_32fc(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer ); if ( status < 0 ) @@ -1728,9 +1787,9 @@ static bool ippi_DFT_C_32F(const Mat& src, Mat& dst, bool inv, int norm_flag) } if (!inv) - status = ippiDFTFwd_CToC_32fc_C1R( src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, pDFTSpec, pBuffer ); + status = ippiDFTFwd_CToC_32fc_C1R( (Ipp32fc*)src, src_step, (Ipp32fc*)dst, dst_step, pDFTSpec, pBuffer ); else - status = ippiDFTInv_CToC_32fc_C1R( src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, pDFTSpec, pBuffer ); + status = ippiDFTInv_CToC_32fc_C1R( 
(Ipp32fc*)src, src_step, (Ipp32fc*)dst, dst_step, pDFTSpec, pBuffer ); if ( sizeBuffer > 0 ) ippFree( pBuffer ); @@ -1745,7 +1804,7 @@ static bool ippi_DFT_C_32F(const Mat& src, Mat& dst, bool inv, int norm_flag) return false; } -static bool ippi_DFT_R_32F(const Mat& src, Mat& dst, bool inv, int norm_flag) +static bool ippi_DFT_R_32F(uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, bool inv, int norm_flag) { IppStatus status; Ipp8u* pBuffer = 0; @@ -1754,7 +1813,7 @@ static bool ippi_DFT_R_32F(const Mat& src, Mat& dst, bool inv, int norm_flag) int sizeSpec=0; int sizeInit=0; - IppiSize srcRoiSize = {src.cols, src.rows}; + IppiSize srcRoiSize = {width, height}; status = ippiDFTGetSize_R_32f(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer ); if ( status < 0 ) @@ -1782,9 +1841,9 @@ static bool ippi_DFT_R_32F(const Mat& src, Mat& dst, bool inv, int norm_flag) } if (!inv) - status = ippiDFTFwd_RToPack_32f_C1R( src.ptr(), (int)(src.step), dst.ptr(), (int)dst.step, pDFTSpec, pBuffer ); + status = ippiDFTFwd_RToPack_32f_C1R( (float*)src, src_step, (float*)dst, dst_step, pDFTSpec, pBuffer ); else - status = ippiDFTInv_PackToR_32f_C1R( src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, pDFTSpec, pBuffer ); + status = ippiDFTInv_PackToR_32f_C1R( (float*)src, src_step, (float*)dst, dst_step, pDFTSpec, pBuffer ); if ( sizeBuffer > 0 ) ippFree( pBuffer ); @@ -2426,111 +2485,324 @@ static bool ocl_dft_amdfft(InputArray _src, OutputArray _dst, int flags) namespace cv { -static void complementComplexOutput(Mat& dst, int len, int dft_dims) -{ - int i, n = dst.cols; - size_t elem_size = dst.elemSize1(); - if( elem_size == sizeof(float) ) - { - float* p0 = dst.ptr(); - size_t dstep = dst.step/sizeof(p0[0]); - for( i = 0; i < len; i++ ) - { - float* p = p0 + dstep*i; - float* q = dft_dims == 1 || i == 0 || i*2 == len ? 
p : p0 + dstep*(len-i); - for( int j = 1; j < (n+1)/2; j++ ) - { - p[(n-j)*2] = q[j*2]; - p[(n-j)*2+1] = -q[j*2+1]; - } +template +static void complementComplex(T * ptr, int step, int n, int len, int dft_dims) +{ + T* p0 = (T*)ptr; + size_t dstep = step/sizeof(p0[0]); + for(int i = 0; i < len; i++ ) + { + T* p = p0 + dstep*i; + T* q = dft_dims == 1 || i == 0 || i*2 == len ? p : p0 + dstep*(len-i); + + for( int j = 1; j < (n+1)/2; j++ ) + { + p[(n-j)*2] = q[j*2]; + p[(n-j)*2+1] = -q[j*2+1]; } } +} + +static void complementComplexOutput(int depth, uchar * ptr, int step, int count, int len, int dft_dims) +{ + if( depth == CV_32F ) + complementComplex((float*)ptr, step, count, len, dft_dims); else - { - double* p0 = dst.ptr(); - size_t dstep = dst.step/sizeof(p0[0]); - for( i = 0; i < len; i++ ) - { - double* p = p0 + dstep*i; - double* q = dft_dims == 1 || i == 0 || i*2 == len ? p : p0 + dstep*(len-i); - - for( int j = 1; j < (n+1)/2; j++ ) - { - p[(n-j)*2] = q[j*2]; - p[(n-j)*2+1] = -q[j*2+1]; - } - } - } -} + complementComplex((double*)ptr, step, count, len, dft_dims); } -void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows ) +enum DftMode { + InvalidDft = 0, + FwdRealToCCS, + FwdRealToComplex, + FwdComplexToComplex, + InvCCSToReal, + InvComplexToReal, + InvComplexToComplex, +}; + +enum DftDims { + InvalidDim = 0, + OneDim, + OneDimColWise, + TwoDims +}; + +inline const char * modeName(DftMode m) { -#ifdef HAVE_CLAMDFFT - CV_OCL_RUN(ocl::haveAmdFft() && ocl::Device::getDefault().type() != ocl::Device::TYPE_CPU && - _dst.isUMat() && _src0.dims() <= 2 && nonzero_rows == 0, - ocl_dft_amdfft(_src0, _dst, flags)) -#endif - -#ifdef HAVE_OPENCL - CV_OCL_RUN(_dst.isUMat() && _src0.dims() <= 2, - ocl_dft(_src0, _dst, flags, nonzero_rows)) -#endif - - static DFTFunc dft_tbl[6] = + switch (m) { - (DFTFunc)DFT_32f, - (DFTFunc)RealDFT_32f, - (DFTFunc)CCSIDFT_32f, - (DFTFunc)DFT_64f, - (DFTFunc)RealDFT_64f, - (DFTFunc)CCSIDFT_64f + case InvalidDft: return 
"InvalidDft"; + case FwdRealToCCS: return "FwdRealToCCS"; + case FwdRealToComplex: return "FwdRealToComplex"; + case FwdComplexToComplex: return "FwdComplexToComplex"; + case InvCCSToReal: return "InvCCSToReal"; + case InvComplexToReal: return "InvComplexToReal"; + case InvComplexToComplex: return "InvComplexToComplex"; + } + return 0; +} + +inline const char * dimsName(DftDims d) +{ + switch (d) + { + case InvalidDim: return "InvalidDim"; + case OneDim: return "OneDim"; + case OneDimColWise: return "OneDimColWise"; + case TwoDims: return "TwoDims"; }; - AutoBuffer buf; - Mat src0 = _src0.getMat(), src = src0; - int prev_len = 0, stage = 0; - bool inv = (flags & DFT_INVERSE) != 0; - int nf = 0, real_transform = src.channels() == 1 || (inv && (flags & DFT_REAL_OUTPUT)!=0); - int type = src.type(), depth = src.depth(); - int elem_size = (int)src.elemSize1(), complex_elem_size = elem_size*2; - int factors[34]; - bool inplace_transform = false; -#ifdef USE_IPP_DFT - AutoBuffer ippbuf; - int ipp_norm_flag = !(flags & DFT_SCALE) ? 8 : inv ? 
2 : 1; -#endif + return 0; +} - CV_Assert( type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2 ); +template +inline bool isInv(T mode) +{ + switch ((DftMode)mode) + { + case InvCCSToReal: + case InvComplexToReal: + case InvComplexToComplex: return true; + default: return false; + } +} - if( !inv && src.channels() == 1 && (flags & DFT_COMPLEX_OUTPUT) ) - _dst.create( src.size(), CV_MAKETYPE(depth, 2) ); - else if( inv && src.channels() == 2 && (flags & DFT_REAL_OUTPUT) ) - _dst.create( src.size(), depth ); +inline DftMode determineMode(bool inv, int cn1, int cn2) +{ + if (!inv) + { + if (cn1 == 1 && cn2 == 1) + return FwdRealToCCS; + else if (cn1 == 1 && cn2 == 2) + return FwdRealToComplex; + else if (cn1 == 2 && cn2 == 2) + return FwdComplexToComplex; + } else - _dst.create( src.size(), type ); + { + if (cn1 == 1 && cn2 == 1) + return InvCCSToReal; + else if (cn1 == 2 && cn2 == 1) + return InvComplexToReal; + else if (cn1 == 2 && cn2 == 2) + return InvComplexToComplex; + } + return InvalidDft; +} - Mat dst = _dst.getMat(); + +inline DftDims determineDims(int rows, int cols, bool isRowWise, bool isContinuous) +{ + // printf("%d x %d (%d, %d)\n", rows, cols, isRowWise, isContinuous); + if (isRowWise) + return OneDim; + if (cols == 1 && rows > 1) // one-column-shaped input + { + if (isContinuous) + return OneDim; + else + return OneDimColWise; + } + if (rows == 1) + return OneDim; + if (cols > 1 && rows > 1) + return TwoDims; + return InvalidDim; +} + +class OcvDftImpl +{ +protected: + hal::DftContext contextA; + hal::DftContext contextB; + bool needBufferA; + bool needBufferB; + bool inv; + int width; + int height; + DftMode mode; + int elem_size; + int complex_elem_size; + int depth; + bool real_transform; + int nonzero_rows; + bool isRowTransform; + bool isScaled; + std::vector stages; + bool useIpp; + int src_channels; + int dst_channels; + + AutoBuffer tmp_bufA; + AutoBuffer tmp_bufB; + AutoBuffer buf0; + AutoBuffer buf1; + +public: + 
OcvDftImpl() + { + needBufferA = false; + needBufferB = false; + inv = false; + width = 0; + height = 0; + mode = InvalidDft; + elem_size = 0; + complex_elem_size = 0; + depth = 0; + real_transform = false; + nonzero_rows = 0; + isRowTransform = false; + isScaled = false; + useIpp = false; + src_channels = 0; + dst_channels = 0; + } + + void init(int _width, int _height, int _depth, int _src_channels, int _dst_channels, int flags, int _nonzero_rows) + { + bool isComplex = _src_channels != _dst_channels; + nonzero_rows = _nonzero_rows; + width = _width; + height = _height; + depth = _depth; + src_channels = _src_channels; + dst_channels = _dst_channels; + bool isInverse = (flags & CV_HAL_DFT_INVERSE) != 0; + bool isInplace = (flags & CV_HAL_DFT_IS_INPLACE) != 0; + bool isContinuous = (flags & CV_HAL_DFT_IS_CONTINUOUS) != 0; + mode = determineMode(isInverse, _src_channels, _dst_channels); + inv = isInverse; + isRowTransform = (flags & CV_HAL_DFT_ROWS) != 0; + isScaled = (flags & CV_HAL_DFT_SCALE) != 0; + needBufferA = false; + needBufferB = false; + real_transform = (mode != FwdComplexToComplex && mode != InvComplexToComplex); + + elem_size = (depth == CV_32F) ? 
sizeof(float) : sizeof(double); + complex_elem_size = elem_size * 2; + if( !real_transform ) + elem_size = complex_elem_size; #if defined USE_IPP_DFT - CV_IPP_CHECK() - { - if ((src.depth() == CV_32F) && (src.total()>(int)(1<<6)) && nonzero_rows == 0) + CV_IPP_CHECK() { - if ((flags & DFT_ROWS) == 0) + if (nonzero_rows == 0 && depth == CV_32F && ((width * height)>(int)(1<<6))) { - if (src.channels() == 2 && !(inv && (flags & DFT_REAL_OUTPUT))) + if (mode == FwdComplexToComplex || mode == InvComplexToComplex || mode == FwdRealToCCS || mode == InvCCSToReal) { - if (ippi_DFT_C_32F(src, dst, inv, ipp_norm_flag)) + useIpp = true; + return; + } + } + } +#endif + + DftDims dims = determineDims(height, width, isRowTransform, isContinuous); + if (dims == TwoDims) + { + stages.resize(2); + if (mode == InvCCSToReal || mode == InvComplexToReal) + { + stages[0] = 1; + stages[1] = 0; + } + else + { + stages[0] = 0; + stages[1] = 1; + } + } + else + { + stages.resize(1); + if (dims == OneDimColWise) + stages[0] = 1; + else + stages[0] = 0; + } + + for(uint stageIndex = 0; stageIndex < stages.size(); ++stageIndex) + { + if (stageIndex == 1) + { + isInplace = true; + isComplex = false; + } + + int stage = stages[stageIndex]; + bool isLastStage = (stageIndex + 1 == stages.size()); + + int len, count; + + int f = 0; + if (inv) + f |= CV_HAL_DFT_INVERSE; + if (isScaled) + f |= CV_HAL_DFT_SCALE; + if (isRowTransform) + f |= CV_HAL_DFT_ROWS; + if (isComplex) + f |= CV_HAL_DFT_COMPLEX_OUTPUT; + if (real_transform) + f |= CV_HAL_DFT_REAL_OUTPUT; + if (!isLastStage) + f |= CV_HAL_DFT_TWO_STAGE; + + if( stage == 0 ) // row-wise transform + { + if (width == 1 && !isRowTransform ) + { + len = height; + count = width; + } + else + { + len = width; + count = height; + } + needBufferA = isInplace; + hal::dftInit(contextA, len, count, depth, f, &needBufferA); + if (needBufferA) + tmp_bufA.allocate(len * complex_elem_size); + } + else + { + len = height; + count = width; + f |= 
CV_HAL_DFT_STAGE_COLS; + needBufferB = isInplace; + hal::dftInit(contextB, len, count, depth, f, &needBufferB); + if (needBufferB) + tmp_bufB.allocate(len * complex_elem_size); + + buf0.allocate(len * complex_elem_size); + buf1.allocate(len * complex_elem_size); + } + } + } + + void run(uchar * src, int src_step, uchar * dst, int dst_step) + { +#if defined USE_IPP_DFT + if (useIpp) + { + int ipp_norm_flag = !isScaled ? 8 : inv ? 2 : 1; + if (!isRowTransform) + { + if (mode == FwdComplexToComplex || mode == InvComplexToComplex) + { + if (ippi_DFT_C_32F(src, src_step, dst, dst_step, width, height, inv, ipp_norm_flag)) { CV_IMPL_ADD(CV_IMPL_IPP); return; } setIppErrorStatus(); } - if (src.channels() == 1 && (inv || !(flags & DFT_COMPLEX_OUTPUT))) + else if (mode == FwdRealToCCS || mode == InvCCSToReal) { - if (ippi_DFT_R_32F(src, dst, inv, ipp_norm_flag)) + if (ippi_DFT_R_32F(src, src_step, dst, dst_step, width, height, inv, ipp_norm_flag)) { CV_IMPL_ADD(CV_IMPL_IPP); return; @@ -2540,20 +2812,20 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows ) } else { - if (src.channels() == 2 && !(inv && (flags & DFT_REAL_OUTPUT))) + if (mode == FwdComplexToComplex || mode == InvComplexToComplex) { ippiDFT_C_Func ippiFunc = inv ? (ippiDFT_C_Func)ippiDFTInv_CToC_32fc_C1R : (ippiDFT_C_Func)ippiDFTFwd_CToC_32fc_C1R; - if (Dft_C_IPPLoop(src, dst, IPPDFT_C_Functor(ippiFunc),ipp_norm_flag)) + if (Dft_C_IPPLoop(src, src_step, dst, dst_step, width, height, IPPDFT_C_Functor(ippiFunc),ipp_norm_flag)) { CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); return; } setIppErrorStatus(); } - if (src.channels() == 1 && (inv || !(flags & DFT_COMPLEX_OUTPUT))) + else if (mode == FwdRealToCCS || mode == InvCCSToReal) { ippiDFT_R_Func ippiFunc = inv ? 
(ippiDFT_R_Func)ippiDFTInv_PackToR_32f_C1R : (ippiDFT_R_Func)ippiDFTFwd_RToPack_32f_C1R; - if (Dft_R_IPPLoop(src, dst, IPPDFT_R_Functor(ippiFunc),ipp_norm_flag)) + if (Dft_R_IPPLoop(src, src_step, dst, dst_step, width, height, IPPDFT_R_Functor(ippiFunc),ipp_norm_flag)) { CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); return; @@ -2561,57 +2833,269 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows ) setIppErrorStatus(); } } + return; } - } #endif - if( !real_transform ) - elem_size = complex_elem_size; - - if( src.cols == 1 && nonzero_rows > 0 ) - CV_Error( CV_StsNotImplemented, - "This mode (using nonzero_rows with a single-column matrix) breaks the function's logic, so it is prohibited.\n" - "For fast convolution/correlation use 2-column matrix or single-row matrix instead" ); - - // determine, which transform to do first - row-wise - // (stage 0) or column-wise (stage 1) transform - if( !(flags & DFT_ROWS) && src.rows > 1 && - ((src.cols == 1 && (!src.isContinuous() || !dst.isContinuous())) || - (src.cols > 1 && inv && real_transform)) ) - stage = 1; - - for(;;) - { - double scale = 1; - uchar* wave = 0; - int* itab = 0; - uchar* ptr; - int i, len, count, sz = 0; - int use_buf = 0, odd_real = 0; - DFTFunc dft_func; - - if( stage == 0 ) // row-wise transform + for(uint stageIndex = 0; stageIndex < stages.size(); ++stageIndex) { - len = !inv ? src.cols : dst.cols; - count = src.rows; - if( len == 1 && !(flags & DFT_ROWS) ) + int stage_src_channels = src_channels; + int stage_dst_channels = dst_channels; + + if (stageIndex == 1) { - len = !inv ? 
src.rows : dst.rows; - count = 1; + src = dst; + src_step = dst_step; + stage_src_channels = stage_dst_channels; } - odd_real = real_transform && (len & 1); + + int stage = stages[stageIndex]; + bool isLastStage = (stageIndex + 1 == stages.size()); + bool isComplex = stage_src_channels != stage_dst_channels; + + if( stage == 0 ) + rowDft(src, src_step, dst, dst_step, isComplex, isLastStage); + else + colDft(src, src_step, dst, dst_step, stage_src_channels, stage_dst_channels, isLastStage); + } + } + + void free() + { + if (useIpp) + return; + hal::dftFree(contextA); + hal::dftFree(contextB); + } + +protected: + + void rowDft(uchar* src_data, int src_step, uchar* dst_data, int dst_step, bool isComplex, bool isLastStage) + { + int len, count; + if (width == 1 && !isRowTransform ) + { + len = height; + count = width; } else { - len = dst.rows; - count = !inv ? src0.cols : dst.cols; - sz = 2*len*complex_elem_size; + len = width; + count = height; + } + int dptr_offset = 0; + int dst_full_len = len*elem_size; + + if( needBufferA ) + { + if (mode == FwdRealToCCS && (len & 1) && len > 1) + dptr_offset = elem_size; } - void *spec = 0; -#ifdef USE_IPP_DFT - if( CV_IPP_CHECK_COND && (len*count >= 64) ) // use IPP DFT if available + if( !inv && isComplex ) + dst_full_len += (len & 1) ? 
elem_size : complex_elem_size; + + int nz = nonzero_rows; + if( nz <= 0 || nz > count ) + nz = count; + + int i; + for( i = 0; i < nz; i++ ) { + const uchar* sptr = src_data + src_step * i; + uchar* dptr0 = dst_data + dst_step * i; + uchar* dptr = dptr0; + + if( needBufferA ) + dptr = tmp_bufA; + + hal::dftRun(contextA, sptr, dptr); + + if( needBufferA ) + memcpy( dptr0, dptr + dptr_offset, dst_full_len ); + } + + for( ; i < count; i++ ) + { + uchar* dptr0 = dst_data + dst_step * i; + memset( dptr0, 0, dst_full_len ); + } + if(isLastStage && mode == FwdRealToComplex) + complementComplexOutput(depth, dst_data, dst_step, len, nz, 1); + } + + void colDft(uchar* src_data, int src_step, uchar* dst_data, int dst_step, int stage_src_channels, int stage_dst_channels, bool isLastStage) + { + int len = height; + int count = width; + int a = 0, b = count; + uchar *dbuf0, *dbuf1; + const uchar* sptr0 = src_data; + uchar* dptr0 = dst_data; + + dbuf0 = buf0, dbuf1 = buf1; + + if( needBufferB ) + { + dbuf1 = tmp_bufB; + dbuf0 = buf1; + } + + if( real_transform ) + { + int even; + a = 1; + even = (count & 1) == 0; + b = (count+1)/2; + if( !inv ) + { + memset( buf0, 0, len*complex_elem_size ); + CopyColumn( sptr0, src_step, buf0, complex_elem_size, len, elem_size ); + sptr0 += stage_dst_channels*elem_size; + if( even ) + { + memset( buf1, 0, len*complex_elem_size ); + CopyColumn( sptr0 + (count-2)*elem_size, src_step, + buf1, complex_elem_size, len, elem_size ); + } + } + else if( stage_src_channels == 1 ) + { + CopyColumn( sptr0, src_step, buf0, elem_size, len, elem_size ); + ExpandCCS( buf0, len, elem_size ); + if( even ) + { + CopyColumn( sptr0 + (count-1)*elem_size, src_step, + buf1, elem_size, len, elem_size ); + ExpandCCS( buf1, len, elem_size ); + } + sptr0 += elem_size; + } + else + { + CopyColumn( sptr0, src_step, buf0, complex_elem_size, len, complex_elem_size ); + if( even ) + { + CopyColumn( sptr0 + b*complex_elem_size, src_step, + buf1, complex_elem_size, len, 
complex_elem_size ); + } + sptr0 += complex_elem_size; + } + + if( even ) + hal::dftRun(contextB, buf1, dbuf1); + hal::dftRun(contextB, buf0, dbuf0); + + if( stage_dst_channels == 1 ) + { + if( !inv ) + { + // copy the half of output vector to the first/last column. + // before doing that, defgragment the vector + memcpy( dbuf0 + elem_size, dbuf0, elem_size ); + CopyColumn( dbuf0 + elem_size, elem_size, dptr0, + dst_step, len, elem_size ); + if( even ) + { + memcpy( dbuf1 + elem_size, dbuf1, elem_size ); + CopyColumn( dbuf1 + elem_size, elem_size, + dptr0 + (count-1)*elem_size, + dst_step, len, elem_size ); + } + dptr0 += elem_size; + } + else + { + // copy the real part of the complex vector to the first/last column + CopyColumn( dbuf0, complex_elem_size, dptr0, dst_step, len, elem_size ); + if( even ) + CopyColumn( dbuf1, complex_elem_size, dptr0 + (count-1)*elem_size, + dst_step, len, elem_size ); + dptr0 += elem_size; + } + } + else + { + assert( !inv ); + CopyColumn( dbuf0, complex_elem_size, dptr0, + dst_step, len, complex_elem_size ); + if( even ) + CopyColumn( dbuf1, complex_elem_size, + dptr0 + b*complex_elem_size, + dst_step, len, complex_elem_size ); + dptr0 += complex_elem_size; + } + } + + for(int i = a; i < b; i += 2 ) + { + if( i+1 < b ) + { + CopyFrom2Columns( sptr0, src_step, buf0, buf1, len, complex_elem_size ); + hal::dftRun(contextB, buf1, dbuf1); + } + else + CopyColumn( sptr0, src_step, buf0, complex_elem_size, len, complex_elem_size ); + + hal::dftRun(contextB, buf0, dbuf0); + + if( i+1 < b ) + CopyTo2Columns( dbuf0, dbuf1, dptr0, dst_step, len, complex_elem_size ); + else + CopyColumn( dbuf0, complex_elem_size, dptr0, dst_step, len, complex_elem_size ); + sptr0 += 2*complex_elem_size; + dptr0 += 2*complex_elem_size; + } + if(isLastStage && mode == FwdRealToComplex) + complementComplexOutput(depth, dst_data, dst_step, count, len, 2); + } +}; + +class OcvDftBasicImpl +{ +public: + OcvDftOptions opt; + int _factors[34]; + AutoBuffer wave_buf; + 
AutoBuffer itab_buf; +#ifdef USE_IPP_DFT + AutoBuffer ippbuf; + AutoBuffer ippworkbuf; +#endif + +public: + OcvDftBasicImpl() + { + opt.factors = _factors; + } + OcvDftBasicImpl & operator=(const OcvDftBasicImpl & other) + { + this->opt = other.opt; + return *this; + } + void init(int len, int count, int depth, int flags, bool *needBuffer) + { + int prev_len = opt.n; + + int stage = (flags & CV_HAL_DFT_STAGE_COLS) != 0 ? 1 : 0; + int complex_elem_size = depth == CV_32F ? sizeof(Complex) : sizeof(Complex); + opt.isInverse = (flags & CV_HAL_DFT_INVERSE) != 0; + bool real_transform = (flags & CV_HAL_DFT_REAL_OUTPUT) != 0; + opt.isComplex = (stage == 0) && (flags & CV_HAL_DFT_COMPLEX_OUTPUT) != 0; + bool needAnotherStage = (flags & CV_HAL_DFT_TWO_STAGE) != 0; + + opt.scale = 1; + opt.tab_size = len; + opt.n = len; + + opt.useIpp = false; + #ifdef USE_IPP_DFT + opt.ipp_spec = 0; + opt.ipp_work = 0; + + if( CV_IPP_CHECK_COND && (opt.n*count >= 64) ) // use IPP DFT if available + { + int ipp_norm_flag = (flags & CV_HAL_DFT_SCALE) == 0 ? 8 : opt.isInverse ? 
2 : 1; int specsize=0, initsize=0, worksize=0; IppDFTGetSizeFunc getSizeFunc = 0; IppDFTInitFunc initFunc = 0; @@ -2642,260 +3126,266 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows ) initFunc = (IppDFTInitFunc)ippsDFTInit_C_64fc; } } - if( getSizeFunc(len, ipp_norm_flag, ippAlgHintNone, &specsize, &initsize, &worksize) >= 0 ) + if( getSizeFunc(opt.n, ipp_norm_flag, ippAlgHintNone, &specsize, &initsize, &worksize) >= 0 ) { ippbuf.allocate(specsize + initsize + 64); - spec = alignPtr(&ippbuf[0], 32); - uchar* initbuf = alignPtr((uchar*)spec + specsize, 32); - if( initFunc(len, ipp_norm_flag, ippAlgHintNone, spec, initbuf) < 0 ) - spec = 0; - sz += worksize; + opt.ipp_spec = alignPtr(&ippbuf[0], 32); + ippworkbuf.allocate(worksize + 32); + opt.ipp_work = alignPtr(&ippworkbuf[0], 32); + uchar* initbuf = alignPtr((uchar*)opt.ipp_spec + specsize, 32); + if( initFunc(opt.n, ipp_norm_flag, ippAlgHintNone, opt.ipp_spec, initbuf) >= 0 ) + opt.useIpp = true; } else setIppErrorStatus(); } - else -#endif + #endif + + if (!opt.useIpp) { - if( len != prev_len ) - nf = DFTFactorize( len, factors ); - - inplace_transform = factors[0] == factors[nf-1]; - sz += len*(complex_elem_size + sizeof(int)); - i = nf > 1 && (factors[0] & 1) == 0; - if( (factors[i] & 1) != 0 && factors[i] > 5 ) - sz += (factors[i]+1)*complex_elem_size; - - if( (stage == 0 && ((src.data == dst.data && !inplace_transform) || odd_real)) || - (stage == 1 && !inplace_transform) ) + if (len != prev_len) { - use_buf = 1; - sz += len*complex_elem_size; + opt.nf = DFTFactorize( opt.n, opt.factors ); + } + bool inplace_transform = opt.factors[0] == opt.factors[opt.nf-1]; + if (len != prev_len || (!inplace_transform && opt.isInverse && real_transform)) + { + wave_buf.allocate(opt.n*complex_elem_size); + opt.wave = wave_buf; + itab_buf.allocate(opt.n); + opt.itab = itab_buf; + DFTInit( opt.n, opt.nf, opt.factors, opt.itab, complex_elem_size, + opt.wave, stage == 0 && opt.isInverse && 
real_transform ); } - } - - ptr = (uchar*)buf; - buf.allocate( sz + 32 ); - if( ptr != (uchar*)buf ) - prev_len = 0; // because we release the buffer, - // force recalculation of - // twiddle factors and permutation table - ptr = (uchar*)buf; - if( !spec ) - { - wave = ptr; - ptr += len*complex_elem_size; - itab = (int*)ptr; - ptr = (uchar*)cvAlignPtr( ptr + len*sizeof(int), 16 ); - - if( len != prev_len || (!inplace_transform && inv && real_transform)) - DFTInit( len, nf, factors, itab, complex_elem_size, - wave, stage == 0 && inv && real_transform ); // otherwise reuse the tables calculated on the previous stage - } - - if( stage == 0 ) - { - uchar* tmp_buf = 0; - int dptr_offset = 0; - int dst_full_len = len*elem_size; - int _flags = (int)inv + (src.channels() != dst.channels() ? - DFT_COMPLEX_INPUT_OR_OUTPUT : 0); - if( use_buf ) + if (needBuffer) { - tmp_buf = ptr; - ptr += len*complex_elem_size; - if( odd_real && !inv && len > 1 && - !(_flags & DFT_COMPLEX_INPUT_OR_OUTPUT)) - dptr_offset = elem_size; + if( (stage == 0 && ((*needBuffer && !inplace_transform) || (real_transform && (len & 1)))) || + (stage == 1 && !inplace_transform) ) + { + *needBuffer = true; + } } - - if( !inv && (_flags & DFT_COMPLEX_INPUT_OR_OUTPUT) ) - dst_full_len += (len & 1) ? elem_size : complex_elem_size; - - dft_func = dft_tbl[(!real_transform ? 0 : !inv ? 1 : 2) + (depth == CV_64F)*3]; - - if( count > 1 && !(flags & DFT_ROWS) && (!inv || !real_transform) ) - stage = 1; - else if( flags & CV_DXT_SCALE ) - scale = 1./(len * (flags & DFT_ROWS ? 
1 : count)); - - if( nonzero_rows <= 0 || nonzero_rows > count ) - nonzero_rows = count; - - for( i = 0; i < nonzero_rows; i++ ) - { - const uchar* sptr = src.ptr(i); - uchar* dptr0 = dst.ptr(i); - uchar* dptr = dptr0; - - if( tmp_buf ) - dptr = tmp_buf; - - dft_func( sptr, dptr, len, nf, factors, itab, wave, len, spec, ptr, _flags, scale ); - if( dptr != dptr0 ) - memcpy( dptr0, dptr + dptr_offset, dst_full_len ); - } - - for( ; i < count; i++ ) - { - uchar* dptr0 = dst.ptr(i); - memset( dptr0, 0, dst_full_len ); - } - - if( stage != 1 ) - { - if( !inv && real_transform && dst.channels() == 2 ) - complementComplexOutput(dst, nonzero_rows, 1); - break; - } - src = dst; } else { - int a = 0, b = count; - uchar *buf0, *buf1, *dbuf0, *dbuf1; - const uchar* sptr0 = src.ptr(); - uchar* dptr0 = dst.ptr(); - buf0 = ptr; - ptr += len*complex_elem_size; - buf1 = ptr; - ptr += len*complex_elem_size; - dbuf0 = buf0, dbuf1 = buf1; - - if( use_buf ) + if (needBuffer) { - dbuf1 = ptr; - dbuf0 = buf1; - ptr += len*complex_elem_size; + *needBuffer = false; } + } - dft_func = dft_tbl[(depth == CV_64F)*3]; - - if( real_transform && inv && src.cols > 1 ) - stage = 0; - else if( flags & CV_DXT_SCALE ) - scale = 1./(len * count); - - if( real_transform ) + { + static DFTFunc dft_tbl[6] = { - int even; - a = 1; - even = (count & 1) == 0; - b = (count+1)/2; - if( !inv ) + (DFTFunc)DFT_32f, + (DFTFunc)RealDFT_32f, + (DFTFunc)CCSIDFT_32f, + (DFTFunc)DFT_64f, + (DFTFunc)RealDFT_64f, + (DFTFunc)CCSIDFT_64f + }; + int idx = 0; + if (stage == 0) + { + if (real_transform) { - memset( buf0, 0, len*complex_elem_size ); - CopyColumn( sptr0, src.step, buf0, complex_elem_size, len, elem_size ); - sptr0 += dst.channels()*elem_size; - if( even ) - { - memset( buf1, 0, len*complex_elem_size ); - CopyColumn( sptr0 + (count-2)*elem_size, src.step, - buf1, complex_elem_size, len, elem_size ); - } - } - else if( src.channels() == 1 ) - { - CopyColumn( sptr0, src.step, buf0, elem_size, len, elem_size ); - 
ExpandCCS( buf0, len, elem_size ); - if( even ) - { - CopyColumn( sptr0 + (count-1)*elem_size, src.step, - buf1, elem_size, len, elem_size ); - ExpandCCS( buf1, len, elem_size ); - } - sptr0 += elem_size; - } - else - { - CopyColumn( sptr0, src.step, buf0, complex_elem_size, len, complex_elem_size ); - if( even ) - { - CopyColumn( sptr0 + b*complex_elem_size, src.step, - buf1, complex_elem_size, len, complex_elem_size ); - } - sptr0 += complex_elem_size; - } - - if( even ) - dft_func( buf1, dbuf1, len, nf, factors, itab, - wave, len, spec, ptr, inv, scale ); - dft_func( buf0, dbuf0, len, nf, factors, itab, - wave, len, spec, ptr, inv, scale ); - - if( dst.channels() == 1 ) - { - if( !inv ) - { - // copy the half of output vector to the first/last column. - // before doing that, defgragment the vector - memcpy( dbuf0 + elem_size, dbuf0, elem_size ); - CopyColumn( dbuf0 + elem_size, elem_size, dptr0, - dst.step, len, elem_size ); - if( even ) - { - memcpy( dbuf1 + elem_size, dbuf1, elem_size ); - CopyColumn( dbuf1 + elem_size, elem_size, - dptr0 + (count-1)*elem_size, - dst.step, len, elem_size ); - } - dptr0 += elem_size; - } + if (!opt.isInverse) + idx = 1; else - { - // copy the real part of the complex vector to the first/last column - CopyColumn( dbuf0, complex_elem_size, dptr0, dst.step, len, elem_size ); - if( even ) - CopyColumn( dbuf1, complex_elem_size, dptr0 + (count-1)*elem_size, - dst.step, len, elem_size ); - dptr0 += elem_size; - } - } - else - { - assert( !inv ); - CopyColumn( dbuf0, complex_elem_size, dptr0, - dst.step, len, complex_elem_size ); - if( even ) - CopyColumn( dbuf1, complex_elem_size, - dptr0 + b*complex_elem_size, - dst.step, len, complex_elem_size ); - dptr0 += complex_elem_size; + idx = 2; } } + if (depth == CV_64F) + idx += 3; - for( i = a; i < b; i += 2 ) - { - if( i+1 < b ) - { - CopyFrom2Columns( sptr0, src.step, buf0, buf1, len, complex_elem_size ); - dft_func( buf1, dbuf1, len, nf, factors, itab, - wave, len, spec, ptr, inv, 
scale ); - } - else - CopyColumn( sptr0, src.step, buf0, complex_elem_size, len, complex_elem_size ); + opt.dft_func = dft_tbl[idx]; + } - dft_func( buf0, dbuf0, len, nf, factors, itab, - wave, len, spec, ptr, inv, scale ); - - if( i+1 < b ) - CopyTo2Columns( dbuf0, dbuf1, dptr0, dst.step, len, complex_elem_size ); - else - CopyColumn( dbuf0, complex_elem_size, dptr0, dst.step, len, complex_elem_size ); - sptr0 += 2*complex_elem_size; - dptr0 += 2*complex_elem_size; - } - - if( stage != 0 ) - { - if( !inv && real_transform && dst.channels() == 2 && len > 1 ) - complementComplexOutput(dst, len, 2); - break; - } - src = dst; + if(!needAnotherStage && (flags & CV_HAL_DFT_SCALE) != 0) + { + int rowCount = count; + if (stage == 0 && (flags & CV_HAL_DFT_ROWS) != 0) + rowCount = 1; + opt.scale = 1./(len * rowCount); } } + + void run(const void * src, void * dst) + { + opt.dft_func(opt, src, dst); + } + + void free() {} +}; + +namespace hal { + +//================== 1D ====================== + +void dftInit(DftContext & context, int len, int count, int depth, int flags, bool *needBuffer) +{ + int res = cv_hal_dftInit(&context.impl, len, count, depth, flags, needBuffer); + if (res == CV_HAL_ERROR_OK) + { + context.useReplacement = true; + return; + } + + context.useReplacement = false; + OcvDftBasicImpl * c = (OcvDftBasicImpl*)context.impl; + if (!c) + { + c = new OcvDftBasicImpl(); + context.impl = (void*)c; + } + c->init(len, count, depth, flags, needBuffer); +} + +void dftRun(const DftContext & context, const void * src, void * dst) +{ + if (context.useReplacement) + { + int res = cv_hal_dftRun(context.impl, src, dst); + if (res != CV_HAL_ERROR_OK) + { + CV_Error( CV_StsNotImplemented, "Custom HAL implementation failed to call dftRun"); + } + return; + } + OcvDftBasicImpl * c = (OcvDftBasicImpl*)context.impl; + c->run(src, dst); +} + +void dftFree(DftContext & context) +{ + if (context.useReplacement) + { + int res = cv_hal_dftFree(context.impl); + if (res != 
CV_HAL_ERROR_OK) + { + CV_Error( CV_StsNotImplemented, "Custom HAL implementation failed to call dftFree"); + } + return; + } + + OcvDftBasicImpl * c = (OcvDftBasicImpl*)context.impl; + if (c) + { + c->free(); + delete c; + context.impl = 0; + } +} + + +//================== 2D ====================== + +void dftInit2D(DftContext & c, + int _width, int _height, int _depth, int _src_channels, int _dst_channels, + int flags, + int _nonzero_rows) +{ + int res = cv_hal_dftInit2D(&c.impl, _width, _height, _depth, _src_channels, _dst_channels, flags, _nonzero_rows); + if (res == CV_HAL_ERROR_OK) + { + c.useReplacement = true; + return; + } + c.useReplacement = false; + + if( _width == 1 && _nonzero_rows > 0 ) + CV_Error( CV_StsNotImplemented, + "This mode (using nonzero_rows with a single-column matrix) breaks the function's logic, so it is prohibited.\n" + "For fast convolution/correlation use 2-column matrix or single-row matrix instead" ); + + OcvDftImpl * d = new OcvDftImpl(); + d->init(_width, _height, _depth, _src_channels, _dst_channels, flags, _nonzero_rows); + c.impl = (void*)d; +} + +void dftRun2D(const DftContext & c, + const void * src, int src_step, void * dst, int dst_step) +{ + if (c.useReplacement) + { + int res = cv_hal_dftRun2D(c.impl, (uchar*)src, src_step, (uchar*)dst, dst_step); + if (res != CV_HAL_ERROR_OK) + { + CV_Error( CV_StsNotImplemented, "Custom HAL implementation failed to call dftRun2D"); + } + return; + } + OcvDftImpl * d = (OcvDftImpl*)c.impl; + d->run((uchar*)src, src_step, (uchar*)dst, dst_step); +} + +void dftFree2D(DftContext & c) +{ + if (c.useReplacement) + { + int res = cv_hal_dftFree2D(c.impl); + if (res != CV_HAL_ERROR_OK) + { + CV_Error( CV_StsNotImplemented, "Custom HAL implementation failed to call dftFree2D"); + } + return; + } + OcvDftImpl * d = (OcvDftImpl*)c.impl; + d->free(); + delete d; + c.impl = 0; +} + +} // cv::hal:: + +} // cv:: + + +void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows ) +{ 
+#ifdef HAVE_CLAMDFFT + CV_OCL_RUN(ocl::haveAmdFft() && ocl::Device::getDefault().type() != ocl::Device::TYPE_CPU && + _dst.isUMat() && _src0.dims() <= 2 && nonzero_rows == 0, + ocl_dft_amdfft(_src0, _dst, flags)) +#endif + +#ifdef HAVE_OPENCL + CV_OCL_RUN(_dst.isUMat() && _src0.dims() <= 2, + ocl_dft(_src0, _dst, flags, nonzero_rows)) +#endif + + Mat src0 = _src0.getMat(), src = src0; + bool inv = (flags & DFT_INVERSE) != 0; + int type = src.type(); + int depth = src.depth(); + + CV_Assert( type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2 ); + + if( !inv && src.channels() == 1 && (flags & DFT_COMPLEX_OUTPUT) ) + _dst.create( src.size(), CV_MAKETYPE(depth, 2) ); + else if( inv && src.channels() == 2 && (flags & DFT_REAL_OUTPUT) ) + _dst.create( src.size(), depth ); + else + _dst.create( src.size(), type ); + + Mat dst = _dst.getMat(); + + int f = 0; + if (src.isContinuous() && dst.isContinuous()) + f |= CV_HAL_DFT_IS_CONTINUOUS; + if (inv) + f |= CV_HAL_DFT_INVERSE; + if (flags & DFT_ROWS) + f |= CV_HAL_DFT_ROWS; + if (flags & DFT_SCALE) + f |= CV_HAL_DFT_SCALE; + if (src.data == dst.data) + f |= CV_HAL_DFT_IS_INPLACE; + hal::DftContext c; + hal::dftInit2D(c, src.cols, src.rows, depth, src.channels(), dst.channels(), f, nonzero_rows); + hal::dftRun2D(c, src.data, (int)src.step, dst.data, (int)dst.step); + hal::dftFree2D(c); } @@ -3117,11 +3607,12 @@ namespace cv http://www.ece.utexas.edu/~bevans/courses/ee381k/lectures/09_DCT/lecture9/: */ template static void -DCT( const T* src, int src_step, T* dft_src, T* dft_dst, T* dst, int dst_step, - int n, int nf, int* factors, const int* itab, const Complex* dft_wave, - const Complex* dct_wave, const void* spec, Complex* buf ) +DCT( const OcvDftOptions & c, const T* src, int src_step, T* dft_src, T* dft_dst, T* dst, int dst_step, + const Complex* dct_wave ) { static const T sin_45 = (T)0.70710678118654752440084436210485; + + int n = c.n; int j, n2 = n >> 1; src_step /= sizeof(src[0]); @@ -3140,8 
+3631,7 @@ DCT( const T* src, int src_step, T* dft_src, T* dft_dst, T* dst, int dst_step, dft_src[n-j-1] = src[src_step]; } - RealDFT( dft_src, dft_dst, n, nf, factors, - itab, dft_wave, n, spec, buf, 0, 1.0 ); + RealDFT(c, dft_src, dft_dst); src = dft_dst; dst[0] = (T)(src[0]*dct_wave->re*sin_45); @@ -3160,11 +3650,11 @@ DCT( const T* src, int src_step, T* dft_src, T* dft_dst, T* dst, int dst_step, template static void -IDCT( const T* src, int src_step, T* dft_src, T* dft_dst, T* dst, int dst_step, - int n, int nf, int* factors, const int* itab, const Complex* dft_wave, - const Complex* dct_wave, const void* spec, Complex* buf ) +IDCT( const OcvDftOptions & c, const T* src, int src_step, T* dft_src, T* dft_dst, T* dst, int dst_step, + const Complex* dct_wave) { static const T sin_45 = (T)0.70710678118654752440084436210485; + int n = c.n; int j, n2 = n >> 1; src_step /= sizeof(src[0]); @@ -3189,8 +3679,7 @@ IDCT( const T* src, int src_step, T* dft_src, T* dft_dst, T* dst, int dst_step, } dft_src[n-1] = (T)(src[0]*2*dct_wave->re); - CCSIDFT( dft_src, dft_dst, n, nf, factors, itab, - dft_wave, n, spec, buf, 0, 1.0 ); + CCSIDFT(c, dft_src, dft_dst); for( j = 0; j < n2; j++, dst += dst_step*2 ) { @@ -3279,41 +3768,31 @@ DCTInit( int n, int elem_size, void* _wave, int inv ) } -typedef void (*DCTFunc)(const void* src, int src_step, void* dft_src, - void* dft_dst, void* dst, int dst_step, int n, - int nf, int* factors, const int* itab, const void* dft_wave, - const void* dct_wave, const void* spec, void* buf ); +typedef void (*DCTFunc)(const OcvDftOptions & c, const void* src, int src_step, void* dft_src, + void* dft_dst, void* dst, int dst_step, const void* dct_wave); -static void DCT_32f(const float* src, int src_step, float* dft_src, float* dft_dst, - float* dst, int dst_step, int n, int nf, int* factors, const int* itab, - const Complexf* dft_wave, const Complexf* dct_wave, const void* spec, Complexf* buf ) +static void DCT_32f(const OcvDftOptions & c, const float* 
src, int src_step, float* dft_src, float* dft_dst, + float* dst, int dst_step, const Complexf* dct_wave) { - DCT(src, src_step, dft_src, dft_dst, dst, dst_step, - n, nf, factors, itab, dft_wave, dct_wave, spec, buf); + DCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave); } -static void IDCT_32f(const float* src, int src_step, float* dft_src, float* dft_dst, - float* dst, int dst_step, int n, int nf, int* factors, const int* itab, - const Complexf* dft_wave, const Complexf* dct_wave, const void* spec, Complexf* buf ) +static void IDCT_32f(const OcvDftOptions & c, const float* src, int src_step, float* dft_src, float* dft_dst, + float* dst, int dst_step, const Complexf* dct_wave) { - IDCT(src, src_step, dft_src, dft_dst, dst, dst_step, - n, nf, factors, itab, dft_wave, dct_wave, spec, buf); + IDCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave); } -static void DCT_64f(const double* src, int src_step, double* dft_src, double* dft_dst, - double* dst, int dst_step, int n, int nf, int* factors, const int* itab, - const Complexd* dft_wave, const Complexd* dct_wave, const void* spec, Complexd* buf ) +static void DCT_64f(const OcvDftOptions & c, const double* src, int src_step, double* dft_src, double* dft_dst, + double* dst, int dst_step, const Complexd* dct_wave) { - DCT(src, src_step, dft_src, dft_dst, dst, dst_step, - n, nf, factors, itab, dft_wave, dct_wave, spec, buf); + DCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave); } -static void IDCT_64f(const double* src, int src_step, double* dft_src, double* dft_dst, - double* dst, int dst_step, int n, int nf, int* factors, const int* itab, - const Complexd* dft_wave, const Complexd* dct_wave, const void* spec, Complexd* buf ) +static void IDCT_64f(const OcvDftOptions & c, const double* src, int src_step, double* dft_src, double* dft_dst, + double* dst, int dst_step, const Complexd* dct_wave) { - IDCT(src, src_step, dft_src, dft_dst, dst, dst_step, - n, nf, factors, itab, dft_wave, 
dct_wave, spec, buf); + IDCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave); } } @@ -3336,8 +3815,8 @@ typedef IppStatus (CV_STDCALL * ippiDCTGetBufSize)(const void*, int*); class DctIPPLoop_Invoker : public ParallelLoopBody { public: - DctIPPLoop_Invoker(const Mat& _src, Mat& _dst, bool _inv, bool *_ok) : - ParallelLoopBody(), src(&_src), dst(&_dst), inv(_inv), ok(_ok) + DctIPPLoop_Invoker(const uchar * _src, int _src_step, uchar * _dst, int _dst_step, int _width, bool _inv, bool *_ok) : + ParallelLoopBody(), src(_src), src_step(_src_step), dst(_dst), dst_step(_dst_step), width(_width), inv(_inv), ok(_ok) { *ok = true; } @@ -3348,7 +3827,7 @@ public: return; #if IPP_VERSION_X100 >= 900 - IppiSize srcRoiSize = {src->cols, 1}; + IppiSize srcRoiSize = {width, 1}; int specSize = 0; int initSize = 0; @@ -3405,7 +3884,7 @@ public: for(int i = range.start; i < range.end; ++i) { - if(ippDctFun(src->ptr(i), (int)src->step,dst->ptr(i), (int)dst->step, pDCTSpec, pBuffer) < 0) + if(ippDctFun((float*)(src + src_step * i), src_step, (float*)(dst + dst_step * i), dst_step, pDCTSpec, pBuffer) < 0) { *ok = false; IPP_RETURN @@ -3419,7 +3898,7 @@ public: uchar* pBuffer = 0; int bufSize=0; - IppiSize srcRoiSize = {src->cols, 1}; + IppiSize srcRoiSize = {width, 1}; CV_SUPPRESS_DEPRECATED_START @@ -3435,7 +3914,7 @@ public: for( int i = range.start; i < range.end; ++i) { - if(ippDctFun(src->ptr(i), (int)src->step,dst->ptr(i), (int)dst->step, pDCTSpec, (Ipp8u*)pBuffer) < 0) + if(ippDctFun((float*)(src + src_step * i), src_step, (float*)(dst + dst_step * i), dst_step, pDCTSpec, (Ipp8u*)pBuffer) < 0) { *ok = false; break; @@ -3456,27 +3935,30 @@ public: } private: - const Mat* src; - Mat* dst; + const uchar * src; + int src_step; + uchar * dst; + int dst_step; + int width; bool inv; bool *ok; }; -static bool DctIPPLoop(const Mat& src, Mat& dst, bool inv) +static bool DctIPPLoop(const uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, bool inv) { 
bool ok; - parallel_for_(Range(0, src.rows), DctIPPLoop_Invoker(src, dst, inv, &ok), src.rows/(double)(1<<4) ); + parallel_for_(Range(0, height), DctIPPLoop_Invoker(src, src_step, dst, dst_step, width, inv, &ok), height/(double)(1<<4) ); return ok; } -static bool ippi_DCT_32f(const Mat& src, Mat& dst, bool inv, bool row) +static bool ippi_DCT_32f(const uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, bool inv, bool row) { if(row) - return DctIPPLoop(src, dst, inv); + return DctIPPLoop(src, src_step, dst, dst_step, width, height, inv); else { #if IPP_VERSION_X100 >= 900 - IppiSize srcRoiSize = {src.cols, src.rows}; + IppiSize srcRoiSize = {width, height}; int specSize = 0; int initSize = 0; @@ -3524,7 +4006,7 @@ static bool ippi_DCT_32f(const Mat& src, Mat& dst, bool inv, bool row) return false; } - if(ippDctFun(src.ptr(), (int)src.step,dst.ptr(), (int)dst.step, pDCTSpec, pBuffer) < 0) + if(ippDctFun((float*)src, src_step, (float*)dst, dst_step, pDCTSpec, pBuffer) < 0) { IPP_RELEASE return false; @@ -3540,7 +4022,7 @@ static bool ippi_DCT_32f(const Mat& src, Mat& dst, bool inv, bool row) uchar* pBuffer = 0; int bufSize=0; - IppiSize srcRoiSize = {src.cols, src.rows}; + IppiSize srcRoiSize = {width, height}; CV_SUPPRESS_DEPRECATED_START @@ -3556,7 +4038,7 @@ static bool ippi_DCT_32f(const Mat& src, Mat& dst, bool inv, bool row) buf.allocate( bufSize ); pBuffer = (uchar*)buf; - status = ippDctFun(src.ptr(), (int)src.step, dst.ptr(), (int)dst.step, pDCTSpec, (Ipp8u*)pBuffer); + status = ippDctFun((float*)src, src_step, (float*)dst, dst_step, pDCTSpec, (Ipp8u*)pBuffer); } if (pDCTSpec) @@ -3574,140 +4056,219 @@ static bool ippi_DCT_32f(const Mat& src, Mat& dst, bool inv, bool row) } #endif +namespace cv { + +class OcvDctImpl +{ +public: + OcvDftOptions opt; + + int _factors[34]; + AutoBuffer wave_buf; + AutoBuffer itab_buf; + + DCTFunc dct_func; + bool isRowTransform; + bool isInverse; + bool isContinuous; + int start_stage; + int end_stage; 
+ int width; + int height; + int depth; + + void init(int _width, int _height, int _depth, int flags) + { + width = _width; + height = _height; + depth = _depth; + isInverse = (flags & CV_HAL_DFT_INVERSE) != 0; + isRowTransform = (flags & CV_HAL_DFT_ROWS) != 0; + isContinuous = (flags & CV_HAL_DFT_IS_CONTINUOUS) != 0; + static DCTFunc dct_tbl[4] = + { + (DCTFunc)DCT_32f, + (DCTFunc)IDCT_32f, + (DCTFunc)DCT_64f, + (DCTFunc)IDCT_64f + }; + dct_func = dct_tbl[(int)isInverse + (depth == CV_64F)*2]; + opt.nf = 0; + opt.isComplex = false; + opt.isInverse = false; + opt.noPermute = false; + opt.scale = 1.; + opt.factors = _factors; + + if (isRowTransform || height == 1 || (width == 1 && isContinuous)) + { + start_stage = end_stage = 0; + } + else + { + start_stage = (width == 1); + end_stage = 1; + } + } + void run(uchar * src, int src_step, uchar * dst, int dst_step) + { + CV_IPP_RUN(IPP_VERSION_X100 >= 700 && depth == CV_32F, ippi_DCT_32f(src, src_step, dst, dst_step, width, height, isInverse, isRowTransform)) + + AutoBuffer dct_wave; + AutoBuffer src_buf, dst_buf; + uchar *src_dft_buf = 0, *dst_dft_buf = 0; + int prev_len = 0; + int elem_size = (depth == CV_32F) ? 
sizeof(float) : sizeof(double); + int complex_elem_size = elem_size*2; + + for(int stage = start_stage ; stage <= end_stage; stage++ ) + { + const uchar* sptr = src; + uchar* dptr = dst; + size_t sstep0, sstep1, dstep0, dstep1; + int len, count; + + if( stage == 0 ) + { + len = width; + count = height; + if( len == 1 && !isRowTransform ) + { + len = height; + count = 1; + } + sstep0 = src_step; + dstep0 = dst_step; + sstep1 = dstep1 = elem_size; + } + else + { + len = height; + count = width; + sstep1 = src_step; + dstep1 = dst_step; + sstep0 = dstep0 = elem_size; + } + + opt.n = len; + opt.tab_size = len; + + if( len != prev_len ) + { + if( len > 1 && (len & 1) ) + CV_Error( CV_StsNotImplemented, "Odd-size DCT\'s are not implemented" ); + + opt.nf = DFTFactorize( len, opt.factors ); + bool inplace_transform = opt.factors[0] == opt.factors[opt.nf-1]; + + wave_buf.allocate(len*complex_elem_size); + opt.wave = wave_buf; + itab_buf.allocate(len); + opt.itab = itab_buf; + DFTInit( len, opt.nf, opt.factors, opt.itab, complex_elem_size, opt.wave, isInverse ); + + dct_wave.allocate((len/2 + 1)*complex_elem_size); + src_buf.allocate(len*elem_size); + src_dft_buf = src_buf; + if(!inplace_transform) + { + dst_buf.allocate(len*elem_size); + dst_dft_buf = dst_buf; + } + else + { + dst_dft_buf = src_buf; + } + DCTInit( len, complex_elem_size, dct_wave, isInverse); + prev_len = len; + } + // otherwise reuse the tables calculated on the previous stage + for(int i = 0; i < count; i++ ) + { + dct_func( opt, sptr + i*sstep0, (int)sstep1, src_dft_buf, dst_dft_buf, + dptr + i*dstep0, (int)dstep1, dct_wave); + } + src = dst; + src_step = dst_step; + } + + } + void free() {} +}; + +namespace hal { + +void dctInit(DftContext & c, int width, int height, int depth, int flags) +{ + int res = cv_hal_dctInit(&c.impl, width, height, depth, flags); + if (res == CV_HAL_ERROR_OK) + { + c.useReplacement = true; + return; + } + c.useReplacement = false; + OcvDctImpl * impl = new OcvDctImpl(); + 
impl->init(width, height, depth, flags); + c.impl = impl; +} + +void dctRun(const DftContext & c, const void * src, int src_step, void * dst, int dst_step) +{ + if (c.useReplacement) + { + int res = cv_hal_dctRun(c.impl, src, src_step, dst, dst_step); + if (res != CV_HAL_ERROR_OK) + { + CV_Error( CV_StsNotImplemented, "Custom HAL implementation failed to call dctRun"); + } + return; + } + OcvDctImpl * impl = (OcvDctImpl*)c.impl; + impl->run((uchar*)src, src_step, (uchar*)dst, dst_step); +} + +void dctFree(DftContext & c) +{ + if (c.useReplacement) + { + int res = cv_hal_dctFree(c.impl); + if (res != CV_HAL_ERROR_OK) + { + CV_Error( CV_StsNotImplemented, "Custom HAL implementation failed to call dctFree"); + } + return; + } + OcvDctImpl * impl = (OcvDctImpl*)c.impl; + impl->free(); + delete impl; + c.impl = 0; +} + +} // cv::hal:: + +} // cv:: + void cv::dct( InputArray _src0, OutputArray _dst, int flags ) { - static DCTFunc dct_tbl[4] = - { - (DCTFunc)DCT_32f, - (DCTFunc)IDCT_32f, - (DCTFunc)DCT_64f, - (DCTFunc)IDCT_64f - }; - - bool inv = (flags & DCT_INVERSE) != 0; Mat src0 = _src0.getMat(), src = src0; int type = src.type(), depth = src.depth(); - void *spec = 0; - - double scale = 1.; - int prev_len = 0, nf = 0, stage, end_stage; - uchar *src_dft_buf = 0, *dst_dft_buf = 0; - uchar *dft_wave = 0, *dct_wave = 0; - int* itab = 0; - uchar* ptr = 0; - int elem_size = (int)src.elemSize(), complex_elem_size = elem_size*2; - int factors[34], inplace_transform; - int i, len, count; - AutoBuffer buf; CV_Assert( type == CV_32FC1 || type == CV_64FC1 ); _dst.create( src.rows, src.cols, type ); Mat dst = _dst.getMat(); - CV_IPP_RUN(IPP_VERSION_X100 >= 700 && src.type() == CV_32F, ippi_DCT_32f(src, dst, inv, ((flags & DCT_ROWS) != 0))) + int f = 0; + if ((flags & DFT_ROWS) != 0) + f |= CV_HAL_DFT_ROWS; + if ((flags & DCT_INVERSE) != 0) + f |= CV_HAL_DFT_INVERSE; + if (src.isContinuous() && dst.isContinuous()) + f |= CV_HAL_DFT_IS_CONTINUOUS; - DCTFunc dct_func = 
dct_tbl[(int)inv + (depth == CV_64F)*2]; - - if( (flags & DCT_ROWS) || src.rows == 1 || - (src.cols == 1 && (src.isContinuous() && dst.isContinuous()))) - { - stage = end_stage = 0; - } - else - { - stage = src.cols == 1; - end_stage = 1; - } - - for( ; stage <= end_stage; stage++ ) - { - const uchar* sptr = src.ptr(); - uchar* dptr = dst.ptr(); - size_t sstep0, sstep1, dstep0, dstep1; - - if( stage == 0 ) - { - len = src.cols; - count = src.rows; - if( len == 1 && !(flags & DCT_ROWS) ) - { - len = src.rows; - count = 1; - } - sstep0 = src.step; - dstep0 = dst.step; - sstep1 = dstep1 = elem_size; - } - else - { - len = dst.rows; - count = dst.cols; - sstep1 = src.step; - dstep1 = dst.step; - sstep0 = dstep0 = elem_size; - } - - if( len != prev_len ) - { - int sz; - - if( len > 1 && (len & 1) ) - CV_Error( CV_StsNotImplemented, "Odd-size DCT\'s are not implemented" ); - - sz = len*elem_size; - sz += (len/2 + 1)*complex_elem_size; - - spec = 0; - inplace_transform = 1; - { - sz += len*(complex_elem_size + sizeof(int)) + complex_elem_size; - - nf = DFTFactorize( len, factors ); - inplace_transform = factors[0] == factors[nf-1]; - - i = nf > 1 && (factors[0] & 1) == 0; - if( (factors[i] & 1) != 0 && factors[i] > 5 ) - sz += (factors[i]+1)*complex_elem_size; - - if( !inplace_transform ) - sz += len*elem_size; - } - - buf.allocate( sz + 32 ); - ptr = (uchar*)buf; - - if( !spec ) - { - dft_wave = ptr; - ptr += len*complex_elem_size; - itab = (int*)ptr; - ptr = (uchar*)cvAlignPtr( ptr + len*sizeof(int), 16 ); - DFTInit( len, nf, factors, itab, complex_elem_size, dft_wave, inv ); - } - - dct_wave = ptr; - ptr += (len/2 + 1)*complex_elem_size; - src_dft_buf = dst_dft_buf = ptr; - ptr += len*elem_size; - if( !inplace_transform ) - { - dst_dft_buf = ptr; - ptr += len*elem_size; - } - DCTInit( len, complex_elem_size, dct_wave, inv ); - if( !inv ) - scale += scale; - prev_len = len; - } - // otherwise reuse the tables calculated on the previous stage - for( i = 0; i < count; i++ 
) - { - dct_func( sptr + i*sstep0, (int)sstep1, src_dft_buf, dst_dft_buf, - dptr + i*dstep0, (int)dstep1, len, nf, factors, - itab, dft_wave, dct_wave, spec, ptr ); - } - src = dst; - } + hal::DftContext c; + hal::dctInit(c, src.cols, src.rows, depth, f); + hal::dctRun(c, (void*)src.data, (int)src.step, (void*)dst.data, (int)dst.step); + hal::dctFree(c); } diff --git a/modules/core/src/hal_replacement.hpp b/modules/core/src/hal_replacement.hpp index 69345ca4a..d4d43332c 100644 --- a/modules/core/src/hal_replacement.hpp +++ b/modules/core/src/hal_replacement.hpp @@ -384,6 +384,31 @@ inline int hal_ni_merge64s(const int64 **src_data, int64 *dst_data, int len, int # pragma warning( pop ) #endif +inline int hal_ni_dftInit(void**, int, int, int, int, bool*) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_dftRun(const void*, const void*, void*) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_dftFree(void*) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } + +#define cv_hal_dftInit hal_ni_dftInit +#define cv_hal_dftRun hal_ni_dftRun +#define cv_hal_dftFree hal_ni_dftFree + +inline int hal_ni_dftInit2D(void **, int, int, int, int, int, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_dftRun2D(const void *, const void *, int, void *, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_dftFree2D(void *) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } + +#define cv_hal_dftInit2D hal_ni_dftInit2D +#define cv_hal_dftRun2D hal_ni_dftRun2D +#define cv_hal_dftFree2D hal_ni_dftFree2D + + +inline int hal_ni_dctInit(void **, int, int, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_dctRun(const void *, const void *, int, void *, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_dctFree(void *) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } + +#define cv_hal_dctInit hal_ni_dctInit +#define cv_hal_dctRun hal_ni_dctRun +#define cv_hal_dctFree hal_ni_dctFree + #include "custom_hal.hpp" #endif diff --git 
a/modules/core/test/test_dxt.cpp b/modules/core/test/test_dxt.cpp index ad75e52dd..45994e0e1 100644 --- a/modules/core/test/test_dxt.cpp +++ b/modules/core/test/test_dxt.cpp @@ -887,3 +887,79 @@ TEST(Core_DFT, complex_output2) } } } + +class Core_DXTReverseTest : public cvtest::BaseTest +{ +public: + enum Mode + { + ModeDFT, + ModeDCT + }; + Core_DXTReverseTest(Mode m) : mode(m) {} +private: + Mode mode; +protected: + void run(int) + { + for (int i = 0; i < 3; ++i) + { + if (mode == ModeDCT && i != 0) + continue; + int flags = 0; + int flags_inv = DFT_INVERSE | DFT_SCALE; + int cn_in = 0; + int cn_out = 0; + switch (i) + { + case 0: cn_in = 1; cn_out = 1; break; + case 1: cn_in = 1; cn_out = 2; flags |= DFT_COMPLEX_OUTPUT; flags_inv |= DFT_REAL_OUTPUT; break; + case 2: cn_in = 2; cn_out = 2; break; + }; + for (int j = 0; j < 100; ++j) + { + RNG& rng = ts->get_rng(); + int type = rng.uniform(0, 2) ? CV_64F : CV_32F; + int m = rng.uniform(1, 10); + int n = rng.uniform(1, 10); + if (mode == ModeDCT) + { + m *= 2; + n *= 2; + } + Mat one(m, n, CV_MAKETYPE(type, cn_in)); + cvtest::randUni(rng, one, Scalar::all(-1.), Scalar::all(1.)); + Mat out; + Mat two; + if (mode == ModeDFT) + { + cv::dft(one, out, flags); + cv::dft(out, two, flags_inv); + } + else if (mode == ModeDCT) + { + cv::dct(one, out, flags); + cv::dct(out, two, flags_inv); + } + if (out.channels() != cn_out || two.channels() != cn_in || cvtest::norm(one, two, NORM_INF) > 1e-5) + { + cout << "Test #" << j + 1 << " - " + << "elements: " << m << " x " << n << ", " + << "channels: " + << one.channels() << " (" << cn_in << ")" << " -> " + << out.channels() << " (" << cn_out << ")" << " -> " + << two.channels() << " (" << cn_in << ")" + << endl; + cout << "signal:\n" << one << endl << endl; + cout << "spectrum:\n" << out << endl << endl; + cout << "inverse:\n" << two << endl << endl; + ts->set_failed_test_info(cvtest::TS::FAIL_INVALID_OUTPUT); + break; + } + } + } + } +}; + +TEST(Core_DFT, reverse) { 
Core_DXTReverseTest test(Core_DXTReverseTest::ModeDFT); test.safe_run(); } +TEST(Core_DCT, reverse) { Core_DXTReverseTest test(Core_DXTReverseTest::ModeDCT); test.safe_run(); } diff --git a/modules/imgproc/src/templmatch.cpp b/modules/imgproc/src/templmatch.cpp index 59b07032b..64742eaa2 100644 --- a/modules/imgproc/src/templmatch.cpp +++ b/modules/imgproc/src/templmatch.cpp @@ -632,6 +632,8 @@ static bool ipp_sqrDistance(const Mat& src, const Mat& tpl, Mat& dst) #endif +#include "opencv2/core/hal/hal.hpp" + void crossCorr( const Mat& img, const Mat& _templ, Mat& corr, Size corrsize, int ctype, Point anchor, double delta, int borderType ) @@ -698,6 +700,9 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr, buf.resize(bufSize); + hal::DftContext c; + hal::dftInit2D(c, dftsize.width, dftsize.height, dftTempl.depth(), 1, 1, CV_HAL_DFT_IS_INPLACE, templ.rows); + // compute DFT of each template plane for( k = 0; k < tcn; k++ ) { @@ -721,9 +726,11 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr, Mat part(dst, Range(0, templ.rows), Range(templ.cols, dst.cols)); part = Scalar::all(0); } - dft(dst, dst, 0, templ.rows); + hal::dftRun2D(c, dst.data, (int)dst.step, dst.data, (int)dst.step); } + hal::dftFree2D(c); + int tileCountX = (corr.cols + blocksize.width - 1)/blocksize.width; int tileCountY = (corr.rows + blocksize.height - 1)/blocksize.height; int tileCount = tileCountX * tileCountY; @@ -740,6 +747,16 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr, } borderType |= BORDER_ISOLATED; + bool useHalDft = tileCount > 1; + hal::DftContext cF, cR; + if (useHalDft) + { + int f = CV_HAL_DFT_IS_INPLACE; + int f_inv = f | CV_HAL_DFT_INVERSE | CV_HAL_DFT_SCALE; + hal::dftInit2D(cF, dftsize.width, dftsize.height, maxDepth, 1, 1, f, blocksize.height + templ.rows - 1); + hal::dftInit2D(cR, dftsize.width, dftsize.height, maxDepth, 1, 1, f_inv, blocksize.height); + } + // calculate correlation by blocks for( i = 0; i < tileCount; i++ ) { @@ 
-777,11 +794,19 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr, copyMakeBorder(dst1, dst, y1-y0, dst.rows-dst1.rows-(y1-y0), x1-x0, dst.cols-dst1.cols-(x1-x0), borderType); - dft( dftImg, dftImg, 0, dsz.height ); + if (useHalDft && bsz.height == blocksize.height) + hal::dftRun2D(cF, dftImg.data, (int)dftImg.step, dftImg.data, (int)dftImg.step); + else + dft( dftImg, dftImg, 0, dsz.height ); + Mat dftTempl1(dftTempl, Rect(0, tcn > 1 ? k*dftsize.height : 0, dftsize.width, dftsize.height)); mulSpectrums(dftImg, dftTempl1, dftImg, 0, true); - dft( dftImg, dftImg, DFT_INVERSE + DFT_SCALE, bsz.height ); + + if (useHalDft && bsz.height == blocksize.height) + hal::dftRun2D(cR, dftImg.data, (int)dftImg.step, dftImg.data, (int)dftImg.step); + else + dft( dftImg, dftImg, DFT_INVERSE + DFT_SCALE, bsz.height ); src = dftImg(Rect(0, 0, bsz.width, bsz.height)); @@ -813,6 +838,11 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr, } } } + if (useHalDft) + { + hal::dftFree2D(cF); + hal::dftFree2D(cR); + } } static void matchTemplateMask( InputArray _img, InputArray _templ, OutputArray _result, int method, InputArray _mask ) From 15783cf668b989617f56c40aaf1ceed9e118086d Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Thu, 4 Feb 2016 19:29:23 +0300 Subject: [PATCH 3/7] Always use hal::dft in crossCorr function --- modules/imgproc/src/templmatch.cpp | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/modules/imgproc/src/templmatch.cpp b/modules/imgproc/src/templmatch.cpp index 64742eaa2..6353f14ff 100644 --- a/modules/imgproc/src/templmatch.cpp +++ b/modules/imgproc/src/templmatch.cpp @@ -747,15 +747,11 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr, } borderType |= BORDER_ISOLATED; - bool useHalDft = tileCount > 1; hal::DftContext cF, cR; - if (useHalDft) - { - int f = CV_HAL_DFT_IS_INPLACE; - int f_inv = f | CV_HAL_DFT_INVERSE | CV_HAL_DFT_SCALE; - hal::dftInit2D(cF, dftsize.width, 
dftsize.height, maxDepth, 1, 1, f, blocksize.height + templ.rows - 1); - hal::dftInit2D(cR, dftsize.width, dftsize.height, maxDepth, 1, 1, f_inv, blocksize.height); - } + int f = CV_HAL_DFT_IS_INPLACE; + int f_inv = f | CV_HAL_DFT_INVERSE | CV_HAL_DFT_SCALE; + hal::dftInit2D(cF, dftsize.width, dftsize.height, maxDepth, 1, 1, f, blocksize.height + templ.rows - 1); + hal::dftInit2D(cR, dftsize.width, dftsize.height, maxDepth, 1, 1, f_inv, blocksize.height); // calculate correlation by blocks for( i = 0; i < tileCount; i++ ) @@ -794,7 +790,7 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr, copyMakeBorder(dst1, dst, y1-y0, dst.rows-dst1.rows-(y1-y0), x1-x0, dst.cols-dst1.cols-(x1-x0), borderType); - if (useHalDft && bsz.height == blocksize.height) + if (bsz.height == blocksize.height) hal::dftRun2D(cF, dftImg.data, (int)dftImg.step, dftImg.data, (int)dftImg.step); else dft( dftImg, dftImg, 0, dsz.height ); @@ -803,7 +799,7 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr, dftsize.width, dftsize.height)); mulSpectrums(dftImg, dftTempl1, dftImg, 0, true); - if (useHalDft && bsz.height == blocksize.height) + if (bsz.height == blocksize.height) hal::dftRun2D(cR, dftImg.data, (int)dftImg.step, dftImg.data, (int)dftImg.step); else dft( dftImg, dftImg, DFT_INVERSE + DFT_SCALE, bsz.height ); @@ -838,11 +834,8 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr, } } } - if (useHalDft) - { - hal::dftFree2D(cF); - hal::dftFree2D(cR); - } + hal::dftFree2D(cF); + hal::dftFree2D(cR); } static void matchTemplateMask( InputArray _img, InputArray _templ, OutputArray _result, int method, InputArray _mask ) From f40d701427d0c9aed8dc78588401bdd36f35ea91 Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Fri, 5 Feb 2016 11:40:40 +0300 Subject: [PATCH 4/7] DFT: renamed HAL functions --- modules/core/include/opencv2/core/hal/hal.hpp | 14 ++--- modules/core/src/dxt.cpp | 62 +++++++++---------- modules/core/src/hal_replacement.hpp | 28 ++++----- 
modules/imgproc/src/templmatch.cpp | 6 +- 4 files changed, 55 insertions(+), 55 deletions(-) diff --git a/modules/core/include/opencv2/core/hal/hal.hpp b/modules/core/include/opencv2/core/hal/hal.hpp index 52a5f99b3..6b9f93dbf 100644 --- a/modules/core/include/opencv2/core/hal/hal.hpp +++ b/modules/core/include/opencv2/core/hal/hal.hpp @@ -195,16 +195,16 @@ struct DftContext }; CV_EXPORTS void dftInit2D(DftContext & c, int _width, int _height, int _depth, int _src_channels, int _dst_channels, int flags, int _nonzero_rows = 0); -CV_EXPORTS void dftRun2D(const DftContext & c, const void * src, int src_step, void * dst, int dst_step); +CV_EXPORTS void dft2D(const DftContext & c, const void * src, int src_step, void * dst, int dst_step); CV_EXPORTS void dftFree2D(DftContext & c); -CV_EXPORTS void dftInit(DftContext & c, int len, int count, int depth, int flags, bool * useBuffer = 0); -CV_EXPORTS void dftRun(const DftContext & c, const void * src, void * dst); -CV_EXPORTS void dftFree(DftContext & c); +CV_EXPORTS void dftInit1D(DftContext & c, int len, int count, int depth, int flags, bool * useBuffer = 0); +CV_EXPORTS void dft1D(const DftContext & c, const void * src, void * dst); +CV_EXPORTS void dftFree1D(DftContext & c); -CV_EXPORTS void dctInit(DftContext & c, int width, int height, int depth, int flags); -CV_EXPORTS void dctRun(const DftContext & c, const void * src, int src_step, void * dst, int dst_step); -CV_EXPORTS void dctFree(DftContext & c); +CV_EXPORTS void dctInit2D(DftContext & c, int width, int height, int depth, int flags); +CV_EXPORTS void dct2D(const DftContext & c, const void * src, int src_step, void * dst, int dst_step); +CV_EXPORTS void dctFree2D(DftContext & c); //! 
@} core_hal diff --git a/modules/core/src/dxt.cpp b/modules/core/src/dxt.cpp index 1265091bc..1ea549675 100644 --- a/modules/core/src/dxt.cpp +++ b/modules/core/src/dxt.cpp @@ -2763,7 +2763,7 @@ public: count = height; } needBufferA = isInplace; - hal::dftInit(contextA, len, count, depth, f, &needBufferA); + hal::dftInit1D(contextA, len, count, depth, f, &needBufferA); if (needBufferA) tmp_bufA.allocate(len * complex_elem_size); } @@ -2773,7 +2773,7 @@ public: count = width; f |= CV_HAL_DFT_STAGE_COLS; needBufferB = isInplace; - hal::dftInit(contextB, len, count, depth, f, &needBufferB); + hal::dftInit1D(contextB, len, count, depth, f, &needBufferB); if (needBufferB) tmp_bufB.allocate(len * complex_elem_size); @@ -2864,8 +2864,8 @@ public: { if (useIpp) return; - hal::dftFree(contextA); - hal::dftFree(contextB); + hal::dftFree1D(contextA); + hal::dftFree1D(contextB); } protected: @@ -2909,7 +2909,7 @@ protected: if( needBufferA ) dptr = tmp_bufA; - hal::dftRun(contextA, sptr, dptr); + hal::dft1D(contextA, sptr, dptr); if( needBufferA ) memcpy( dptr0, dptr + dptr_offset, dst_full_len ); @@ -2983,8 +2983,8 @@ protected: } if( even ) - hal::dftRun(contextB, buf1, dbuf1); - hal::dftRun(contextB, buf0, dbuf0); + hal::dft1D(contextB, buf1, dbuf1); + hal::dft1D(contextB, buf0, dbuf0); if( stage_dst_channels == 1 ) { @@ -3032,12 +3032,12 @@ protected: if( i+1 < b ) { CopyFrom2Columns( sptr0, src_step, buf0, buf1, len, complex_elem_size ); - hal::dftRun(contextB, buf1, dbuf1); + hal::dft1D(contextB, buf1, dbuf1); } else CopyColumn( sptr0, src_step, buf0, complex_elem_size, len, complex_elem_size ); - hal::dftRun(contextB, buf0, dbuf0); + hal::dft1D(contextB, buf0, dbuf0); if( i+1 < b ) CopyTo2Columns( dbuf0, dbuf1, dptr0, dst_step, len, complex_elem_size ); @@ -3223,9 +3223,9 @@ namespace hal { //================== 1D ====================== -void dftInit(DftContext & context, int len, int count, int depth, int flags, bool *needBuffer) +void dftInit1D(DftContext & context, 
int len, int count, int depth, int flags, bool *needBuffer) { - int res = cv_hal_dftInit(&context.impl, len, count, depth, flags, needBuffer); + int res = cv_hal_dftInit1D(&context.impl, len, count, depth, flags, needBuffer); if (res == CV_HAL_ERROR_OK) { context.useReplacement = true; @@ -3242,11 +3242,11 @@ void dftInit(DftContext & context, int len, int count, int depth, int flags, boo c->init(len, count, depth, flags, needBuffer); } -void dftRun(const DftContext & context, const void * src, void * dst) +void dft1D(const DftContext & context, const void * src, void * dst) { if (context.useReplacement) { - int res = cv_hal_dftRun(context.impl, src, dst); + int res = cv_hal_dft1D(context.impl, src, dst); if (res != CV_HAL_ERROR_OK) { CV_Error( CV_StsNotImplemented, "Custom HAL implementation failed to call dftRun"); @@ -3257,11 +3257,11 @@ void dftRun(const DftContext & context, const void * src, void * dst) c->run(src, dst); } -void dftFree(DftContext & context) +void dftFree1D(DftContext & context) { if (context.useReplacement) { - int res = cv_hal_dftFree(context.impl); + int res = cv_hal_dftFree1D(context.impl); if (res != CV_HAL_ERROR_OK) { CV_Error( CV_StsNotImplemented, "Custom HAL implementation failed to call dftFree"); @@ -3282,9 +3282,9 @@ void dftFree(DftContext & context) //================== 2D ====================== void dftInit2D(DftContext & c, - int _width, int _height, int _depth, int _src_channels, int _dst_channels, - int flags, - int _nonzero_rows) + int _width, int _height, int _depth, int _src_channels, int _dst_channels, + int flags, + int _nonzero_rows) { int res = cv_hal_dftInit2D(&c.impl, _width, _height, _depth, _src_channels, _dst_channels, flags, _nonzero_rows); if (res == CV_HAL_ERROR_OK) @@ -3304,12 +3304,12 @@ void dftInit2D(DftContext & c, c.impl = (void*)d; } -void dftRun2D(const DftContext & c, - const void * src, int src_step, void * dst, int dst_step) +void dft2D(const DftContext & c, + const void * src, int src_step, void * 
dst, int dst_step) { if (c.useReplacement) { - int res = cv_hal_dftRun2D(c.impl, (uchar*)src, src_step, (uchar*)dst, dst_step); + int res = cv_hal_dft2D(c.impl, (uchar*)src, src_step, (uchar*)dst, dst_step); if (res != CV_HAL_ERROR_OK) { CV_Error( CV_StsNotImplemented, "Custom HAL implementation failed to call dftRun2D"); @@ -3384,7 +3384,7 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows ) f |= CV_HAL_DFT_IS_INPLACE; hal::DftContext c; hal::dftInit2D(c, src.cols, src.rows, depth, src.channels(), dst.channels(), f, nonzero_rows); - hal::dftRun2D(c, src.data, (int)src.step, dst.data, (int)dst.step); + hal::dft2D(c, src.data, (int)src.step, dst.data, (int)dst.step); hal::dftFree2D(c); } @@ -4198,9 +4198,9 @@ public: namespace hal { -void dctInit(DftContext & c, int width, int height, int depth, int flags) +void dctInit2D(DftContext & c, int width, int height, int depth, int flags) { - int res = cv_hal_dctInit(&c.impl, width, height, depth, flags); + int res = cv_hal_dctInit2D(&c.impl, width, height, depth, flags); if (res == CV_HAL_ERROR_OK) { c.useReplacement = true; @@ -4212,11 +4212,11 @@ void dctInit(DftContext & c, int width, int height, int depth, int flags) c.impl = impl; } -void dctRun(const DftContext & c, const void * src, int src_step, void * dst, int dst_step) +void dct2D(const DftContext & c, const void * src, int src_step, void * dst, int dst_step) { if (c.useReplacement) { - int res = cv_hal_dctRun(c.impl, src, src_step, dst, dst_step); + int res = cv_hal_dct2D(c.impl, src, src_step, dst, dst_step); if (res != CV_HAL_ERROR_OK) { CV_Error( CV_StsNotImplemented, "Custom HAL implementation failed to call dctRun"); @@ -4227,11 +4227,11 @@ void dctRun(const DftContext & c, const void * src, int src_step, void * dst, in impl->run((uchar*)src, src_step, (uchar*)dst, dst_step); } -void dctFree(DftContext & c) +void dctFree2D(DftContext & c) { if (c.useReplacement) { - int res = cv_hal_dctFree(c.impl); + int res = 
cv_hal_dctFree2D(c.impl); if (res != CV_HAL_ERROR_OK) { CV_Error( CV_StsNotImplemented, "Custom HAL implementation failed to call dctFree"); @@ -4266,9 +4266,9 @@ void cv::dct( InputArray _src0, OutputArray _dst, int flags ) f |= CV_HAL_DFT_IS_CONTINUOUS; hal::DftContext c; - hal::dctInit(c, src.cols, src.rows, depth, f); - hal::dctRun(c, (void*)src.data, (int)src.step, (void*)dst.data, (int)dst.step); - hal::dctFree(c); + hal::dctInit2D(c, src.cols, src.rows, depth, f); + hal::dct2D(c, (void*)src.data, (int)src.step, (void*)dst.data, (int)dst.step); + hal::dctFree2D(c); } diff --git a/modules/core/src/hal_replacement.hpp b/modules/core/src/hal_replacement.hpp index d4d43332c..bbf32f39d 100644 --- a/modules/core/src/hal_replacement.hpp +++ b/modules/core/src/hal_replacement.hpp @@ -384,30 +384,30 @@ inline int hal_ni_merge64s(const int64 **src_data, int64 *dst_data, int len, int # pragma warning( pop ) #endif -inline int hal_ni_dftInit(void**, int, int, int, int, bool*) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_dftRun(const void*, const void*, void*) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_dftFree(void*) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_dftInit1D(void**, int, int, int, int, bool*) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_dft1D(const void*, const void*, void*) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_dftFree1D(void*) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -#define cv_hal_dftInit hal_ni_dftInit -#define cv_hal_dftRun hal_ni_dftRun -#define cv_hal_dftFree hal_ni_dftFree +#define cv_hal_dftInit1D hal_ni_dftInit1D +#define cv_hal_dft1D hal_ni_dft1D +#define cv_hal_dftFree1D hal_ni_dftFree1D inline int hal_ni_dftInit2D(void **, int, int, int, int, int, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_dftRun2D(const void *, const void *, int, void *, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_dft2D(const void *, const void *, 
int, void *, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } inline int hal_ni_dftFree2D(void *) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } #define cv_hal_dftInit2D hal_ni_dftInit2D -#define cv_hal_dftRun2D hal_ni_dftRun2D +#define cv_hal_dft2D hal_ni_dft2D #define cv_hal_dftFree2D hal_ni_dftFree2D -inline int hal_ni_dctInit(void **, int, int, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_dctRun(const void *, const void *, int, void *, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_dctFree(void *) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_dctInit2D(void **, int, int, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_dct2D(const void *, const void *, int, void *, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +inline int hal_ni_dctFree2D(void *) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -#define cv_hal_dctInit hal_ni_dctInit -#define cv_hal_dctRun hal_ni_dctRun -#define cv_hal_dctFree hal_ni_dctFree +#define cv_hal_dctInit2D hal_ni_dctInit2D +#define cv_hal_dct2D hal_ni_dct2D +#define cv_hal_dctFree2D hal_ni_dctFree2D #include "custom_hal.hpp" diff --git a/modules/imgproc/src/templmatch.cpp b/modules/imgproc/src/templmatch.cpp index 6353f14ff..4e8958279 100644 --- a/modules/imgproc/src/templmatch.cpp +++ b/modules/imgproc/src/templmatch.cpp @@ -726,7 +726,7 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr, Mat part(dst, Range(0, templ.rows), Range(templ.cols, dst.cols)); part = Scalar::all(0); } - hal::dftRun2D(c, dst.data, (int)dst.step, dst.data, (int)dst.step); + hal::dft2D(c, dst.data, (int)dst.step, dst.data, (int)dst.step); } hal::dftFree2D(c); @@ -791,7 +791,7 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr, x1-x0, dst.cols-dst1.cols-(x1-x0), borderType); if (bsz.height == blocksize.height) - hal::dftRun2D(cF, dftImg.data, (int)dftImg.step, dftImg.data, (int)dftImg.step); + hal::dft2D(cF, dftImg.data, (int)dftImg.step, dftImg.data, (int)dftImg.step); else 
dft( dftImg, dftImg, 0, dsz.height ); @@ -800,7 +800,7 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr, mulSpectrums(dftImg, dftTempl1, dftImg, 0, true); if (bsz.height == blocksize.height) - hal::dftRun2D(cR, dftImg.data, (int)dftImg.step, dftImg.data, (int)dftImg.step); + hal::dft2D(cR, dftImg.data, (int)dftImg.step, dftImg.data, (int)dftImg.step); else dft( dftImg, dftImg, DFT_INVERSE + DFT_SCALE, bsz.height ); From 233612efd7925bd022777d297fdf65215f16dcc8 Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Fri, 8 Apr 2016 16:03:51 +0300 Subject: [PATCH 5/7] Reworked HAL dft/dct interface, added replacement documentation --- modules/core/include/opencv2/core/hal/hal.hpp | 32 +- .../core/include/opencv2/core/hal/interface.h | 25 +- modules/core/src/dxt.cpp | 354 ++++++++---------- modules/core/src/hal_replacement.hpp | 121 ++++-- modules/imgproc/src/templmatch.cpp | 19 +- 5 files changed, 295 insertions(+), 256 deletions(-) diff --git a/modules/core/include/opencv2/core/hal/hal.hpp b/modules/core/include/opencv2/core/hal/hal.hpp index 6b9f93dbf..5b01cbe4c 100644 --- a/modules/core/include/opencv2/core/hal/hal.hpp +++ b/modules/core/include/opencv2/core/hal/hal.hpp @@ -187,24 +187,28 @@ CV_EXPORTS void addWeighted32s( const int* src1, size_t step1, const int* src2, CV_EXPORTS void addWeighted32f( const float* src1, size_t step1, const float* src2, size_t step2, float* dst, size_t step, int width, int height, void* scalars ); CV_EXPORTS void addWeighted64f( const double* src1, size_t step1, const double* src2, size_t step2, double* dst, size_t step, int width, int height, void* scalars ); -struct DftContext +struct CV_EXPORTS DFT1D { - void * impl; - bool useReplacement; - DftContext() : impl(0), useReplacement(false) {} + static Ptr create(int len, int count, int depth, int flags, bool * useBuffer = 0); + virtual void apply(const uchar *src, uchar *dst) = 0; + virtual ~DFT1D() {} }; -CV_EXPORTS void dftInit2D(DftContext & c, int _width, int 
_height, int _depth, int _src_channels, int _dst_channels, int flags, int _nonzero_rows = 0); -CV_EXPORTS void dft2D(const DftContext & c, const void * src, int src_step, void * dst, int dst_step); -CV_EXPORTS void dftFree2D(DftContext & c); +struct CV_EXPORTS DFT2D +{ + static Ptr create(int width, int height, int depth, + int src_channels, int dst_channels, + int flags, int nonzero_rows = 0); + virtual void apply(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) = 0; + virtual ~DFT2D() {} +}; -CV_EXPORTS void dftInit1D(DftContext & c, int len, int count, int depth, int flags, bool * useBuffer = 0); -CV_EXPORTS void dft1D(const DftContext & c, const void * src, void * dst); -CV_EXPORTS void dftFree1D(DftContext & c); - -CV_EXPORTS void dctInit2D(DftContext & c, int width, int height, int depth, int flags); -CV_EXPORTS void dct2D(const DftContext & c, const void * src, int src_step, void * dst, int dst_step); -CV_EXPORTS void dctFree2D(DftContext & c); +struct CV_EXPORTS DCT2D +{ + static Ptr create(int width, int height, int depth, int flags); + virtual void apply(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) = 0; + virtual ~DCT2D() {} +}; //! @} core_hal diff --git a/modules/core/include/opencv2/core/hal/interface.h b/modules/core/include/opencv2/core/hal/interface.h index 0da68f18c..2bb7b19f2 100644 --- a/modules/core/include/opencv2/core/hal/interface.h +++ b/modules/core/include/opencv2/core/hal/interface.h @@ -11,21 +11,11 @@ #define CV_HAL_ERROR_UNKNOWN -1 //! @} - -#define CV_HAL_DFT_INVERSE 1 -#define CV_HAL_DFT_SCALE 2 -#define CV_HAL_DFT_ROWS 4 -#define CV_HAL_DFT_COMPLEX_OUTPUT 16 -#define CV_HAL_DFT_REAL_OUTPUT 32 -#define CV_HAL_DFT_TWO_STAGE 64 -#define CV_HAL_DFT_STAGE_COLS 128 -#define CV_HAL_DFT_IS_CONTINUOUS 512 -#define CV_HAL_DFT_IS_INPLACE 1024 - #ifdef __cplusplus #include #else #include +#include #endif //! 
@name Data types @@ -155,6 +145,19 @@ typedef signed char schar; #define CV_HAL_BORDER_ISOLATED 16 //! @} +//! @name DFT flags +//! @{ +#define CV_HAL_DFT_INVERSE 1 +#define CV_HAL_DFT_SCALE 2 +#define CV_HAL_DFT_ROWS 4 +#define CV_HAL_DFT_COMPLEX_OUTPUT 16 +#define CV_HAL_DFT_REAL_OUTPUT 32 +#define CV_HAL_DFT_TWO_STAGE 64 +#define CV_HAL_DFT_STAGE_COLS 128 +#define CV_HAL_DFT_IS_CONTINUOUS 512 +#define CV_HAL_DFT_IS_INPLACE 1024 +//! @} + //! @} #endif diff --git a/modules/core/src/dxt.cpp b/modules/core/src/dxt.cpp index 1ea549675..2cff51d5a 100644 --- a/modules/core/src/dxt.cpp +++ b/modules/core/src/dxt.cpp @@ -1553,7 +1553,7 @@ class Dft_C_IPPLoop_Invoker : public ParallelLoopBody { public: - Dft_C_IPPLoop_Invoker(uchar * _src, int _src_step, uchar * _dst, int _dst_step, int _width, + Dft_C_IPPLoop_Invoker(const uchar * _src, int _src_step, uchar * _dst, int _dst_step, int _width, const Dft& _ippidft, int _norm_flag, bool *_ok) : ParallelLoopBody(), src(_src), src_step(_src_step), dst(_dst), dst_step(_dst_step), width(_width), @@ -1617,7 +1617,7 @@ public: } private: - uchar * src; + const uchar * src; int src_step; uchar * dst; int dst_step; @@ -1634,7 +1634,7 @@ class Dft_R_IPPLoop_Invoker : public ParallelLoopBody { public: - Dft_R_IPPLoop_Invoker(uchar * _src, int _src_step, uchar * _dst, int _dst_step, int _width, + Dft_R_IPPLoop_Invoker(const uchar * _src, int _src_step, uchar * _dst, int _dst_step, int _width, const Dft& _ippidft, int _norm_flag, bool *_ok) : ParallelLoopBody(), src(_src), src_step(_src_step), dst(_dst), dst_step(_dst_step), width(_width), @@ -1698,7 +1698,7 @@ public: } private: - uchar * src; + const uchar * src; int src_step; uchar * dst; int dst_step; @@ -1711,7 +1711,7 @@ private: }; template -bool Dft_C_IPPLoop(uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, const Dft& ippidft, int norm_flag) +bool Dft_C_IPPLoop(const uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, const 
Dft& ippidft, int norm_flag) { bool ok; parallel_for_(Range(0, height), Dft_C_IPPLoop_Invoker(src, src_step, dst, dst_step, width, ippidft, norm_flag, &ok), (width * height)/(double)(1<<16) ); @@ -1719,7 +1719,7 @@ bool Dft_C_IPPLoop(uchar * src, int src_step, uchar * dst, int dst_step, int wid } template -bool Dft_R_IPPLoop(uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, const Dft& ippidft, int norm_flag) +bool Dft_R_IPPLoop(const uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, const Dft& ippidft, int norm_flag) { bool ok; parallel_for_(Range(0, height), Dft_R_IPPLoop_Invoker(src, src_step, dst, dst_step, width, ippidft, norm_flag, &ok), (width * height)/(double)(1<<16) ); @@ -1750,7 +1750,7 @@ private: ippiDFT_R_Func func; }; -static bool ippi_DFT_C_32F(uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, bool inv, int norm_flag) +static bool ippi_DFT_C_32F(const uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, bool inv, int norm_flag) { IppStatus status; Ipp8u* pBuffer = 0; @@ -1804,7 +1804,7 @@ static bool ippi_DFT_C_32F(uchar * src, int src_step, uchar * dst, int dst_step, return false; } -static bool ippi_DFT_R_32F(uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, bool inv, int norm_flag) +static bool ippi_DFT_R_32F(const uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, bool inv, int norm_flag) { IppStatus status; Ipp8u* pBuffer = 0; @@ -2611,11 +2611,11 @@ inline DftDims determineDims(int rows, int cols, bool isRowWise, bool isContinuo return InvalidDim; } -class OcvDftImpl +class OcvDftImpl : public hal::DFT2D { protected: - hal::DftContext contextA; - hal::DftContext contextB; + Ptr contextA; + Ptr contextB; bool needBufferA; bool needBufferB; bool inv; @@ -2763,7 +2763,7 @@ public: count = height; } needBufferA = isInplace; - hal::dftInit1D(contextA, len, count, depth, f, &needBufferA); + 
contextA = hal::DFT1D::create(len, count, depth, f, &needBufferA); if (needBufferA) tmp_bufA.allocate(len * complex_elem_size); } @@ -2773,7 +2773,7 @@ public: count = width; f |= CV_HAL_DFT_STAGE_COLS; needBufferB = isInplace; - hal::dftInit1D(contextB, len, count, depth, f, &needBufferB); + contextB = hal::DFT1D::create(len, count, depth, f, &needBufferB); if (needBufferB) tmp_bufB.allocate(len * complex_elem_size); @@ -2783,7 +2783,7 @@ public: } } - void run(uchar * src, int src_step, uchar * dst, int dst_step) + void apply(const uchar * src, size_t src_step, uchar * dst, size_t dst_step) { #if defined USE_IPP_DFT if (useIpp) @@ -2860,17 +2860,9 @@ public: } } - void free() - { - if (useIpp) - return; - hal::dftFree1D(contextA); - hal::dftFree1D(contextB); - } - protected: - void rowDft(uchar* src_data, int src_step, uchar* dst_data, int dst_step, bool isComplex, bool isLastStage) + void rowDft(const uchar* src_data, int src_step, uchar* dst_data, int dst_step, bool isComplex, bool isLastStage) { int len, count; if (width == 1 && !isRowTransform ) @@ -2909,7 +2901,7 @@ protected: if( needBufferA ) dptr = tmp_bufA; - hal::dft1D(contextA, sptr, dptr); + contextA->apply(sptr, dptr); if( needBufferA ) memcpy( dptr0, dptr + dptr_offset, dst_full_len ); @@ -2924,7 +2916,7 @@ protected: complementComplexOutput(depth, dst_data, dst_step, len, nz, 1); } - void colDft(uchar* src_data, int src_step, uchar* dst_data, int dst_step, int stage_src_channels, int stage_dst_channels, bool isLastStage) + void colDft(const uchar* src_data, int src_step, uchar* dst_data, int dst_step, int stage_src_channels, int stage_dst_channels, bool isLastStage) { int len = height; int count = width; @@ -2983,8 +2975,8 @@ protected: } if( even ) - hal::dft1D(contextB, buf1, dbuf1); - hal::dft1D(contextB, buf0, dbuf0); + contextB->apply(buf1, dbuf1); + contextB->apply(buf0, dbuf0); if( stage_dst_channels == 1 ) { @@ -3032,12 +3024,12 @@ protected: if( i+1 < b ) { CopyFrom2Columns( sptr0, 
src_step, buf0, buf1, len, complex_elem_size ); - hal::dft1D(contextB, buf1, dbuf1); + contextB->apply(buf1, dbuf1); } else CopyColumn( sptr0, src_step, buf0, complex_elem_size, len, complex_elem_size ); - hal::dft1D(contextB, buf0, dbuf0); + contextB->apply(buf0, dbuf0); if( i+1 < b ) CopyTo2Columns( dbuf0, dbuf1, dptr0, dst_step, len, complex_elem_size ); @@ -3051,7 +3043,7 @@ protected: } }; -class OcvDftBasicImpl +class OcvDftBasicImpl : public hal::DFT1D { public: OcvDftOptions opt; @@ -3068,11 +3060,6 @@ public: { opt.factors = _factors; } - OcvDftBasicImpl & operator=(const OcvDftBasicImpl & other) - { - this->opt = other.opt; - return *this; - } void init(int len, int count, int depth, int flags, bool *needBuffer) { int prev_len = opt.n; @@ -3211,7 +3198,7 @@ public: } } - void run(const void * src, void * dst) + void apply(const uchar *src, uchar *dst) { opt.dft_func(opt, src, dst); } @@ -3219,126 +3206,113 @@ public: void free() {} }; +struct ReplacementDFT1D : public hal::DFT1D +{ + cvhalDFT *context; + bool isInitialized; + + ReplacementDFT1D() : context(0), isInitialized(false) {} + bool init(int len, int count, int depth, int flags, bool *needBuffer) + { + int res = cv_hal_dftInit1D(&context, len, count, depth, flags, needBuffer); + isInitialized = (res == CV_HAL_ERROR_OK); + return isInitialized; + } + void apply(const uchar *src, uchar *dst) + { + if (isInitialized) + { + CALL_HAL(dft1D, cv_hal_dft1D, context, src, dst); + } + } + ~ReplacementDFT1D() + { + if (isInitialized) + { + CALL_HAL(dftFree1D, cv_hal_dftFree1D, context); + } + } +}; + +struct ReplacementDFT2D : public hal::DFT2D +{ + cvhalDFT *context; + bool isInitialized; + + ReplacementDFT2D() : context(0), isInitialized(false) {} + bool init(int width, int height, int depth, + int src_channels, int dst_channels, + int flags, int nonzero_rows) + { + int res = cv_hal_dftInit2D(&context, width, height, depth, src_channels, dst_channels, flags, nonzero_rows); + isInitialized = (res == 
CV_HAL_ERROR_OK); + return isInitialized; + } + void apply(const uchar *src, size_t src_step, uchar *dst, size_t dst_step) + { + if (isInitialized) + { + CALL_HAL(dft2D, cv_hal_dft2D, context, src, src_step, dst, dst_step); + } + } + ~ReplacementDFT2D() + { + if (isInitialized) + { + CALL_HAL(dftFree2D, cv_hal_dftFree1D, context); + } + } +}; + namespace hal { //================== 1D ====================== -void dftInit1D(DftContext & context, int len, int count, int depth, int flags, bool *needBuffer) +Ptr DFT1D::create(int len, int count, int depth, int flags, bool *needBuffer) { - int res = cv_hal_dftInit1D(&context.impl, len, count, depth, flags, needBuffer); - if (res == CV_HAL_ERROR_OK) { - context.useReplacement = true; - return; - } - - context.useReplacement = false; - OcvDftBasicImpl * c = (OcvDftBasicImpl*)context.impl; - if (!c) - { - c = new OcvDftBasicImpl(); - context.impl = (void*)c; - } - c->init(len, count, depth, flags, needBuffer); -} - -void dft1D(const DftContext & context, const void * src, void * dst) -{ - if (context.useReplacement) - { - int res = cv_hal_dft1D(context.impl, src, dst); - if (res != CV_HAL_ERROR_OK) + ReplacementDFT1D *impl = new ReplacementDFT1D(); + if (impl->init(len, count, depth, flags, needBuffer)) { - CV_Error( CV_StsNotImplemented, "Custom HAL implementation failed to call dftRun"); + return Ptr(impl); } - return; + delete impl; } - OcvDftBasicImpl * c = (OcvDftBasicImpl*)context.impl; - c->run(src, dst); -} - -void dftFree1D(DftContext & context) -{ - if (context.useReplacement) { - int res = cv_hal_dftFree1D(context.impl); - if (res != CV_HAL_ERROR_OK) - { - CV_Error( CV_StsNotImplemented, "Custom HAL implementation failed to call dftFree"); - } - return; - } - - OcvDftBasicImpl * c = (OcvDftBasicImpl*)context.impl; - if (c) - { - c->free(); - delete c; - context.impl = 0; + OcvDftBasicImpl *impl = new OcvDftBasicImpl(); + impl->init(len, count, depth, flags, needBuffer); + return Ptr(impl); } } - 
//================== 2D ====================== -void dftInit2D(DftContext & c, - int _width, int _height, int _depth, int _src_channels, int _dst_channels, - int flags, - int _nonzero_rows) +Ptr DFT2D::create(int width, int height, int depth, + int src_channels, int dst_channels, + int flags, int nonzero_rows) { - int res = cv_hal_dftInit2D(&c.impl, _width, _height, _depth, _src_channels, _dst_channels, flags, _nonzero_rows); - if (res == CV_HAL_ERROR_OK) { - c.useReplacement = true; - return; - } - c.useReplacement = false; - - if( _width == 1 && _nonzero_rows > 0 ) - CV_Error( CV_StsNotImplemented, - "This mode (using nonzero_rows with a single-column matrix) breaks the function's logic, so it is prohibited.\n" - "For fast convolution/correlation use 2-column matrix or single-row matrix instead" ); - - OcvDftImpl * d = new OcvDftImpl(); - d->init(_width, _height, _depth, _src_channels, _dst_channels, flags, _nonzero_rows); - c.impl = (void*)d; -} - -void dft2D(const DftContext & c, - const void * src, int src_step, void * dst, int dst_step) -{ - if (c.useReplacement) - { - int res = cv_hal_dft2D(c.impl, (uchar*)src, src_step, (uchar*)dst, dst_step); - if (res != CV_HAL_ERROR_OK) + ReplacementDFT2D *impl = new ReplacementDFT2D(); + if (impl->init(width, height, depth, src_channels, dst_channels, flags, nonzero_rows)) { - CV_Error( CV_StsNotImplemented, "Custom HAL implementation failed to call dftRun2D"); + return Ptr(impl); } - return; + delete impl; } - OcvDftImpl * d = (OcvDftImpl*)c.impl; - d->run((uchar*)src, src_step, (uchar*)dst, dst_step); -} - -void dftFree2D(DftContext & c) -{ - if (c.useReplacement) { - int res = cv_hal_dftFree2D(c.impl); - if (res != CV_HAL_ERROR_OK) + if(width == 1 && nonzero_rows > 0 ) { - CV_Error( CV_StsNotImplemented, "Custom HAL implementation failed to call dftFree2D"); + CV_Error( CV_StsNotImplemented, + "This mode (using nonzero_rows with a single-column matrix) breaks the function's logic, so it is prohibited.\n" + "For fast 
convolution/correlation use 2-column matrix or single-row matrix instead" ); } - return; + OcvDftImpl *impl = new OcvDftImpl(); + impl->init(width, height, depth, src_channels, dst_channels, flags, nonzero_rows); + return Ptr(impl); } - OcvDftImpl * d = (OcvDftImpl*)c.impl; - d->free(); - delete d; - c.impl = 0; } } // cv::hal:: - } // cv:: @@ -3382,10 +3356,8 @@ void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows ) f |= CV_HAL_DFT_SCALE; if (src.data == dst.data) f |= CV_HAL_DFT_IS_INPLACE; - hal::DftContext c; - hal::dftInit2D(c, src.cols, src.rows, depth, src.channels(), dst.channels(), f, nonzero_rows); - hal::dft2D(c, src.data, (int)src.step, dst.data, (int)dst.step); - hal::dftFree2D(c); + Ptr c = hal::DFT2D::create(src.cols, src.rows, depth, src.channels(), dst.channels(), f, nonzero_rows); + c->apply(src.data, src.step, dst.data, dst.step); } @@ -3607,7 +3579,7 @@ namespace cv http://www.ece.utexas.edu/~bevans/courses/ee381k/lectures/09_DCT/lecture9/: */ template static void -DCT( const OcvDftOptions & c, const T* src, int src_step, T* dft_src, T* dft_dst, T* dst, int dst_step, +DCT( const OcvDftOptions & c, const T* src, size_t src_step, T* dft_src, T* dft_dst, T* dst, size_t dst_step, const Complex* dct_wave ) { static const T sin_45 = (T)0.70710678118654752440084436210485; @@ -3650,7 +3622,7 @@ DCT( const OcvDftOptions & c, const T* src, int src_step, T* dft_src, T* dft_dst template static void -IDCT( const OcvDftOptions & c, const T* src, int src_step, T* dft_src, T* dft_dst, T* dst, int dst_step, +IDCT( const OcvDftOptions & c, const T* src, size_t src_step, T* dft_src, T* dft_dst, T* dst, size_t dst_step, const Complex* dct_wave) { static const T sin_45 = (T)0.70710678118654752440084436210485; @@ -3768,29 +3740,29 @@ DCTInit( int n, int elem_size, void* _wave, int inv ) } -typedef void (*DCTFunc)(const OcvDftOptions & c, const void* src, int src_step, void* dft_src, - void* dft_dst, void* dst, int dst_step, const void* 
dct_wave); +typedef void (*DCTFunc)(const OcvDftOptions & c, const void* src, size_t src_step, void* dft_src, + void* dft_dst, void* dst, size_t dst_step, const void* dct_wave); -static void DCT_32f(const OcvDftOptions & c, const float* src, int src_step, float* dft_src, float* dft_dst, - float* dst, int dst_step, const Complexf* dct_wave) +static void DCT_32f(const OcvDftOptions & c, const float* src, size_t src_step, float* dft_src, float* dft_dst, + float* dst, size_t dst_step, const Complexf* dct_wave) { DCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave); } -static void IDCT_32f(const OcvDftOptions & c, const float* src, int src_step, float* dft_src, float* dft_dst, - float* dst, int dst_step, const Complexf* dct_wave) +static void IDCT_32f(const OcvDftOptions & c, const float* src, size_t src_step, float* dft_src, float* dft_dst, + float* dst, size_t dst_step, const Complexf* dct_wave) { IDCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave); } -static void DCT_64f(const OcvDftOptions & c, const double* src, int src_step, double* dft_src, double* dft_dst, - double* dst, int dst_step, const Complexd* dct_wave) +static void DCT_64f(const OcvDftOptions & c, const double* src, size_t src_step, double* dft_src, double* dft_dst, + double* dst, size_t dst_step, const Complexd* dct_wave) { DCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave); } -static void IDCT_64f(const OcvDftOptions & c, const double* src, int src_step, double* dft_src, double* dft_dst, - double* dst, int dst_step, const Complexd* dct_wave) +static void IDCT_64f(const OcvDftOptions & c, const double* src, size_t src_step, double* dft_src, double* dft_dst, + double* dst, size_t dst_step, const Complexd* dct_wave) { IDCT(c, src, src_step, dft_src, dft_dst, dst, dst_step, dct_wave); } @@ -4058,7 +4030,7 @@ static bool ippi_DCT_32f(const uchar * src, int src_step, uchar * dst, int dst_s namespace cv { -class OcvDctImpl +class OcvDctImpl : public hal::DCT2D { public: 
OcvDftOptions opt; @@ -4110,7 +4082,7 @@ public: end_stage = 1; } } - void run(uchar * src, int src_step, uchar * dst, int dst_step) + void apply(const uchar *src, size_t src_step, uchar *dst, size_t dst_step) { CV_IPP_RUN(IPP_VERSION_X100 >= 700 && depth == CV_32F, ippi_DCT_32f(src, src_step, dst, dst_step, width, height, isInverse, isRowTransform)) @@ -4183,69 +4155,65 @@ public: prev_len = len; } // otherwise reuse the tables calculated on the previous stage - for(int i = 0; i < count; i++ ) + for(unsigned i = 0; i < static_cast(count); i++ ) { - dct_func( opt, sptr + i*sstep0, (int)sstep1, src_dft_buf, dst_dft_buf, - dptr + i*dstep0, (int)dstep1, dct_wave); + dct_func( opt, sptr + i*sstep0, sstep1, src_dft_buf, dst_dft_buf, + dptr + i*dstep0, dstep1, dct_wave); } src = dst; src_step = dst_step; } - } - void free() {} +}; + +struct ReplacementDCT2D : public hal::DCT2D +{ + cvhalDFT *context; + bool isInitialized; + + ReplacementDCT2D() : context(0), isInitialized(false) {} + bool init(int width, int height, int depth, int flags) + { + int res = hal_ni_dctInit2D(&context, width, height, depth, flags); + isInitialized = (res == CV_HAL_ERROR_OK); + return isInitialized; + } + void apply(const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) + { + if (isInitialized) + { + CALL_HAL(dct2D, cv_hal_dct2D, context, src_data, src_step, dst_data, dst_step); + } + } + ~ReplacementDCT2D() + { + if (isInitialized) + { + CALL_HAL(dctFree2D, cv_hal_dctFree2D, context); + } + } }; namespace hal { -void dctInit2D(DftContext & c, int width, int height, int depth, int flags) +Ptr DCT2D::create(int width, int height, int depth, int flags) { - int res = cv_hal_dctInit2D(&c.impl, width, height, depth, flags); - if (res == CV_HAL_ERROR_OK) { - c.useReplacement = true; - return; - } - c.useReplacement = false; - OcvDctImpl * impl = new OcvDctImpl(); - impl->init(width, height, depth, flags); - c.impl = impl; -} - -void dct2D(const DftContext & c, const void * src, int 
src_step, void * dst, int dst_step) -{ - if (c.useReplacement) - { - int res = cv_hal_dct2D(c.impl, src, src_step, dst, dst_step); - if (res != CV_HAL_ERROR_OK) + ReplacementDCT2D *impl = new ReplacementDCT2D(); + if (impl->init(width, height, depth, flags)) { - CV_Error( CV_StsNotImplemented, "Custom HAL implementation failed to call dctRun"); + return Ptr(impl); } - return; + delete impl; } - OcvDctImpl * impl = (OcvDctImpl*)c.impl; - impl->run((uchar*)src, src_step, (uchar*)dst, dst_step); -} - -void dctFree2D(DftContext & c) -{ - if (c.useReplacement) { - int res = cv_hal_dctFree2D(c.impl); - if (res != CV_HAL_ERROR_OK) - { - CV_Error( CV_StsNotImplemented, "Custom HAL implementation failed to call dctFree"); - } - return; + OcvDctImpl *impl = new OcvDctImpl(); + impl->init(width, height, depth, flags); + return Ptr(impl); } - OcvDctImpl * impl = (OcvDctImpl*)c.impl; - impl->free(); - delete impl; - c.impl = 0; } } // cv::hal:: - } // cv:: void cv::dct( InputArray _src0, OutputArray _dst, int flags ) @@ -4265,10 +4233,8 @@ void cv::dct( InputArray _src0, OutputArray _dst, int flags ) if (src.isContinuous() && dst.isContinuous()) f |= CV_HAL_DFT_IS_CONTINUOUS; - hal::DftContext c; - hal::dctInit2D(c, src.cols, src.rows, depth, f); - hal::dct2D(c, (void*)src.data, (int)src.step, (void*)dst.data, (int)dst.step); - hal::dctFree2D(c); + Ptr c = hal::DCT2D::create(src.cols, src.rows, depth, f); + c->apply(src.data, src.step, dst.data, dst.step); } diff --git a/modules/core/src/hal_replacement.hpp b/modules/core/src/hal_replacement.hpp index bbf32f39d..93476c459 100644 --- a/modules/core/src/hal_replacement.hpp +++ b/modules/core/src/hal_replacement.hpp @@ -376,6 +376,102 @@ inline int hal_ni_merge64s(const int64 **src_data, int64 *dst_data, int len, int #define cv_hal_merge64s hal_ni_merge64s //! @endcond +/** +@brief Dummy structure storing DFT/DCT context + +Users can convert this pointer to any type they want. 
Initialisation and destruction should be made in Init and Free function implementations correspondingly. +Example: +@code{.cpp} +int my_hal_dftInit2D(cvhalDFT **context, ...) { + *context = static_cast<cvhalDFT*>(new MyFilterData()); + //... init +} + +int my_hal_dftFree2D(cvhalDFT *context) { + MyFilterData *c = static_cast<MyFilterData*>(context); + delete c; +} +@endcode + */ +struct cvhalDFT {}; + +/** +@param context double pointer to context storing all necessary data +@param len transformed array length +@param count estimated transformation count +@param depth array type (CV_32F or CV_64F) +@param flags algorithm options (combination of CV_HAL_DFT_INVERSE, CV_HAL_DFT_SCALE, ...) +@param needBuffer pointer to boolean variable, if valid pointer provided, then variable value should be set to true to signal that additional memory buffer is needed for operations + */ +inline int hal_ni_dftInit1D(cvhalDFT **context, int len, int count, int depth, int flags, bool *needBuffer) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** +@param context pointer to context storing all necessary data +@param src source data +@param dst destination data + */ +inline int hal_ni_dft1D(cvhalDFT *context, const uchar *src, uchar *dst) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** +@param context pointer to context storing all necessary data + */ +inline int hal_ni_dftFree1D(cvhalDFT *context) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } + +//! @cond IGNORED +#define cv_hal_dftInit1D hal_ni_dftInit1D +#define cv_hal_dft1D hal_ni_dft1D +#define cv_hal_dftFree1D hal_ni_dftFree1D +//! @endcond + +/** +@param context double pointer to context storing all necessary data +@param width,height image dimensions +@param depth image type (CV_32F or CV_64F) +@param src_channels number of channels in input image +@param dst_channels number of channels in output image +@param flags algorithm options (combination of CV_HAL_DFT_INVERSE, ...) 
+@param nonzero_rows number of nonzero rows in image, can be used for optimization + */ +inline int hal_ni_dftInit2D(cvhalDFT **context, int width, int height, int depth, int src_channels, int dst_channels, int flags, int nonzero_rows) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** +@param context pointer to context storing all necessary data +@param src_data,src_step source image data and step +@param dst_data,dst_step destination image data and step + */ +inline int hal_ni_dft2D(cvhalDFT *context, const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** +@param context pointer to context storing all necessary data + */ +inline int hal_ni_dftFree2D(cvhalDFT *context) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } + +//! @cond IGNORED +#define cv_hal_dftInit2D hal_ni_dftInit2D +#define cv_hal_dft2D hal_ni_dft2D +#define cv_hal_dftFree2D hal_ni_dftFree2D +//! @endcond + +/** +@param context double pointer to context storing all necessary data +@param width,height image dimensions +@param depth image type (CV_32F or CV_64F) +@param flags algorithm options (combination of CV_HAL_DFT_INVERSE, ...) + */ +inline int hal_ni_dctInit2D(cvhalDFT **context, int width, int height, int depth, int flags) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** +@param context pointer to context storing all necessary data +@param src_data,src_step source image data and step +@param dst_data,dst_step destination image data and step + */ +inline int hal_ni_dct2D(cvhalDFT *context, const uchar *src_data, size_t src_step, uchar *dst_data, size_t dst_step) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } +/** +@param context pointer to context storing all necessary data + */ +inline int hal_ni_dctFree2D(cvhalDFT *context) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } + +//! @cond IGNORED +#define cv_hal_dctInit2D hal_ni_dctInit2D +#define cv_hal_dct2D hal_ni_dct2D +#define cv_hal_dctFree2D hal_ni_dctFree2D +//! @endcond + //! 
@} #if defined __GNUC__ @@ -384,31 +480,6 @@ inline int hal_ni_merge64s(const int64 **src_data, int64 *dst_data, int len, int # pragma warning( pop ) #endif -inline int hal_ni_dftInit1D(void**, int, int, int, int, bool*) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_dft1D(const void*, const void*, void*) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_dftFree1D(void*) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } - -#define cv_hal_dftInit1D hal_ni_dftInit1D -#define cv_hal_dft1D hal_ni_dft1D -#define cv_hal_dftFree1D hal_ni_dftFree1D - -inline int hal_ni_dftInit2D(void **, int, int, int, int, int, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_dft2D(const void *, const void *, int, void *, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_dftFree2D(void *) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } - -#define cv_hal_dftInit2D hal_ni_dftInit2D -#define cv_hal_dft2D hal_ni_dft2D -#define cv_hal_dftFree2D hal_ni_dftFree2D - - -inline int hal_ni_dctInit2D(void **, int, int, int, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_dct2D(const void *, const void *, int, void *, int) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } -inline int hal_ni_dctFree2D(void *) { return CV_HAL_ERROR_NOT_IMPLEMENTED; } - -#define cv_hal_dctInit2D hal_ni_dctInit2D -#define cv_hal_dct2D hal_ni_dct2D -#define cv_hal_dctFree2D hal_ni_dctFree2D - #include "custom_hal.hpp" #endif diff --git a/modules/imgproc/src/templmatch.cpp b/modules/imgproc/src/templmatch.cpp index 4e8958279..019c41f33 100644 --- a/modules/imgproc/src/templmatch.cpp +++ b/modules/imgproc/src/templmatch.cpp @@ -700,8 +700,7 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr, buf.resize(bufSize); - hal::DftContext c; - hal::dftInit2D(c, dftsize.width, dftsize.height, dftTempl.depth(), 1, 1, CV_HAL_DFT_IS_INPLACE, templ.rows); + Ptr c = hal::DFT2D::create(dftsize.width, dftsize.height, dftTempl.depth(), 1, 1, CV_HAL_DFT_IS_INPLACE, templ.rows); // 
compute DFT of each template plane for( k = 0; k < tcn; k++ ) @@ -726,11 +725,9 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr, Mat part(dst, Range(0, templ.rows), Range(templ.cols, dst.cols)); part = Scalar::all(0); } - hal::dft2D(c, dst.data, (int)dst.step, dst.data, (int)dst.step); + c->apply(dst.data, (int)dst.step, dst.data, (int)dst.step); } - hal::dftFree2D(c); - int tileCountX = (corr.cols + blocksize.width - 1)/blocksize.width; int tileCountY = (corr.rows + blocksize.height - 1)/blocksize.height; int tileCount = tileCountX * tileCountY; @@ -747,11 +744,11 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr, } borderType |= BORDER_ISOLATED; - hal::DftContext cF, cR; + Ptr cF, cR; int f = CV_HAL_DFT_IS_INPLACE; int f_inv = f | CV_HAL_DFT_INVERSE | CV_HAL_DFT_SCALE; - hal::dftInit2D(cF, dftsize.width, dftsize.height, maxDepth, 1, 1, f, blocksize.height + templ.rows - 1); - hal::dftInit2D(cR, dftsize.width, dftsize.height, maxDepth, 1, 1, f_inv, blocksize.height); + cF = hal::DFT2D::create(dftsize.width, dftsize.height, maxDepth, 1, 1, f, blocksize.height + templ.rows - 1); + cR = hal::DFT2D::create(dftsize.width, dftsize.height, maxDepth, 1, 1, f_inv, blocksize.height); // calculate correlation by blocks for( i = 0; i < tileCount; i++ ) @@ -791,7 +788,7 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr, x1-x0, dst.cols-dst1.cols-(x1-x0), borderType); if (bsz.height == blocksize.height) - hal::dft2D(cF, dftImg.data, (int)dftImg.step, dftImg.data, (int)dftImg.step); + cF->apply(dftImg.data, (int)dftImg.step, dftImg.data, (int)dftImg.step); else dft( dftImg, dftImg, 0, dsz.height ); @@ -800,7 +797,7 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr, mulSpectrums(dftImg, dftTempl1, dftImg, 0, true); if (bsz.height == blocksize.height) - hal::dft2D(cR, dftImg.data, (int)dftImg.step, dftImg.data, (int)dftImg.step); + cR->apply(dftImg.data, (int)dftImg.step, dftImg.data, (int)dftImg.step); else dft( dftImg, 
dftImg, DFT_INVERSE + DFT_SCALE, bsz.height ); @@ -834,8 +831,6 @@ void crossCorr( const Mat& img, const Mat& _templ, Mat& corr, } } } - hal::dftFree2D(cF); - hal::dftFree2D(cR); } static void matchTemplateMask( InputArray _img, InputArray _templ, OutputArray _result, int method, InputArray _mask ) From 11378fcb178b6d86b90f9d3a096da34f2176e996 Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Tue, 19 Apr 2016 14:50:07 +0300 Subject: [PATCH 6/7] Fixed compilation problems --- modules/core/include/opencv2/core/hal/hal.hpp | 1 + modules/core/src/dxt.cpp | 44 +++++++++---------- 2 files changed, 23 insertions(+), 22 deletions(-) diff --git a/modules/core/include/opencv2/core/hal/hal.hpp b/modules/core/include/opencv2/core/hal/hal.hpp index 5b01cbe4c..09bcd72d5 100644 --- a/modules/core/include/opencv2/core/hal/hal.hpp +++ b/modules/core/include/opencv2/core/hal/hal.hpp @@ -46,6 +46,7 @@ #define __OPENCV_HAL_HPP__ #include "opencv2/core/cvdef.h" +#include "opencv2/core/cvstd.hpp" #include "opencv2/core/hal/interface.h" //! 
@cond IGNORED diff --git a/modules/core/src/dxt.cpp b/modules/core/src/dxt.cpp index 2cff51d5a..164b0f10a 100644 --- a/modules/core/src/dxt.cpp +++ b/modules/core/src/dxt.cpp @@ -1553,7 +1553,7 @@ class Dft_C_IPPLoop_Invoker : public ParallelLoopBody { public: - Dft_C_IPPLoop_Invoker(const uchar * _src, int _src_step, uchar * _dst, int _dst_step, int _width, + Dft_C_IPPLoop_Invoker(const uchar * _src, size_t _src_step, uchar * _dst, size_t _dst_step, int _width, const Dft& _ippidft, int _norm_flag, bool *_ok) : ParallelLoopBody(), src(_src), src_step(_src_step), dst(_dst), dst_step(_dst_step), width(_width), @@ -1618,9 +1618,9 @@ public: private: const uchar * src; - int src_step; + size_t src_step; uchar * dst; - int dst_step; + size_t dst_step; int width; const Dft& ippidft; int norm_flag; @@ -1634,7 +1634,7 @@ class Dft_R_IPPLoop_Invoker : public ParallelLoopBody { public: - Dft_R_IPPLoop_Invoker(const uchar * _src, int _src_step, uchar * _dst, int _dst_step, int _width, + Dft_R_IPPLoop_Invoker(const uchar * _src, size_t _src_step, uchar * _dst, size_t _dst_step, int _width, const Dft& _ippidft, int _norm_flag, bool *_ok) : ParallelLoopBody(), src(_src), src_step(_src_step), dst(_dst), dst_step(_dst_step), width(_width), @@ -1699,9 +1699,9 @@ public: private: const uchar * src; - int src_step; + size_t src_step; uchar * dst; - int dst_step; + size_t dst_step; int width; const Dft& ippidft; int norm_flag; @@ -1711,7 +1711,7 @@ private: }; template -bool Dft_C_IPPLoop(const uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, const Dft& ippidft, int norm_flag) +bool Dft_C_IPPLoop(const uchar * src, size_t src_step, uchar * dst, size_t dst_step, int width, int height, const Dft& ippidft, int norm_flag) { bool ok; parallel_for_(Range(0, height), Dft_C_IPPLoop_Invoker(src, src_step, dst, dst_step, width, ippidft, norm_flag, &ok), (width * height)/(double)(1<<16) ); @@ -1719,7 +1719,7 @@ bool Dft_C_IPPLoop(const uchar * src, int src_step, uchar 
* dst, int dst_step, i } template -bool Dft_R_IPPLoop(const uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, const Dft& ippidft, int norm_flag) +bool Dft_R_IPPLoop(const uchar * src, size_t src_step, uchar * dst, size_t dst_step, int width, int height, const Dft& ippidft, int norm_flag) { bool ok; parallel_for_(Range(0, height), Dft_R_IPPLoop_Invoker(src, src_step, dst, dst_step, width, ippidft, norm_flag, &ok), (width * height)/(double)(1<<16) ); @@ -1730,9 +1730,9 @@ struct IPPDFT_C_Functor { IPPDFT_C_Functor(ippiDFT_C_Func _func) : func(_func){} - bool operator()(const Ipp32fc* src, int srcStep, Ipp32fc* dst, int dstStep, const IppiDFTSpec_C_32fc* pDFTSpec, Ipp8u* pBuffer) const + bool operator()(const Ipp32fc* src, size_t srcStep, Ipp32fc* dst, size_t dstStep, const IppiDFTSpec_C_32fc* pDFTSpec, Ipp8u* pBuffer) const { - return func ? func(src, srcStep, dst, dstStep, pDFTSpec, pBuffer) >= 0 : false; + return func ? func(src, static_cast(srcStep), dst, static_cast(dstStep), pDFTSpec, pBuffer) >= 0 : false; } private: ippiDFT_C_Func func; @@ -1742,15 +1742,15 @@ struct IPPDFT_R_Functor { IPPDFT_R_Functor(ippiDFT_R_Func _func) : func(_func){} - bool operator()(const Ipp32f* src, int srcStep, Ipp32f* dst, int dstStep, const IppiDFTSpec_R_32f* pDFTSpec, Ipp8u* pBuffer) const + bool operator()(const Ipp32f* src, size_t srcStep, Ipp32f* dst, size_t dstStep, const IppiDFTSpec_R_32f* pDFTSpec, Ipp8u* pBuffer) const { - return func ? func(src, srcStep, dst, dstStep, pDFTSpec, pBuffer) >= 0 : false; + return func ? 
func(src, static_cast(srcStep), dst, static_cast(dstStep), pDFTSpec, pBuffer) >= 0 : false; } private: ippiDFT_R_Func func; }; -static bool ippi_DFT_C_32F(const uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, bool inv, int norm_flag) +static bool ippi_DFT_C_32F(const uchar * src, size_t src_step, uchar * dst, size_t dst_step, int width, int height, bool inv, int norm_flag) { IppStatus status; Ipp8u* pBuffer = 0; @@ -1787,9 +1787,9 @@ static bool ippi_DFT_C_32F(const uchar * src, int src_step, uchar * dst, int dst } if (!inv) - status = ippiDFTFwd_CToC_32fc_C1R( (Ipp32fc*)src, src_step, (Ipp32fc*)dst, dst_step, pDFTSpec, pBuffer ); + status = ippiDFTFwd_CToC_32fc_C1R( (Ipp32fc*)src, static_cast(src_step), (Ipp32fc*)dst, static_cast(dst_step), pDFTSpec, pBuffer ); else - status = ippiDFTInv_CToC_32fc_C1R( (Ipp32fc*)src, src_step, (Ipp32fc*)dst, dst_step, pDFTSpec, pBuffer ); + status = ippiDFTInv_CToC_32fc_C1R( (Ipp32fc*)src, static_cast(src_step), (Ipp32fc*)dst, static_cast(dst_step), pDFTSpec, pBuffer ); if ( sizeBuffer > 0 ) ippFree( pBuffer ); @@ -1804,7 +1804,7 @@ static bool ippi_DFT_C_32F(const uchar * src, int src_step, uchar * dst, int dst return false; } -static bool ippi_DFT_R_32F(const uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, bool inv, int norm_flag) +static bool ippi_DFT_R_32F(const uchar * src, size_t src_step, uchar * dst, size_t dst_step, int width, int height, bool inv, int norm_flag) { IppStatus status; Ipp8u* pBuffer = 0; @@ -1841,9 +1841,9 @@ static bool ippi_DFT_R_32F(const uchar * src, int src_step, uchar * dst, int dst } if (!inv) - status = ippiDFTFwd_RToPack_32f_C1R( (float*)src, src_step, (float*)dst, dst_step, pDFTSpec, pBuffer ); + status = ippiDFTFwd_RToPack_32f_C1R( (float*)src, static_cast(src_step), (float*)dst, static_cast(dst_step), pDFTSpec, pBuffer ); else - status = ippiDFTInv_PackToR_32f_C1R( (float*)src, src_step, (float*)dst, dst_step, pDFTSpec, pBuffer ); + 
status = ippiDFTInv_PackToR_32f_C1R( (float*)src, static_cast(src_step), (float*)dst, static_cast(dst_step), pDFTSpec, pBuffer ); if ( sizeBuffer > 0 ) ippFree( pBuffer ); @@ -2487,7 +2487,7 @@ namespace cv { template -static void complementComplex(T * ptr, int step, int n, int len, int dft_dims) +static void complementComplex(T * ptr, size_t step, int n, int len, int dft_dims) { T* p0 = (T*)ptr; size_t dstep = step/sizeof(p0[0]); @@ -2504,7 +2504,7 @@ static void complementComplex(T * ptr, int step, int n, int len, int dft_dims) } } -static void complementComplexOutput(int depth, uchar * ptr, int step, int count, int len, int dft_dims) +static void complementComplexOutput(int depth, uchar * ptr, size_t step, int count, int len, int dft_dims) { if( depth == CV_32F ) complementComplex((float*)ptr, step, count, len, dft_dims); @@ -2862,7 +2862,7 @@ public: protected: - void rowDft(const uchar* src_data, int src_step, uchar* dst_data, int dst_step, bool isComplex, bool isLastStage) + void rowDft(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, bool isComplex, bool isLastStage) { int len, count; if (width == 1 && !isRowTransform ) @@ -2916,7 +2916,7 @@ protected: complementComplexOutput(depth, dst_data, dst_step, len, nz, 1); } - void colDft(const uchar* src_data, int src_step, uchar* dst_data, int dst_step, int stage_src_channels, int stage_dst_channels, bool isLastStage) + void colDft(const uchar* src_data, size_t src_step, uchar* dst_data, size_t dst_step, int stage_src_channels, int stage_dst_channels, bool isLastStage) { int len = height; int count = width; From 5a938309c1feba9fe81c23c17cd364943f7177cc Mon Sep 17 00:00:00 2001 From: Maksim Shabunin Date: Tue, 19 Apr 2016 16:08:48 +0300 Subject: [PATCH 7/7] More compilation warnings fixed --- modules/core/src/dxt.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/modules/core/src/dxt.cpp b/modules/core/src/dxt.cpp index 164b0f10a..162052667 100644 --- 
a/modules/core/src/dxt.cpp +++ b/modules/core/src/dxt.cpp @@ -3787,7 +3787,7 @@ typedef IppStatus (CV_STDCALL * ippiDCTGetBufSize)(const void*, int*); class DctIPPLoop_Invoker : public ParallelLoopBody { public: - DctIPPLoop_Invoker(const uchar * _src, int _src_step, uchar * _dst, int _dst_step, int _width, bool _inv, bool *_ok) : + DctIPPLoop_Invoker(const uchar * _src, size_t _src_step, uchar * _dst, size_t _dst_step, int _width, bool _inv, bool *_ok) : ParallelLoopBody(), src(_src), src_step(_src_step), dst(_dst), dst_step(_dst_step), width(_width), inv(_inv), ok(_ok) { *ok = true; @@ -3856,7 +3856,7 @@ public: for(int i = range.start; i < range.end; ++i) { - if(ippDctFun((float*)(src + src_step * i), src_step, (float*)(dst + dst_step * i), dst_step, pDCTSpec, pBuffer) < 0) + if(ippDctFun((float*)(src + src_step * i), static_cast(src_step), (float*)(dst + dst_step * i), static_cast(dst_step), pDCTSpec, pBuffer) < 0) { *ok = false; IPP_RETURN @@ -3886,7 +3886,7 @@ public: for( int i = range.start; i < range.end; ++i) { - if(ippDctFun((float*)(src + src_step * i), src_step, (float*)(dst + dst_step * i), dst_step, pDCTSpec, (Ipp8u*)pBuffer) < 0) + if(ippDctFun((float*)(src + src_step * i), static_cast(src_step), (float*)(dst + dst_step * i), static_cast(dst_step), pDCTSpec, (Ipp8u*)pBuffer) < 0) { *ok = false; break; @@ -3908,22 +3908,22 @@ public: private: const uchar * src; - int src_step; + size_t src_step; uchar * dst; - int dst_step; + size_t dst_step; int width; bool inv; bool *ok; }; -static bool DctIPPLoop(const uchar * src, int src_step, uchar * dst, int dst_step, int width, int height, bool inv) +static bool DctIPPLoop(const uchar * src, size_t src_step, uchar * dst, size_t dst_step, int width, int height, bool inv) { bool ok; parallel_for_(Range(0, height), DctIPPLoop_Invoker(src, src_step, dst, dst_step, width, inv, &ok), height/(double)(1<<4) ); return ok; } -static bool ippi_DCT_32f(const uchar * src, int src_step, uchar * dst, int dst_step, int 
width, int height, bool inv, bool row) +static bool ippi_DCT_32f(const uchar * src, size_t src_step, uchar * dst, size_t dst_step, int width, int height, bool inv, bool row) { if(row) return DctIPPLoop(src, src_step, dst, dst_step, width, height, inv); @@ -3978,7 +3978,7 @@ static bool ippi_DCT_32f(const uchar * src, int src_step, uchar * dst, int dst_s return false; } - if(ippDctFun((float*)src, src_step, (float*)dst, dst_step, pDCTSpec, pBuffer) < 0) + if(ippDctFun((float*)src, static_cast(src_step), (float*)dst, static_cast(dst_step), pDCTSpec, pBuffer) < 0) { IPP_RELEASE return false; @@ -4010,7 +4010,7 @@ static bool ippi_DCT_32f(const uchar * src, int src_step, uchar * dst, int dst_s buf.allocate( bufSize ); pBuffer = (uchar*)buf; - status = ippDctFun((float*)src, src_step, (float*)dst, dst_step, pDCTSpec, (Ipp8u*)pBuffer); + status = ippDctFun((float*)src, static_cast(src_step), (float*)dst, static_cast(dst_step), pDCTSpec, (Ipp8u*)pBuffer); } if (pDCTSpec)