5947 changed files with 1696775 additions and 1953265 deletions
--- a/.gitattributes
+++ b/.gitattributes
@ -1,84 +0,0 @@
-*           text=auto whitespace=trailing-space,space-before-tab,-indent-with-non-tab,tab-in-indent,tabwidth=4
-
-.git*       text export-ignore
-
-*.aidl         text
-*.appxmanifest text
-*.bib          text
-*.c            text
-*.cl           text
-*.conf         text
-*.cpp          text
-*.css_t        text
-*.cu           text
-*.cxx          text
-*.def          text
-*.filelist     text
-*.h            text
-*.hpp          text
-*.htm          text
-*.html         text
-*.hxx          text
-*.i            text
-*.idl          text
-*.java         text
-*.js           text
-*.m            text
-*.mk           text
-*.mm           text
-*.plist        text
-*.properties   text
-*.py           text
-*.qrc          text
-*.qss          text
-*.S            text
-*.sbt          text
-*.scala        text
-*.sty          text
-*.tex          text
-*.txt          text
-*.xaml         text
-
-# reST underlines/overlines can look like conflict markers
-*.rst          text conflict-marker-size=80
-
-*.cmake         text whitespace=tabwidth=2
-*.cmakein       text whitespace=tabwidth=2
-*.in            text whitespace=tabwidth=2
-CMakeLists.txt  text whitespace=tabwidth=2
-
-*.avi       binary
-*.bmp       binary
-*.exr       binary
-*.ico       binary
-*.jpeg      binary
-*.jpg       binary
-*.png       binary
-
-*.a         binary
-*.so        binary
-*.dll       binary
-*.jar       binary
-
-*.pdf       binary
-*.pbxproj   binary
-*.vec       binary
-*.doc       binary
-*.dia       binary
-
-*.xml                      -text whitespace=cr-at-eol
-*.yml                      -text whitespace=cr-at-eol
-.project                   -text whitespace=cr-at-eol merge=union
-.classpath                 -text whitespace=cr-at-eol merge=union
-.cproject                  -text whitespace=cr-at-eol merge=union
-org.eclipse.jdt.core.prefs -text whitespace=cr-at-eol merge=union
-
-*.bat       text eol=crlf
-*.cmd       text eol=crlf
-*.cmd.tmpl  text eol=crlf
-*.dsp       text eol=crlf -whitespace
-*.sln       text eol=crlf -whitespace
-*.vcproj    text eol=crlf -whitespace merge=union
-*.vcxproj   text eol=crlf -whitespace merge=union
-
-*.sh        text eol=lf
--- a/.github/ISSUE_TEMPLATE.md
+++ b/.github/ISSUE_TEMPLATE.md
@ -1,30 +0,0 @@
-<!--
-If you have a question rather than reporting a bug please go to http://answers.opencv.org where you get much faster responses.
-If you need further assistance please read [How To Contribute](https://github.com/opencv/opencv/wiki/How_to_contribute).
-
-This is a template helping you to create an issue which can be processed as quickly as possible. This is the bug reporting section for the OpenCV library.
-->
-
-##### System information (version)
-<!-- Example
- OpenCV => 3.1
- Operating System / Platform => Windows 64 Bit
- Compiler => Visual Studio 2015
-->
-
- OpenCV => :grey_question:
- Operating System / Platform => :grey_question:
- Compiler => :grey_question:
-
-##### Detailed description
-
-<!-- your description -->
-
-##### Steps to reproduce
-
-<!-- to add code example fence it with triple backticks and optional file extension
-    ```.cpp
-    // C++ code example
-    ```
- or attach as .txt or .zip file
-->
--- a/.github/PULL_REQUEST_TEMPLATE.md
+++ b/.github/PULL_REQUEST_TEMPLATE.md
@ -1,9 +0,0 @@
-<!-- Please use this line to close one or multiple issues when this pullrequest gets merged
-You can add another line right under the first one:
-resolves #1234
-resolves #1235
-->
-
-### This pullrequest changes
-
-<!-- Please describe what your pullrequest is changing -->
--- a/.gitignore
+++ b/.gitignore
@ -1,25 +0,0 @@
-*.autosave
-*.pyc
-*.user
-*~
-.*.swp
-.DS_Store
-.sw[a-z]
-Thumbs.db
-tags
-tegra/
-bin/
-*.sdf
-*.opensdf
-*.obj
-*.stamp
-*.depend
-*.rule
-*.tmp
-*/debug
-*/CMakeFiles
-CMakeCache.txt
-*.suo
-*.log
-*.tlog
-build
--- a/.tgitconfig
+++ b/.tgitconfig
@ -1,2 +0,0 @@
-[tgit]
-    icon = doc/opencv.ico
--- a/3rdparty/.gitattributes
+++ b/3rdparty/.gitattributes
@ -1 +0,0 @@
-* -whitespace
--- a/3rdparty/CMakeLists.txt
+++ b/3rdparty/CMakeLists.txt
@ -0,0 +1,22 @@
+if(ANDROID)
+ configure_file("${CMAKE_SOURCE_DIR}/Android.mk.modules.in" "${CMAKE_CURRENT_BINARY_DIR}/Android.mk")
+endif()
+
+add_subdirectory(lapack)
+add_subdirectory(zlib)
+if(WITH_JASPER AND NOT JASPER_FOUND)
+	add_subdirectory(libjasper)
+endif()
+if(WITH_JPEG AND NOT JPEG_FOUND)
+	add_subdirectory(libjpeg)
+endif()
+if(WITH_PNG AND NOT PNG_FOUND)
+	add_subdirectory(libpng)
+endif()
+if(WITH_TIFF AND NOT TIFF_FOUND)
+	add_subdirectory(libtiff)
+endif()
+
+if(0)
+add_subdirectory(gtest)
+endif()
--- a/3rdparty/carotene/.gitignore
+++ b/3rdparty/carotene/.gitignore
@ -1,8 +0,0 @@
-# Gedit temp files
-*~
-
-# Qt Creator file
-*.user
-
-# MacOS-specific (Desktop Services Store)
-.DS_Store
--- a/3rdparty/carotene/CMakeLists.txt
+++ b/3rdparty/carotene/CMakeLists.txt
@ -1,42 +0,0 @@
-cmake_minimum_required(VERSION 2.8.11 FATAL_ERROR)
-
-project(Carotene)
-
-set(CAROTENE_NS "carotene" CACHE STRING "Namespace for Carotene definitions")
-
-set(CAROTENE_INCLUDE_DIR include)
-set(CAROTENE_SOURCE_DIR src)
-
-file(GLOB_RECURSE carotene_headers RELATIVE "${CMAKE_CURRENT_LIST_DIR}" "${CAROTENE_INCLUDE_DIR}/*.hpp")
-file(GLOB_RECURSE carotene_sources RELATIVE "${CMAKE_CURRENT_LIST_DIR}" "${CAROTENE_SOURCE_DIR}/*.cpp"
-                                                                        "${CAROTENE_SOURCE_DIR}/*.hpp")
-
-include_directories(${CAROTENE_INCLUDE_DIR})
-
-if(CMAKE_COMPILER_IS_GNUCC)
-    set(CMAKE_CXX_FLAGS "-fvisibility=hidden ${CMAKE_CXX_FLAGS}")
-
-    # allow more inlines - these parameters improve performance for:
-    # - matchTemplate about 5-10%
-    # - goodFeaturesToTrack 10-20%
-    # - cornerHarris 30% for some cases
-
-    set_source_files_properties(${carotene_sources} COMPILE_FLAGS "--param ipcp-unit-growth=100000 --param inline-unit-growth=100000 --param large-stack-frame-growth=5000")
-endif()
-
-add_library(carotene_objs OBJECT
-  ${carotene_headers}
-  ${carotene_sources}
-)
-
-if(NOT CAROTENE_NS STREQUAL "carotene")
-    target_compile_definitions(carotene_objs PUBLIC "-DCAROTENE_NS=${CAROTENE_NS}")
-endif()
-
-if(WITH_NEON)
-    target_compile_definitions(carotene_objs PRIVATE "-DWITH_NEON")
-endif()
-
-set_target_properties(carotene_objs PROPERTIES POSITION_INDEPENDENT_CODE TRUE)
-
-add_library(carotene STATIC EXCLUDE_FROM_ALL "$<TARGET_OBJECTS:carotene_objs>")
--- a/3rdparty/carotene/README.md
+++ b/3rdparty/carotene/README.md
@ -1,2 +0,0 @@
-This is Carotene, a low-level library containing optimized CPU routines
-that are useful for computer vision algorithms.
--- a/3rdparty/carotene/hal/CMakeLists.txt
+++ b/3rdparty/carotene/hal/CMakeLists.txt
@ -1,112 +0,0 @@
-cmake_minimum_required(VERSION 2.8.8 FATAL_ERROR)
-
-include(CheckCCompilerFlag)
-include(CheckCXXCompilerFlag)
-
-set(CMAKE_POSITION_INDEPENDENT_CODE ON)
-
-set(TEGRA_HAL_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
-set(CAROTENE_DIR "${TEGRA_HAL_DIR}/../")
-
-if(CMAKE_SYSTEM_PROCESSOR MATCHES "^(arm.*|ARM.*)")
-  set(ARM TRUE)
-elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64.*|AARCH64.*")
-  set(AARCH64 TRUE)
-endif()
-
-set(TEGRA_COMPILER_FLAGS "")
-
-if(CMAKE_COMPILER_IS_GNUCXX)
-  # Generate unwind information even for functions that can't throw/propagate exceptions.
-  # This lets debuggers and such get non-broken backtraces for such functions, even without debugging symbols.
-  list(APPEND TEGRA_COMPILER_FLAGS -funwind-tables)
-endif()
-
-if(CMAKE_COMPILER_IS_GNUCXX)
-  if(X86 OR ARMEABI_V6 OR (MIPS AND ANDROID_COMPILER_VERSION VERSION_LESS "4.6"))
-    list(APPEND TEGRA_COMPILER_FLAGS -fweb -fwrapv -frename-registers -fsched-stalled-insns-dep=100 -fsched-stalled-insns=2)
-  else()
-    list(APPEND TEGRA_COMPILER_FLAGS -fweb -fwrapv -frename-registers -fsched2-use-superblocks -fsched2-use-traces
-                                     -fsched-stalled-insns-dep=100 -fsched-stalled-insns=2)
-  endif()
-  if((ANDROID_COMPILER_IS_CLANG OR NOT ANDROID_COMPILER_VERSION VERSION_LESS "4.7") AND ANDROID_NDK_RELEASE STRGREATER "r8d" )
-    list(APPEND TEGRA_COMPILER_FLAGS -fgraphite -fgraphite-identity -floop-block -floop-flatten -floop-interchange
-                                     -floop-strip-mine -floop-parallelize-all -ftree-loop-linear)
-  endif()
-endif()
-
-string(REPLACE ";" " " TEGRA_COMPILER_FLAGS "${TEGRA_COMPILER_FLAGS}")
-set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${TEGRA_COMPILER_FLAGS}")
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${TEGRA_COMPILER_FLAGS}")
-
-if(ARMEABI_V7A)
-  if (CMAKE_COMPILER_IS_GNUCXX)
-    set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-tree-vectorize" )
-    set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-tree-vectorize" )
-  endif()
-endif()
-
-if(WITH_LOGS)
-  add_definitions(-DHAVE_LOGS)
-endif()
-
-set(CAROTENE_NS "carotene_o4t" CACHE STRING "" FORCE)
-
-function(compile_carotene)
-  if(ENABLE_NEON)
-    set(WITH_NEON ON)
-  endif()
-
-  add_subdirectory("${CAROTENE_DIR}" "${CMAKE_CURRENT_BINARY_DIR}/carotene")
-
-  if(ARM OR AARCH64)
-    if(CMAKE_BUILD_TYPE)
-      set(CMAKE_TRY_COMPILE_CONFIGURATION ${CMAKE_BUILD_TYPE})
-    endif()
-    check_cxx_compiler_flag("-mfpu=neon" CXX_HAS_MFPU_NEON)
-    check_c_compiler_flag("-mfpu=neon" C_HAS_MFPU_NEON)
-    if(${CXX_HAS_MFPU_NEON} AND ${C_HAS_MFPU_NEON})
-      get_target_property(old_flags "carotene_objs" COMPILE_FLAGS)
-      if(old_flags)
-        set_target_properties("carotene_objs" PROPERTIES COMPILE_FLAGS "${old_flags} -mfpu=neon")
-      else()
-        set_target_properties("carotene_objs" PROPERTIES COMPILE_FLAGS "-mfpu=neon")
-      endif()
-    endif()
-  endif()
-endfunction()
-
-compile_carotene()
-
-include_directories("${CAROTENE_DIR}/include")
-
-get_target_property(carotene_defs carotene_objs INTERFACE_COMPILE_DEFINITIONS)
-set_property(DIRECTORY APPEND PROPERTY COMPILE_DEFINITIONS ${carotene_defs})
-
-  if (CMAKE_COMPILER_IS_GNUCXX)
-    # allow more inlines - these parameters improve performance for:
-    #   matchTemplate about 5-10%
-    #   goodFeaturesToTrack 10-20%
-    #   cornerHarris 30% for some cases
-    set_source_files_properties(impl.cpp $<TARGET_OBJECTS:carotene_objs> COMPILE_FLAGS "--param ipcp-unit-growth=100000 --param inline-unit-growth=100000 --param large-stack-frame-growth=5000")
-#    set_source_files_properties(impl.cpp $<TARGET_OBJECTS:carotene_objs> COMPILE_FLAGS "--param ipcp-unit-growth=100000 --param inline-unit-growth=100000 --param large-stack-frame-growth=5000")
-  endif()
-
-add_library(tegra_hal STATIC $<TARGET_OBJECTS:carotene_objs>)
-set_target_properties(tegra_hal PROPERTIES POSITION_INDEPENDENT_CODE TRUE)
-set_target_properties(tegra_hal PROPERTIES ARCHIVE_OUTPUT_DIRECTORY ${3P_LIBRARY_OUTPUT_PATH})
-set(OPENCV_SRC_DIR "${CMAKE_SOURCE_DIR}")
-if(NOT BUILD_SHARED_LIBS)
-  ocv_install_target(tegra_hal EXPORT OpenCVModules ARCHIVE DESTINATION ${OPENCV_3P_LIB_INSTALL_PATH} COMPONENT dev)
-endif()
-target_include_directories(tegra_hal PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} ${OPENCV_SRC_DIR}/modules/core/include)
-
-set(CAROTENE_HAL_VERSION "0.0.1" PARENT_SCOPE)
-set(CAROTENE_HAL_LIBRARIES "tegra_hal" PARENT_SCOPE)
-set(CAROTENE_HAL_HEADERS "carotene/tegra_hal.hpp" PARENT_SCOPE)
-set(CAROTENE_HAL_INCLUDE_DIRS "${CMAKE_BINARY_DIR}" PARENT_SCOPE)
-
-configure_file("tegra_hal.hpp" "${CMAKE_BINARY_DIR}/carotene/tegra_hal.hpp" COPYONLY)
-configure_file("${CAROTENE_DIR}/include/carotene/definitions.hpp" "${CMAKE_BINARY_DIR}/carotene/definitions.hpp" COPYONLY)
-configure_file("${CAROTENE_DIR}/include/carotene/functions.hpp" "${CMAKE_BINARY_DIR}/carotene/functions.hpp" COPYONLY)
-configure_file("${CAROTENE_DIR}/include/carotene/types.hpp" "${CMAKE_BINARY_DIR}/carotene/types.hpp" COPYONLY)
--- a/3rdparty/carotene/hal/tegra_hal.hpp
+++ b/3rdparty/carotene/hal/tegra_hal.hpp
--- a/3rdparty/carotene/include/carotene/definitions.hpp
+++ b/3rdparty/carotene/include/carotene/definitions.hpp
@ -1,47 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2015, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-#ifndef CAROTENE_DEFINITIONS_HPP
-#define CAROTENE_DEFINITIONS_HPP
-
-#ifndef CAROTENE_NS
-#define CAROTENE_NS carotene
-#endif
-
-#endif
--- a/3rdparty/carotene/include/carotene/functions.hpp
+++ b/3rdparty/carotene/include/carotene/functions.hpp
--- a/3rdparty/carotene/include/carotene/types.hpp
+++ b/3rdparty/carotene/include/carotene/types.hpp
@ -1,125 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2014-2015, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-#ifndef CAROTENE_TYPES_HPP
-#define CAROTENE_TYPES_HPP
-
-#include <carotene/definitions.hpp>
-#include <stdint.h>
-#include <cstddef>
-
-#ifndef UINT32_MAX
-    #define UINT32_MAX (4294967295U)
-#endif
-
-namespace CAROTENE_NS {
-    using std::size_t;
-    using std::ptrdiff_t;
-
-    typedef int8_t   s8;
-    typedef uint8_t  u8;
-    typedef int16_t  s16;
-    typedef uint16_t u16;
-    typedef int32_t  s32;
-    typedef uint32_t u32;
-    typedef float    f32;
-    typedef int64_t  s64;
-    typedef uint64_t u64;
-    typedef double   f64;
-
-    typedef ptrdiff_t  stride_t;
-
-    enum CONVERT_POLICY
-    {
-        CONVERT_POLICY_WRAP,
-        CONVERT_POLICY_SATURATE
-    };
-
-    enum BORDER_MODE
-    {
-        BORDER_MODE_UNDEFINED,
-        BORDER_MODE_CONSTANT,
-        BORDER_MODE_REPLICATE,
-        BORDER_MODE_REFLECT,
-        BORDER_MODE_REFLECT101,
-        BORDER_MODE_WRAP
-    };
-
-    enum FLIP_MODE
-    {
-        FLIP_HORIZONTAL_MODE = 1,
-        FLIP_VERTICAL_MODE = 2,
-        FLIP_BOTH_MODE = FLIP_HORIZONTAL_MODE | FLIP_VERTICAL_MODE
-    };
-
-    enum COLOR_SPACE
-    {
-        COLOR_SPACE_BT601,
-        COLOR_SPACE_BT709
-    };
-
-    struct Size2D {
-        Size2D() : width(0), height(0) {}
-        Size2D(size_t width_, size_t height_) : width(width_), height(height_) {}
-
-        size_t width;
-        size_t height;
-
-        inline size_t total() const
-        {
-            return width * height;
-        }
-    };
-
-    struct Margin {
-        Margin() : left(0), right(0), top(0), bottom(0) {}
-        Margin(size_t left_, size_t right_, size_t top_, size_t bottom_)
-            : left(left_), right(right_), top(top_), bottom(bottom_) {}
-
-        // these are measured in elements
-        size_t left, right, top, bottom;
-    };
-
-    struct KeypointStore {
-        virtual void push(f32 kpX, f32 kpY, f32 kpSize, f32 kpAngle=-1, f32 kpResponse=0, s32 kpOctave=0, s32 kpClass_id=-1) = 0;
-        virtual ~KeypointStore() {};
-    };
-}
-
-#endif
--- a/3rdparty/carotene/src/absdiff.cpp
+++ b/3rdparty/carotene/src/absdiff.cpp
@ -1,241 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2014-2015, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-#include <algorithm>
-
-#include "common.hpp"
-#include "vtransform.hpp"
-
-namespace CAROTENE_NS {
-
-#ifdef CAROTENE_NEON
-
-namespace {
-
-template <typename T>
-struct AbsDiff
-{
-    typedef T type;
-
-    void operator() (const typename internal::VecTraits<T>::vec128 & v_src0,
-                     const typename internal::VecTraits<T>::vec128 & v_src1,
-                     typename internal::VecTraits<T>::vec128 & v_dst) const
-    {
-        v_dst = internal::vabdq(v_src0, v_src1);
-    }
-
-    void operator() (const typename internal::VecTraits<T>::vec64 & v_src0,
-                     const typename internal::VecTraits<T>::vec64 & v_src1,
-                     typename internal::VecTraits<T>::vec64 & v_dst) const
-    {
-        v_dst = internal::vabd(v_src0, v_src1);
-    }
-
-    void operator() (const T * src0, const T * src1, T * dst) const
-    {
-        dst[0] = src0[0] >= src1[0] ? src0[0] - src1[0] : src1[0] - src0[0];
-    }
-};
-
-template <typename T>
-struct AbsDiffSigned
-{
-    typedef T type;
-
-    void operator() (const typename internal::VecTraits<T>::vec128 & v_src0,
-                     const typename internal::VecTraits<T>::vec128 & v_src1,
-                     typename internal::VecTraits<T>::vec128 & v_dst) const
-    {
-        typename internal::VecTraits<T>::vec128 v_min = internal::vminq(v_src0, v_src1);
-        typename internal::VecTraits<T>::vec128 v_max = internal::vmaxq(v_src0, v_src1);
-        v_dst = internal::vqsubq(v_max, v_min);
-    }
-
-    void operator() (const typename internal::VecTraits<T>::vec64 & v_src0,
-                     const typename internal::VecTraits<T>::vec64 & v_src1,
-                     typename internal::VecTraits<T>::vec64 & v_dst) const
-    {
-        typename internal::VecTraits<T>::vec64 v_min = internal::vmin(v_src0, v_src1);
-        typename internal::VecTraits<T>::vec64 v_max = internal::vmax(v_src0, v_src1);
-        v_dst = internal::vqsub(v_max, v_min);
-    }
-
-    void operator() (const T * src0, const T * src1, T * dst) const
-    {
-        dst[0] = internal::saturate_cast<T>(src0[0] >= src1[0] ? (s64)src0[0] - src1[0] : (s64)src1[0] - src0[0]);
-    }
-};
-
-} // namespace
-
-#endif
-
-void absDiff(const Size2D &size,
-             const u8 *src0Base, ptrdiff_t src0Stride,
-             const u8 *src1Base, ptrdiff_t src1Stride,
-             u8 *dstBase, ptrdiff_t dstStride)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    internal::vtransform(size,
-                         src0Base, src0Stride,
-                         src1Base, src1Stride,
-                         dstBase, dstStride, AbsDiff<u8>());
-#else
-    (void)size;
-    (void)src0Base;
-    (void)src0Stride;
-    (void)src1Base;
-    (void)src1Stride;
-    (void)dstBase;
-    (void)dstStride;
-#endif
-}
-
-void absDiff(const Size2D &size,
-             const u16 *src0Base, ptrdiff_t src0Stride,
-             const u16 *src1Base, ptrdiff_t src1Stride,
-             u16 *dstBase, ptrdiff_t dstStride)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    internal::vtransform(size,
-                         src0Base, src0Stride,
-                         src1Base, src1Stride,
-                         dstBase, dstStride, AbsDiff<u16>());
-#else
-    (void)size;
-    (void)src0Base;
-    (void)src0Stride;
-    (void)src1Base;
-    (void)src1Stride;
-    (void)dstBase;
-    (void)dstStride;
-#endif
-}
-
-void absDiff(const Size2D &size,
-             const s8 *src0Base, ptrdiff_t src0Stride,
-             const s8 *src1Base, ptrdiff_t src1Stride,
-             s8 *dstBase, ptrdiff_t dstStride)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    internal::vtransform(size,
-                         src0Base, src0Stride,
-                         src1Base, src1Stride,
-                         dstBase, dstStride, AbsDiffSigned<s8>());
-#else
-    (void)size;
-    (void)src0Base;
-    (void)src0Stride;
-    (void)src1Base;
-    (void)src1Stride;
-    (void)dstBase;
-    (void)dstStride;
-#endif
-}
-
-void absDiff(const Size2D &size,
-             const s16 *src0Base, ptrdiff_t src0Stride,
-             const s16 *src1Base, ptrdiff_t src1Stride,
-             s16 *dstBase, ptrdiff_t dstStride)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    internal::vtransform(size,
-                         src0Base, src0Stride,
-                         src1Base, src1Stride,
-                         dstBase, dstStride, AbsDiffSigned<s16>());
-#else
-    (void)size;
-    (void)src0Base;
-    (void)src0Stride;
-    (void)src1Base;
-    (void)src1Stride;
-    (void)dstBase;
-    (void)dstStride;
-#endif
-}
-
-void absDiff(const Size2D &size,
-             const s32 *src0Base, ptrdiff_t src0Stride,
-             const s32 *src1Base, ptrdiff_t src1Stride,
-             s32 *dstBase, ptrdiff_t dstStride)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    internal::vtransform(size,
-                         src0Base, src0Stride,
-                         src1Base, src1Stride,
-                         dstBase, dstStride, AbsDiffSigned<s32>());
-#else
-    (void)size;
-    (void)src0Base;
-    (void)src0Stride;
-    (void)src1Base;
-    (void)src1Stride;
-    (void)dstBase;
-    (void)dstStride;
-#endif
-}
-
-void absDiff(const Size2D &size,
-             const f32 * src0Base, ptrdiff_t src0Stride,
-             const f32 * src1Base, ptrdiff_t src1Stride,
-             f32 * dstBase, ptrdiff_t dstStride)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    internal::vtransform(size,
-                         src0Base, src0Stride,
-                         src1Base, src1Stride,
-                         dstBase, dstStride, AbsDiff<f32>());
-#else
-    (void)size;
-    (void)src0Base;
-    (void)src0Stride;
-    (void)src1Base;
-    (void)src1Stride;
-    (void)dstBase;
-    (void)dstStride;
-#endif
-}
-
-} // namespace CAROTENE_NS
--- a/3rdparty/carotene/src/accumulate.cpp
+++ b/3rdparty/carotene/src/accumulate.cpp
@ -1,408 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2014, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-
-#include "common.hpp"
-#include "vtransform.hpp"
-
-#include <cstring>
-
-namespace CAROTENE_NS {
-
-void accumulate(const Size2D &size,
-                const u8 *srcBase, ptrdiff_t srcStride,
-                s16 *dstBase, ptrdiff_t dstStride)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    size_t roiw16 = size.width >= 15 ? size.width - 15 : 0;
-    size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
-
-    for (size_t i = 0; i < size.height; ++i)
-    {
-        const u8* src = internal::getRowPtr(srcBase, srcStride, i);
-        s16* dst = internal::getRowPtr(dstBase, dstStride, i);
-        size_t j = 0;
-
-        for (; j < roiw16; j += 16)
-        {
-            internal::prefetch(src + j);
-            internal::prefetch(dst + j);
-            uint8x16_t v_src = vld1q_u8(src + j);
-            int16x8_t v_dst0 = vld1q_s16(dst + j);
-            int16x8_t v_dst1 = vld1q_s16(dst + j + 8);
-            int16x8_t v_src0 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(v_src)));
-            int16x8_t v_src1 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(v_src)));
-            v_dst0 = vqaddq_s16(v_dst0, v_src0);
-            v_dst1 = vqaddq_s16(v_dst1, v_src1);
-            vst1q_s16(dst + j, v_dst0);
-            vst1q_s16(dst + j + 8, v_dst1);
-        }
-        for (; j < roiw8; j += 8)
-        {
-            uint8x8_t v_src = vld1_u8(src + j);
-            int16x8_t v_src16 = vreinterpretq_s16_u16(vmovl_u8(v_src));
-            int16x8_t v_dst = vld1q_s16(dst + j);
-            v_dst = vqaddq_s16(v_dst, v_src16);
-            vst1q_s16(dst + j, v_dst);
-        }
-
-        for (; j < size.width; j++)
-            dst[j] = internal::saturate_cast<s16>(src[j] + dst[j]);
-    }
-#else
-    (void)size;
-    (void)srcBase;
-    (void)srcStride;
-    (void)dstBase;
-    (void)dstStride;
-#endif
-}
-
-#ifdef CAROTENE_NEON
-
-namespace {
-
-template <int shift>
-void accumulateSquareConst(const Size2D &size,
-                           const u8 *srcBase, ptrdiff_t srcStride,
-                           s16 *dstBase, ptrdiff_t dstStride)
-{
-    size_t roiw16 = size.width >= 15 ? size.width - 15 : 0;
-    size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
-
-    for (size_t i = 0; i < size.height; ++i)
-    {
-        const u8* src = internal::getRowPtr(srcBase, srcStride, i);
-        s16* dst = internal::getRowPtr(dstBase, dstStride, i);
-        size_t j = 0;
-
-        for (; j < roiw16; j += 16)
-        {
-            internal::prefetch(src + j);
-            internal::prefetch(dst + j);
-            uint8x16_t v_src = vld1q_u8(src + j);
-            int16x8_t v_dst0 = vld1q_s16(dst + j), v_dst1 = vld1q_s16(dst + j + 8);
-            int16x8_t v_src0 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(v_src)));
-            int16x8_t v_src1 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(v_src)));
-
-            int16x4_t v_srclo = vget_low_s16(v_src0), v_srchi = vget_high_s16(v_src0);
-            v_dst0 = vcombine_s16(vqmovn_s32(vaddw_s16(vshrq_n_s32(vmull_s16(v_srclo, v_srclo), shift), vget_low_s16(v_dst0))),
-                                  vqmovn_s32(vaddw_s16(vshrq_n_s32(vmull_s16(v_srchi, v_srchi), shift), vget_high_s16(v_dst0))));
-
-            v_srclo = vget_low_s16(v_src1);
-            v_srchi = vget_high_s16(v_src1);
-            v_dst1 = vcombine_s16(vqmovn_s32(vaddw_s16(vshrq_n_s32(vmull_s16(v_srclo, v_srclo), shift), vget_low_s16(v_dst1))),
-                                  vqmovn_s32(vaddw_s16(vshrq_n_s32(vmull_s16(v_srchi, v_srchi), shift), vget_high_s16(v_dst1))));
-
-            vst1q_s16(dst + j, v_dst0);
-            vst1q_s16(dst + j + 8, v_dst1);
-        }
-        for (; j < roiw8; j += 8)
-        {
-            int16x8_t v_src = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(src + j)));
-            int16x8_t v_dst = vld1q_s16(dst + j);
-            int16x4_t v_srclo = vget_low_s16(v_src), v_srchi = vget_high_s16(v_src);
-            v_dst = vcombine_s16(vqmovn_s32(vaddw_s16(vshrq_n_s32(vmull_s16(v_srclo, v_srclo), shift), vget_low_s16(v_dst))),
-                                 vqmovn_s32(vaddw_s16(vshrq_n_s32(vmull_s16(v_srchi, v_srchi), shift), vget_high_s16(v_dst))));
-            vst1q_s16(dst + j, v_dst);
-        }
-
-        for (; j < size.width; j++)
-        {
-            s32 srcVal = src[j];
-            dst[j] = internal::saturate_cast<s16>(dst[j] + ((srcVal * srcVal) >> shift));
-        }
-    }
-}
-
-template <>
-void accumulateSquareConst<0>(const Size2D &size,
-                              const u8 *srcBase, ptrdiff_t srcStride,
-                              s16 *dstBase, ptrdiff_t dstStride)
-{
-    size_t roiw16 = size.width >= 15 ? size.width - 15 : 0;
-    size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
-
-    for (size_t i = 0; i < size.height; ++i)
-    {
-        const u8* src = internal::getRowPtr(srcBase, srcStride, i);
-        s16* dst = internal::getRowPtr(dstBase, dstStride, i);
-        size_t j = 0;
-
-        for (; j < roiw16; j += 16)
-        {
-            internal::prefetch(src + j);
-            internal::prefetch(dst + j);
-            uint8x16_t v_src = vld1q_u8(src + j);
-            int16x8_t v_dst0 = vld1q_s16(dst + j), v_dst1 = vld1q_s16(dst + j + 8);
-            int16x8_t v_src0 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(v_src)));
-            int16x8_t v_src1 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(v_src)));
-
-            int16x4_t v_srclo = vget_low_s16(v_src0), v_srchi = vget_high_s16(v_src0);
-            v_dst0 = vcombine_s16(vqmovn_s32(vaddw_s16(vmull_s16(v_srclo, v_srclo), vget_low_s16(v_dst0))),
-                                  vqmovn_s32(vaddw_s16(vmull_s16(v_srchi, v_srchi), vget_high_s16(v_dst0))));
-
-            v_srclo = vget_low_s16(v_src1);
-            v_srchi = vget_high_s16(v_src1);
-            v_dst1 = vcombine_s16(vqmovn_s32(vaddw_s16(vmull_s16(v_srclo, v_srclo), vget_low_s16(v_dst1))),
-                                  vqmovn_s32(vaddw_s16(vmull_s16(v_srchi, v_srchi), vget_high_s16(v_dst1))));
-
-            vst1q_s16(dst + j, v_dst0);
-            vst1q_s16(dst + j + 8, v_dst1);
-        }
-        for (; j < roiw8; j += 8)
-        {
-            int16x8_t v_src = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(src + j)));
-            int16x8_t v_dst = vld1q_s16(dst + j);
-            int16x4_t v_srclo = vget_low_s16(v_src), v_srchi = vget_high_s16(v_src);
-            v_dst = vcombine_s16(vqmovn_s32(vaddw_s16(vmull_s16(v_srclo, v_srclo), vget_low_s16(v_dst))),
-                                 vqmovn_s32(vaddw_s16(vmull_s16(v_srchi, v_srchi), vget_high_s16(v_dst))));
-            vst1q_s16(dst + j, v_dst);
-        }
-
-        for (; j < size.width; j++)
-        {
-            s32 srcVal = src[j];
-            dst[j] = internal::saturate_cast<s16>(dst[j] + srcVal * srcVal);
-        }
-    }
-}
-
-typedef void (* accumulateSquareConstFunc)(const Size2D &size,
-                                           const u8 *srcBase, ptrdiff_t srcStride,
-                                           s16 *dstBase, ptrdiff_t dstStride);
-
-} // namespace
-
-#endif
-
-void accumulateSquare(const Size2D &size,
-                      const u8 *srcBase, ptrdiff_t srcStride,
-                      s16 *dstBase, ptrdiff_t dstStride,
-                      u32 shift)
-{
-    if (shift >= 16)
-    {
-        for (size_t i = 0; i < size.height; ++i)
-        {
-            s16 * dst = internal::getRowPtr(dstBase, dstStride, i);
-            std::memset(dst, 0, sizeof(s16) * size.width);
-        }
-        return;
-    }
-
-    internal::assertSupportedConfiguration();
-
-#ifdef CAROTENE_NEON
-    // this ugly contruction is needed to avoid:
-    // /usr/lib/gcc/arm-linux-gnueabihf/4.8/include/arm_neon.h:3581:59: error: argument must be a constant
-    // return (int16x8_t)__builtin_neon_vshr_nv8hi (__a, __b, 1);
-
-    accumulateSquareConstFunc funcs[16] =
-    {
-        accumulateSquareConst<0>,
-        accumulateSquareConst<1>,
-        accumulateSquareConst<2>,
-        accumulateSquareConst<3>,
-        accumulateSquareConst<4>,
-        accumulateSquareConst<5>,
-        accumulateSquareConst<6>,
-        accumulateSquareConst<7>,
-        accumulateSquareConst<8>,
-        accumulateSquareConst<9>,
-        accumulateSquareConst<10>,
-        accumulateSquareConst<11>,
-        accumulateSquareConst<12>,
-        accumulateSquareConst<13>,
-        accumulateSquareConst<14>,
-        accumulateSquareConst<15>
-    }, func = funcs[shift];
-
-    func(size, srcBase, srcStride, dstBase, dstStride);
-#else
-    (void)size;
-    (void)srcBase;
-    (void)srcStride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)shift;
-#endif
-}
-
-#ifdef CAROTENE_NEON
-
-namespace {
-
-struct AccumulateWeightedHalf
-{
-    typedef u8 type;
-
-    void operator() (const uint8x16_t & v_src0, const uint8x16_t & v_src1,
-                     uint8x16_t & v_dst) const
-    {
-        v_dst = vhaddq_u8(v_src0, v_src1);
-    }
-
-    void operator() (const uint8x8_t & v_src0, const uint8x8_t & v_src1,
-                     uint8x8_t & v_dst) const
-    {
-        v_dst = vhadd_u8(v_src0, v_src1);
-    }
-
-    void operator() (const u8 * src0, const u8 * src1, u8 * dst) const
-    {
-        dst[0] = ((u16)(src0[0]) + src1[0]) >> 1;
-    }
-};
-
-struct AccumulateWeighted
-{
-    typedef u8 type;
-
-    float alpha, beta;
-    float32x4_t v_alpha, v_beta;
-
-    explicit AccumulateWeighted(float _alpha) :
-        alpha(_alpha), beta(1 - _alpha)
-    {
-        v_alpha = vdupq_n_f32(alpha);
-        v_beta = vdupq_n_f32(beta);
-    }
-
-    void operator() (const uint8x16_t & v_src0, const uint8x16_t & v_src1,
-                     uint8x16_t & v_dst) const
-    {
-        uint16x8_t v_src0_p = vmovl_u8(vget_low_u8(v_src0));
-        uint16x8_t v_src1_p = vmovl_u8(vget_low_u8(v_src1));
-        float32x4_t v_dst0f = vmlaq_f32(vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src1_p))), v_beta),
-                                        v_alpha, vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src0_p))));
-        float32x4_t v_dst1f = vmlaq_f32(vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src1_p))), v_beta),
-                                        v_alpha, vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src0_p))));
-        uint16x8_t v_dst0 = vcombine_u16(vmovn_u32(vcvtq_u32_f32(v_dst0f)),
-                                         vmovn_u32(vcvtq_u32_f32(v_dst1f)));
-
-        v_src0_p = vmovl_u8(vget_high_u8(v_src0));
-        v_src1_p = vmovl_u8(vget_high_u8(v_src1));
-        v_dst0f = vmlaq_f32(vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src1_p))), v_beta),
-                            v_alpha, vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src0_p))));
-        v_dst1f = vmlaq_f32(vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src1_p))), v_beta),
-                            v_alpha, vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src0_p))));
-        uint16x8_t v_dst1 = vcombine_u16(vmovn_u32(vcvtq_u32_f32(v_dst0f)),
-                                         vmovn_u32(vcvtq_u32_f32(v_dst1f)));
-
-        v_dst = vcombine_u8(vmovn_u16(v_dst0), vmovn_u16(v_dst1));
-    }
-
-    void operator() (const uint8x8_t & _v_src0, const uint8x8_t & _v_src1,
-                     uint8x8_t & v_dst) const
-    {
-        uint16x8_t v_src0 = vmovl_u8(_v_src0), v_src1 = vmovl_u8(_v_src1);
-
-        float32x4_t v_dst0f = vmlaq_f32(vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src1))), v_beta),
-                                        v_alpha, vcvtq_f32_u32(vmovl_u16(vget_low_u16(v_src0))));
-        float32x4_t v_dst1f = vmlaq_f32(vmulq_f32(vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src1))), v_beta),
-                                        v_alpha, vcvtq_f32_u32(vmovl_u16(vget_high_u16(v_src0))));
-        uint16x8_t _v_dst = vcombine_u16(vmovn_u32(vcvtq_u32_f32(v_dst0f)),
-                                        vmovn_u32(vcvtq_u32_f32(v_dst1f)));
-
-        v_dst = vmovn_u16(_v_dst);
-    }
-
-    void operator() (const u8 * src0, const u8 * src1, u8 * dst) const
-    {
-        dst[0] = beta * src1[0] + alpha * src0[0];
-    }
-};
-
-} // namespace
-
-#endif
-
-void accumulateWeighted(const Size2D &size,
-                        const u8 *srcBase, ptrdiff_t srcStride,
-                        u8 *dstBase, ptrdiff_t dstStride,
-                        f32 alpha)
-{
-    if (alpha == 0.0f)
-        return;
-    if (alpha == 1.0f)
-    {
-        for (size_t i = 0; i < size.height; ++i)
-        {
-            const u8 * src = internal::getRowPtr(srcBase, srcStride, i);
-            u8 * dst = internal::getRowPtr(dstBase, dstStride, i);
-            std::memcpy(dst, src, sizeof(u8) * size.width);
-        }
-        return;
-    }
-
-    internal::assertSupportedConfiguration();
-
-#ifdef CAROTENE_NEON
-    // in this case we can use the following scheme:
-    // dst[p] = (src[p] + dst[p]) >> 1
-    // which is faster
-    if (alpha == 0.5f)
-    {
-        internal::vtransform(size,
-                             srcBase, srcStride,
-                             dstBase, dstStride,
-                             dstBase, dstStride,
-                             AccumulateWeightedHalf());
-
-        return;
-    }
-
-    internal::vtransform(size,
-                     srcBase, srcStride,
-                     dstBase, dstStride,
-                     dstBase, dstStride,
-                     AccumulateWeighted(alpha));
-#else
-    (void)size;
-    (void)srcBase;
-    (void)srcStride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)alpha;
-#endif
-}
-
-} //namespace CAROTENE_NS
--- a/3rdparty/carotene/src/add.cpp
+++ b/3rdparty/carotene/src/add.cpp
@ -1,475 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2014, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-#include "common.hpp"
-#include "vtransform.hpp"
-
-namespace CAROTENE_NS {
-
-#ifdef CAROTENE_NEON
-
-namespace {
-
-template <typename T, typename WT>
-struct AddWrap
-{
-    typedef T type;
-
-    void operator() (const typename internal::VecTraits<T>::vec128 & v_src0,
-                     const typename internal::VecTraits<T>::vec128 & v_src1,
-                     typename internal::VecTraits<T>::vec128 & v_dst) const
-    {
-        v_dst = internal::vaddq(v_src0, v_src1);
-    }
-
-    void operator() (const typename internal::VecTraits<T>::vec64 & v_src0,
-                     const typename internal::VecTraits<T>::vec64 & v_src1,
-                     typename internal::VecTraits<T>::vec64 & v_dst) const
-    {
-        v_dst = internal::vadd(v_src0, v_src1);
-    }
-
-    void operator() (const T * src0, const T * src1, T * dst) const
-    {
-        dst[0] = (T)((WT)src0[0] + (WT)src1[0]);
-    }
-};
-
-template <typename T, typename WT>
-struct AddSaturate
-{
-    typedef T type;
-
-    void operator() (const typename internal::VecTraits<T>::vec128 & v_src0,
-                     const typename internal::VecTraits<T>::vec128 & v_src1,
-                     typename internal::VecTraits<T>::vec128 & v_dst) const
-    {
-        v_dst = internal::vqaddq(v_src0, v_src1);
-    }
-
-    void operator() (const typename internal::VecTraits<T>::vec64 & v_src0,
-                     const typename internal::VecTraits<T>::vec64 & v_src1,
-                     typename internal::VecTraits<T>::vec64 & v_dst) const
-    {
-        v_dst = internal::vqadd(v_src0, v_src1);
-    }
-
-    void operator() (const T * src0, const T * src1, T * dst) const
-    {
-        dst[0] = internal::saturate_cast<T>((WT)src0[0] + (WT)src1[0]);
-    }
-};
-
-} // namespace
-
-#endif
-
-void add(const Size2D &size,
-         const u8 * src0Base, ptrdiff_t src0Stride,
-         const u8 * src1Base, ptrdiff_t src1Stride,
-         u8 *dstBase, ptrdiff_t dstStride,
-         CONVERT_POLICY policy)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    if (policy == CONVERT_POLICY_SATURATE)
-    {
-        internal::vtransform(size,
-                             src0Base, src0Stride,
-                             src1Base, src1Stride,
-                             dstBase, dstStride,
-                             AddSaturate<u8, u16>());
-    }
-    else
-    {
-        internal::vtransform(size,
-                             src0Base, src0Stride,
-                             src1Base, src1Stride,
-                             dstBase, dstStride,
-                             AddWrap<u8, u16>());
-    }
-#else
-    (void)size;
-    (void)src0Base;
-    (void)src0Stride;
-    (void)src1Base;
-    (void)src1Stride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)policy;
-#endif
-}
-
-void add(const Size2D &size,
-         const s8 * src0Base, ptrdiff_t src0Stride,
-         const s8 * src1Base, ptrdiff_t src1Stride,
-         s8 *dstBase, ptrdiff_t dstStride,
-         CONVERT_POLICY policy)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    if (policy == CONVERT_POLICY_SATURATE)
-    {
-        internal::vtransform(size,
-                             src0Base, src0Stride,
-                             src1Base, src1Stride,
-                             dstBase, dstStride,
-                             AddSaturate<s8, s16>());
-    }
-    else
-    {
-        internal::vtransform(size,
-                             src0Base, src0Stride,
-                             src1Base, src1Stride,
-                             dstBase, dstStride,
-                             AddWrap<s8, s16>());
-    }
-#else
-    (void)size;
-    (void)src0Base;
-    (void)src0Stride;
-    (void)src1Base;
-    (void)src1Stride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)policy;
-#endif
-}
-
-void add(const Size2D &size,
-         const u8 * src0Base, ptrdiff_t src0Stride,
-         const u8 * src1Base, ptrdiff_t src1Stride,
-         s16 *dstBase, ptrdiff_t dstStride,
-         CONVERT_POLICY)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    size_t roiw32 = size.width >= 31 ? size.width - 31 : 0;
-    size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
-
-    for (size_t i = 0; i < size.height; ++i)
-    {
-        const u8 * src0 = internal::getRowPtr(src0Base, src0Stride, i);
-        const u8 * src1 = internal::getRowPtr(src1Base, src1Stride, i);
-        u16 * dst = internal::getRowPtr((u16 *)dstBase, dstStride, i);
-        size_t j = 0;
-
-        for (; j < roiw32; j += 32)
-        {
-            internal::prefetch(src0 + j);
-            internal::prefetch(src1 + j);
-            uint8x16_t v_src00 = vld1q_u8(src0 + j), v_src01 = vld1q_u8(src0 + j + 16);
-            uint8x16_t v_src10 = vld1q_u8(src1 + j), v_src11 = vld1q_u8(src1 + j + 16);
-            vst1q_u16(dst + j, vaddl_u8(vget_low_u8(v_src00), vget_low_u8(v_src10)));
-            vst1q_u16(dst + j + 8, vaddl_u8(vget_high_u8(v_src00), vget_high_u8(v_src10)));
-            vst1q_u16(dst + j + 16, vaddl_u8(vget_low_u8(v_src01), vget_low_u8(v_src11)));
-            vst1q_u16(dst + j + 24, vaddl_u8(vget_high_u8(v_src01), vget_high_u8(v_src11)));
-        }
-        for (; j < roiw8; j += 8)
-        {
-            uint8x8_t v_src0 = vld1_u8(src0 + j);
-            uint8x8_t v_src1 = vld1_u8(src1 + j);
-            vst1q_u16(dst + j, vaddl_u8(v_src0, v_src1));
-        }
-
-        for (; j < size.width; j++)
-            dst[j] = (u16)src0[j] + (u16)src1[j];
-    }
-#else
-    (void)size;
-    (void)src0Base;
-    (void)src0Stride;
-    (void)src1Base;
-    (void)src1Stride;
-    (void)dstBase;
-    (void)dstStride;
-#endif
-}
-
-void add(const Size2D &size,
-         const u8 * src0Base, ptrdiff_t src0Stride,
-         const s16 * src1Base, ptrdiff_t src1Stride,
-         s16 *dstBase, ptrdiff_t dstStride,
-         CONVERT_POLICY policy)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    size_t roiw16 = size.width >= 15 ? size.width - 15 : 0;
-    size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
-
-    for (size_t i = 0; i < size.height; ++i)
-    {
-        const u8 * src0 = internal::getRowPtr(src0Base, src0Stride, i);
-        const s16 * src1 = internal::getRowPtr(src1Base, src1Stride, i);
-        s16 * dst = internal::getRowPtr(dstBase, dstStride, i);
-        size_t j = 0;
-
-        if (policy == CONVERT_POLICY_SATURATE)
-        {
-            for (; j < roiw16; j += 16)
-            {
-                internal::prefetch(src0 + j);
-                internal::prefetch(src1 + j);
-                uint8x16_t v_src0 = vld1q_u8(src0 + j);
-                int16x8_t v_src00 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(v_src0)));
-                int16x8_t v_src01 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(v_src0)));
-                int16x8_t v_src10 = vld1q_s16(src1 + j), v_src11 = vld1q_s16(src1 + j + 8);
-                int16x8_t v_dst0 = vqaddq_s16(v_src00, v_src10);
-                int16x8_t v_dst1 = vqaddq_s16(v_src01, v_src11);
-                vst1q_s16(dst + j, v_dst0);
-                vst1q_s16(dst + j + 8, v_dst1);
-            }
-            for (; j < roiw8; j += 8)
-            {
-                int16x8_t v_src0 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(src0 + j)));
-                int16x8_t v_src1 = vld1q_s16(src1 + j);
-                int16x8_t v_dst = vqaddq_s16(v_src0, v_src1);
-                vst1q_s16(dst + j, v_dst);
-            }
-
-            for (; j < size.width; j++)
-                dst[j] = internal::saturate_cast<s16>((s32)src0[j] + (s32)src1[j]);
-        }
-        else
-        {
-            for (; j < roiw16; j += 16)
-            {
-                internal::prefetch(src0 + j);
-                internal::prefetch(src1 + j);
-                uint8x16_t v_src0 = vld1q_u8(src0 + j);
-                int16x8_t v_src00 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(v_src0)));
-                int16x8_t v_src01 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(v_src0)));
-                int16x8_t v_src10 = vld1q_s16(src1 + j), v_src11 = vld1q_s16(src1 + j + 8);
-                int16x8_t v_dst0 = vaddq_s16(v_src00, v_src10);
-                int16x8_t v_dst1 = vaddq_s16(v_src01, v_src11);
-                vst1q_s16(dst + j, v_dst0);
-                vst1q_s16(dst + j + 8, v_dst1);
-            }
-            for (; j < roiw8; j += 8)
-            {
-                int16x8_t v_src0 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(src0 + j)));
-                int16x8_t v_src1 = vld1q_s16(src1 + j);
-                int16x8_t v_dst = vaddq_s16(v_src0, v_src1);
-                vst1q_s16(dst + j, v_dst);
-            }
-
-            for (; j < size.width; j++)
-                dst[j] = (s16)((s32)src0[j] + (s32)src1[j]);
-        }
-    }
-#else
-    (void)size;
-    (void)src0Base;
-    (void)src0Stride;
-    (void)src1Base;
-    (void)src1Stride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)policy;
-#endif
-}
-
-void add(const Size2D &size,
-         const s16 * src0Base, ptrdiff_t src0Stride,
-         const s16 * src1Base, ptrdiff_t src1Stride,
-         s16 *dstBase, ptrdiff_t dstStride,
-         CONVERT_POLICY policy)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-        if (policy == CONVERT_POLICY_SATURATE)
-    {
-        internal::vtransform(size,
-                             src0Base, src0Stride,
-                             src1Base, src1Stride,
-                             dstBase, dstStride,
-                             AddSaturate<s16, s32>());
-    }
-    else
-    {
-        internal::vtransform(size,
-                             src0Base, src0Stride,
-                             src1Base, src1Stride,
-                             dstBase, dstStride,
-                             AddWrap<s16, s32>());
-    }
-#else
-    (void)size;
-    (void)src0Base;
-    (void)src0Stride;
-    (void)src1Base;
-    (void)src1Stride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)policy;
-#endif
-}
-
-void add(const Size2D &size,
-         const u16 * src0Base, ptrdiff_t src0Stride,
-         const u16 * src1Base, ptrdiff_t src1Stride,
-         u16 * dstBase, ptrdiff_t dstStride,
-         CONVERT_POLICY policy)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-        if (policy == CONVERT_POLICY_SATURATE)
-    {
-        internal::vtransform(size,
-                             src0Base, src0Stride,
-                             src1Base, src1Stride,
-                             dstBase, dstStride,
-                             AddSaturate<u16, u32>());
-    }
-    else
-    {
-        internal::vtransform(size,
-                             src0Base, src0Stride,
-                             src1Base, src1Stride,
-                             dstBase, dstStride,
-                             AddWrap<u16, u32>());
-    }
-#else
-    (void)size;
-    (void)src0Base;
-    (void)src0Stride;
-    (void)src1Base;
-    (void)src1Stride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)policy;
-#endif
-}
-
-void add(const Size2D &size,
-         const s32 * src0Base, ptrdiff_t src0Stride,
-         const s32 * src1Base, ptrdiff_t src1Stride,
-         s32 *dstBase, ptrdiff_t dstStride,
-         CONVERT_POLICY policy)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-        if (policy == CONVERT_POLICY_SATURATE)
-    {
-        internal::vtransform(size,
-                             src0Base, src0Stride,
-                             src1Base, src1Stride,
-                             dstBase, dstStride,
-                             AddSaturate<s32, s64>());
-    }
-    else
-    {
-        internal::vtransform(size,
-                             src0Base, src0Stride,
-                             src1Base, src1Stride,
-                             dstBase, dstStride,
-                             AddWrap<s32, s64>());
-    }
-#else
-    (void)size;
-    (void)src0Base;
-    (void)src0Stride;
-    (void)src1Base;
-    (void)src1Stride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)policy;
-#endif
-}
-
-void add(const Size2D &size,
-         const u32 * src0Base, ptrdiff_t src0Stride,
-         const u32 * src1Base, ptrdiff_t src1Stride,
-         u32 * dstBase, ptrdiff_t dstStride,
-         CONVERT_POLICY policy)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-        if (policy == CONVERT_POLICY_SATURATE)
-    {
-        internal::vtransform(size,
-                             src0Base, src0Stride,
-                             src1Base, src1Stride,
-                             dstBase, dstStride,
-                             AddSaturate<u32, u64>());
-    }
-    else
-    {
-        internal::vtransform(size,
-                             src0Base, src0Stride,
-                             src1Base, src1Stride,
-                             dstBase, dstStride,
-                             AddWrap<u32, u64>());
-    }
-#else
-    (void)size;
-    (void)src0Base;
-    (void)src0Stride;
-    (void)src1Base;
-    (void)src1Stride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)policy;
-#endif
-}
-
-void add(const Size2D &size,
-         const f32 * src0Base, ptrdiff_t src0Stride,
-         const f32 * src1Base, ptrdiff_t src1Stride,
-         f32 * dstBase, ptrdiff_t dstStride)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    internal::vtransform(size,
-                         src0Base, src0Stride,
-                         src1Base, src1Stride,
-                         dstBase, dstStride,
-                         AddWrap<f32, f32>());
-#else
-    (void)size;
-    (void)src0Base;
-    (void)src0Stride;
-    (void)src1Base;
-    (void)src1Stride;
-    (void)dstBase;
-    (void)dstStride;
-#endif
-}
-
-} // namespace CAROTENE_NS
--- a/3rdparty/carotene/src/add_weighted.cpp
+++ b/3rdparty/carotene/src/add_weighted.cpp
@ -1,265 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2012-2015, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-#include "common.hpp"
-#include "vtransform.hpp"
-
-namespace CAROTENE_NS {
-
-#ifdef CAROTENE_NEON
-
-namespace {
-
-using namespace internal;
-
-template <typename T> struct TypeTraits;
-template <> struct TypeTraits< u8> { typedef u16 wide;                     typedef  u8 unsign; typedef  uint8x16_t vec128; };
-template <> struct TypeTraits< s8> { typedef s16 wide;                     typedef  u8 unsign; typedef   int8x16_t vec128; };
-template <> struct TypeTraits<u16> { typedef u32 wide; typedef  u8 narrow; typedef u16 unsign; typedef  uint16x8_t vec128; };
-template <> struct TypeTraits<s16> { typedef s32 wide; typedef  s8 narrow; typedef u16 unsign; typedef   int16x8_t vec128; };
-template <> struct TypeTraits<u32> { typedef u64 wide; typedef u16 narrow; typedef u32 unsign; typedef  uint32x4_t vec128; };
-template <> struct TypeTraits<s32> { typedef s64 wide; typedef s16 narrow; typedef u32 unsign; typedef   int32x4_t vec128; };
-template <> struct TypeTraits<f32> { typedef f64 wide;                                         typedef float32x4_t vec128; };
-
-template <typename T> struct wAdd
-{
-    typedef T type;
-
-    f32 alpha, beta, gamma;
-    typedef typename TypeTraits<T>::wide wtype;
-    wAdd<wtype> wideAdd;
-    wAdd(f32 _alpha, f32 _beta, f32 _gamma):
-        alpha(_alpha), beta(_beta), gamma(_gamma),
-        wideAdd(_alpha, _beta, _gamma) {}
-
-    void operator() (const typename VecTraits<T>::vec128 & v_src0,
-                     const typename VecTraits<T>::vec128 & v_src1,
-                     typename VecTraits<T>::vec128 & v_dst) const
-    {
-        typename VecTraits<wtype>::vec128 vrl, vrh;
-        wideAdd(vmovl( vget_low(v_src0)), vmovl( vget_low(v_src1)), vrl);
-        wideAdd(vmovl(vget_high(v_src0)), vmovl(vget_high(v_src1)), vrh);
-
-        v_dst = vcombine(vqmovn(vrl), vqmovn(vrh));
-    }
-
-    void operator() (const typename VecTraits<T>::vec64 & v_src0,
-                     const typename VecTraits<T>::vec64 & v_src1,
-                     typename VecTraits<T>::vec64 & v_dst) const
-    {
-        typename VecTraits<wtype>::vec128 vr;
-        wideAdd(vmovl(v_src0), vmovl(v_src1), vr);
-
-        v_dst = vqmovn(vr);
-    }
-
-    void operator() (const T * src0, const T * src1, T * dst) const
-    {
-        dst[0] = saturate_cast<T>(alpha*src0[0] + beta*src1[0] + gamma);
-    }
-};
-
-template <> struct wAdd<s32>
-{
-    typedef s32 type;
-
-    f32 alpha, beta, gamma;
-    float32x4_t valpha, vbeta, vgamma;
-    wAdd(f32 _alpha, f32 _beta, f32 _gamma):
-        alpha(_alpha), beta(_beta), gamma(_gamma)
-    {
-        valpha = vdupq_n_f32(_alpha);
-        vbeta = vdupq_n_f32(_beta);
-        vgamma = vdupq_n_f32(_gamma + 0.5);
-    }
-
-    void operator() (const typename VecTraits<s32>::vec128 & v_src0,
-                     const typename VecTraits<s32>::vec128 & v_src1,
-                     typename VecTraits<s32>::vec128 & v_dst) const
-    {
-        float32x4_t vs1 = vcvtq_f32_s32(v_src0);
-        float32x4_t vs2 = vcvtq_f32_s32(v_src1);
-
-        vs1 = vmlaq_f32(vgamma, vs1, valpha);
-        vs1 = vmlaq_f32(vs1, vs2, vbeta);
-        v_dst = vcvtq_s32_f32(vs1);
-    }
-
-    void operator() (const typename VecTraits<s32>::vec64 & v_src0,
-                     const typename VecTraits<s32>::vec64 & v_src1,
-                     typename VecTraits<s32>::vec64 & v_dst) const
-    {
-        float32x2_t vs1 = vcvt_f32_s32(v_src0);
-        float32x2_t vs2 = vcvt_f32_s32(v_src1);
-
-        vs1 = vmla_f32(vget_low(vgamma), vs1, vget_low(valpha));
-        vs1 = vmla_f32(vs1, vs2, vget_low(vbeta));
-        v_dst = vcvt_s32_f32(vs1);
-    }
-
-    void operator() (const s32 * src0, const s32 * src1, s32 * dst) const
-    {
-        dst[0] = saturate_cast<s32>(alpha*src0[0] + beta*src1[0] + gamma);
-    }
-};
-
-template <> struct wAdd<u32>
-{
-    typedef u32 type;
-
-    f32 alpha, beta, gamma;
-    float32x4_t valpha, vbeta, vgamma;
-    wAdd(f32 _alpha, f32 _beta, f32 _gamma):
-        alpha(_alpha), beta(_beta), gamma(_gamma)
-    {
-        valpha = vdupq_n_f32(_alpha);
-        vbeta = vdupq_n_f32(_beta);
-        vgamma = vdupq_n_f32(_gamma + 0.5);
-    }
-
-    void operator() (const typename VecTraits<u32>::vec128 & v_src0,
-                     const typename VecTraits<u32>::vec128 & v_src1,
-                     typename VecTraits<u32>::vec128 & v_dst) const
-    {
-        float32x4_t vs1 = vcvtq_f32_u32(v_src0);
-        float32x4_t vs2 = vcvtq_f32_u32(v_src1);
-
-        vs1 = vmlaq_f32(vgamma, vs1, valpha);
-        vs1 = vmlaq_f32(vs1, vs2, vbeta);
-        v_dst = vcvtq_u32_f32(vs1);
-    }
-
-    void operator() (const typename VecTraits<u32>::vec64 & v_src0,
-                     const typename VecTraits<u32>::vec64 & v_src1,
-                     typename VecTraits<u32>::vec64 & v_dst) const
-    {
-        float32x2_t vs1 = vcvt_f32_u32(v_src0);
-        float32x2_t vs2 = vcvt_f32_u32(v_src1);
-
-        vs1 = vmla_f32(vget_low(vgamma), vs1, vget_low(valpha));
-        vs1 = vmla_f32(vs1, vs2, vget_low(vbeta));
-        v_dst = vcvt_u32_f32(vs1);
-    }
-
-    void operator() (const u32 * src0, const u32 * src1, u32 * dst) const
-    {
-        dst[0] = saturate_cast<u32>(alpha*src0[0] + beta*src1[0] + gamma);
-    }
-};
-
-template <> struct wAdd<f32>
-{
-    typedef f32 type;
-
-    f32 alpha, beta, gamma;
-    float32x4_t valpha, vbeta, vgamma;
-    wAdd(f32 _alpha, f32 _beta, f32 _gamma):
-        alpha(_alpha), beta(_beta), gamma(_gamma)
-    {
-        valpha = vdupq_n_f32(_alpha);
-        vbeta = vdupq_n_f32(_beta);
-        vgamma = vdupq_n_f32(_gamma + 0.5);
-    }
-
-    void operator() (const typename VecTraits<f32>::vec128 & v_src0,
-                     const typename VecTraits<f32>::vec128 & v_src1,
-                     typename VecTraits<f32>::vec128 & v_dst) const
-    {
-        float32x4_t vs1 = vmlaq_f32(vgamma, v_src0, valpha);
-        v_dst = vmlaq_f32(vs1, v_src1, vbeta);
-    }
-
-    void operator() (const typename VecTraits<f32>::vec64 & v_src0,
-                     const typename VecTraits<f32>::vec64 & v_src1,
-                     typename VecTraits<f32>::vec64 & v_dst) const
-    {
-        float32x2_t vs1 = vmla_f32(vget_low(vgamma), v_src0, vget_low(valpha));
-        v_dst = vmla_f32(vs1, v_src1, vget_low(vbeta));
-
-    }
-
-    void operator() (const f32 * src0, const f32 * src1, f32 * dst) const
-    {
-        dst[0] = alpha*src0[0] + beta*src1[0] + gamma;
-    }
-};
-
-} // namespace
-
-#define IMPL_ADDWEIGHTED(type)                                \
-void addWeighted(const Size2D &size,                          \
-                 const type * src0Base, ptrdiff_t src0Stride, \
-                 const type * src1Base, ptrdiff_t src1Stride, \
-                 type * dstBase, ptrdiff_t dstStride,         \
-                 f32 alpha, f32 beta, f32 gamma)              \
-{                                                             \
-    internal::assertSupportedConfiguration();                 \
-    wAdd<type> wgtAdd(alpha,                                  \
-                      beta,                                   \
-                      gamma);                                 \
-    internal::vtransform(size,                                \
-                         src0Base, src0Stride,                \
-                         src1Base, src1Stride,                \
-                         dstBase, dstStride,                  \
-                         wgtAdd);                             \
-}
-
-#else
-
-#define IMPL_ADDWEIGHTED(type)                                \
-void addWeighted(const Size2D &,                              \
-                 const type *, ptrdiff_t,                     \
-                 const type *, ptrdiff_t,                     \
-                 type *, ptrdiff_t,                           \
-                 f32, f32, f32)                               \
-{                                                             \
-    internal::assertSupportedConfiguration();                 \
-}
-
-#endif
-
-IMPL_ADDWEIGHTED(u8)
-IMPL_ADDWEIGHTED(s8)
-IMPL_ADDWEIGHTED(u16)
-IMPL_ADDWEIGHTED(s16)
-IMPL_ADDWEIGHTED(u32)
-IMPL_ADDWEIGHTED(s32)
-IMPL_ADDWEIGHTED(f32)
-
-} // namespace CAROTENE_NS
--- a/3rdparty/carotene/src/bitwise.cpp
+++ b/3rdparty/carotene/src/bitwise.cpp
@ -1,225 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2014, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-#include "common.hpp"
-#include "vtransform.hpp"
-
-namespace CAROTENE_NS {
-
-#ifdef CAROTENE_NEON
-
-struct BitwiseAnd
-{
-    typedef u8 type;
-
-    void operator() (const uint8x16_t & v_src0, const uint8x16_t & v_src1,
-                     uint8x16_t & v_dst) const
-    {
-        v_dst = vandq_u8(v_src0, v_src1);
-    }
-
-    void operator() (const uint8x8_t & v_src0, const uint8x8_t & v_src1,
-                     uint8x8_t & v_dst) const
-    {
-        v_dst = vand_u8(v_src0, v_src1);
-    }
-
-    void operator() (const u8 * src0, const u8 * src1, u8 * dst) const
-    {
-        dst[0] = src0[0] & src1[0];
-    }
-};
-
-struct BitwiseOr
-{
-    typedef u8 type;
-
-    void operator() (const uint8x16_t & v_src0, const uint8x16_t & v_src1,
-                     uint8x16_t & v_dst) const
-    {
-        v_dst = vorrq_u8(v_src0, v_src1);
-    }
-
-    void operator() (const uint8x8_t & v_src0, const uint8x8_t & v_src1,
-                     uint8x8_t & v_dst) const
-    {
-        v_dst = vorr_u8(v_src0, v_src1);
-    }
-
-    void operator() (const u8 * src0, const u8 * src1, u8 * dst) const
-    {
-        dst[0] = src0[0] | src1[0];
-    }
-};
-
-struct BitwiseXor
-{
-    typedef u8 type;
-
-    void operator() (const uint8x16_t & v_src0, const uint8x16_t & v_src1,
-                     uint8x16_t & v_dst) const
-    {
-        v_dst = veorq_u8(v_src0, v_src1);
-    }
-
-    void operator() (const uint8x8_t & v_src0, const uint8x8_t & v_src1,
-                     uint8x8_t & v_dst) const
-    {
-        v_dst = veor_u8(v_src0, v_src1);
-    }
-
-    void operator() (const u8 * src0, const u8 * src1, u8 * dst) const
-    {
-        dst[0] = src0[0] ^ src1[0];
-    }
-};
-
-#endif
-
-void bitwiseNot(const Size2D &size,
-                const u8 *srcBase, ptrdiff_t srcStride,
-                u8 *dstBase, ptrdiff_t dstStride)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    size_t roiw32 = size.width >= 31 ? size.width - 31 : 0;
-    size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
-
-    for (size_t i = 0; i < size.height; ++i)
-    {
-        const u8* src = internal::getRowPtr(srcBase, srcStride, i);
-        u8* dst = internal::getRowPtr(dstBase, dstStride, i);
-        size_t j = 0;
-
-        for (; j < roiw32; j += 32)
-        {
-            internal::prefetch(src + j);
-            uint8x16_t v_src0 = vld1q_u8(src + j), v_src1 = vld1q_u8(src + j + 16);
-            uint8x16_t v_dst0 = vmvnq_u8(v_src0), v_dst1 = vmvnq_u8(v_src1);
-            vst1q_u8(dst + j, v_dst0);
-            vst1q_u8(dst + j + 16, v_dst1);
-        }
-        for (; j < roiw8; j += 8)
-        {
-            uint8x8_t v_src = vld1_u8(src + j);
-            uint8x8_t v_dst = vmvn_u8(v_src);
-            vst1_u8(dst + j, v_dst);
-        }
-
-        for (; j < size.width; j++)
-        {
-            dst[j] = ~src[j];
-        }
-    }
-#else
-    (void)size;
-    (void)srcBase;
-    (void)srcStride;
-    (void)dstBase;
-    (void)dstStride;
-#endif
-}
-
-void bitwiseAnd(const Size2D &size,
-                const u8 *src0Base, ptrdiff_t src0Stride,
-                const u8 *src1Base, ptrdiff_t src1Stride,
-                u8 *dstBase, ptrdiff_t dstStride)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    internal::vtransform(size,
-                         src0Base, src0Stride,
-                         src1Base, src1Stride,
-                         dstBase, dstStride, BitwiseAnd());
-#else
-    (void)size;
-    (void)src0Base;
-    (void)src0Stride;
-    (void)src1Base;
-    (void)src1Stride;
-    (void)dstBase;
-    (void)dstStride;
-#endif
-}
-
-void bitwiseOr(const Size2D &size,
-               const u8 *src0Base, ptrdiff_t src0Stride,
-               const u8 *src1Base, ptrdiff_t src1Stride,
-               u8 *dstBase, ptrdiff_t dstStride)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    internal::vtransform(size,
-                         src0Base, src0Stride,
-                         src1Base, src1Stride,
-                         dstBase, dstStride, BitwiseOr());
-#else
-    (void)size;
-    (void)src0Base;
-    (void)src0Stride;
-    (void)src1Base;
-    (void)src1Stride;
-    (void)dstBase;
-    (void)dstStride;
-#endif
-}
-
-void bitwiseXor(const Size2D &size,
-                const u8 *src0Base, ptrdiff_t src0Stride,
-                const u8 *src1Base, ptrdiff_t src1Stride,
-                u8 *dstBase, ptrdiff_t dstStride)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    internal::vtransform(size,
-                         src0Base, src0Stride,
-                         src1Base, src1Stride,
-                         dstBase, dstStride, BitwiseXor());
-#else
-    (void)size;
-    (void)src0Base;
-    (void)src0Stride;
-    (void)src1Base;
-    (void)src1Stride;
-    (void)dstBase;
-    (void)dstStride;
-#endif
-}
-
-} // namespace CAROTENE_NS
--- a/3rdparty/carotene/src/blur.cpp
+++ b/3rdparty/carotene/src/blur.cpp
--- a/3rdparty/carotene/src/canny.cpp
+++ b/3rdparty/carotene/src/canny.cpp
@ -1,773 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2012-2015, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-#include "common.hpp"
-
-#include "saturate_cast.hpp"
-#include <vector>
-#include <cstring>
-
-namespace CAROTENE_NS {
-
-#ifdef CAROTENE_NEON
-namespace {
-struct RowFilter3x3Canny
-{
-    inline RowFilter3x3Canny(const ptrdiff_t borderxl, const ptrdiff_t borderxr)
-    {
-        vfmask = vreinterpret_u8_u64(vmov_n_u64(borderxl ? 0x0000FFffFFffFFffULL : 0x0100FFffFFffFFffULL));
-        vtmask = vreinterpret_u8_u64(vmov_n_u64(borderxr ? 0x0707060504030201ULL : 0x0706050403020100ULL));
-        lookLeft = offsetk - borderxl;
-        lookRight = offsetk - borderxr;
-    }
-
-    inline void operator()(const u8* src, s16* dstx, s16* dsty, ptrdiff_t width)
-    {
-        uint8x8_t l = vtbl1_u8(vld1_u8(src - lookLeft), vfmask);
-        ptrdiff_t i = 0;
-        for (; i < width - 8 + lookRight; i += 8)
-        {
-            internal::prefetch(src + i);
-            uint8x8_t l18u = vld1_u8(src + i + 1);
-
-            uint8x8_t l2 = l18u;
-            uint8x8_t l0 = vext_u8(l, l18u, 6);
-            int16x8_t l1x2 = vreinterpretq_s16_u16(vshll_n_u8(vext_u8(l, l18u, 7), 1));
-
-            l = l18u;
-
-            int16x8_t l02 = vreinterpretq_s16_u16(vaddl_u8(l2, l0));
-            int16x8_t ldx = vreinterpretq_s16_u16(vsubl_u8(l2, l0));
-            int16x8_t ldy = vaddq_s16(l02, l1x2);
-
-            vst1q_s16(dstx + i, ldx);
-            vst1q_s16(dsty + i, ldy);
-        }
-
-        //tail
-        if (lookRight == 0 || i != width)
-        {
-            uint8x8_t tail0 = vld1_u8(src + (width - 9));//can't get left 1 pixel another way if width==8*k+1
-            uint8x8_t tail2 = vtbl1_u8(vld1_u8(src + (width - 8 + lookRight)), vtmask);
-            uint8x8_t tail1 = vext_u8(vreinterpret_u8_u64(vshl_n_u64(vreinterpret_u64_u8(tail0), 8*6)), tail2, 7);
-
-            int16x8_t tail02 = vreinterpretq_s16_u16(vaddl_u8(tail2, tail0));
-            int16x8_t tail1x2 = vreinterpretq_s16_u16(vshll_n_u8(tail1, 1));
-            int16x8_t taildx = vreinterpretq_s16_u16(vsubl_u8(tail2, tail0));
-            int16x8_t taildy = vqaddq_s16(tail02, tail1x2);
-
-            vst1q_s16(dstx + (width - 8), taildx);
-            vst1q_s16(dsty + (width - 8), taildy);
-        }
-    }
-
-    uint8x8_t vfmask;
-    uint8x8_t vtmask;
-    enum { offsetk = 1};
-    ptrdiff_t lookLeft;
-    ptrdiff_t lookRight;
-};
-
-template <bool L2gradient>
-inline void ColFilter3x3Canny(const s16* src0, const s16* src1, const s16* src2, s16* dstx, s16* dsty, s32* mag, ptrdiff_t width)
-{
-    ptrdiff_t j = 0;
-    for (; j <= width - 8; j += 8)
-    {
-        ColFilter3x3CannyL1Loop:
-        int16x8_t line0x = vld1q_s16(src0 + j);
-        int16x8_t line1x = vld1q_s16(src1 + j);
-        int16x8_t line2x = vld1q_s16(src2 + j);
-        int16x8_t line0y = vld1q_s16(src0 + j + width);
-        int16x8_t line2y = vld1q_s16(src2 + j + width);
-
-        int16x8_t l02 = vaddq_s16(line0x, line2x);
-        int16x8_t l1x2 = vshlq_n_s16(line1x, 1);
-        int16x8_t dy = vsubq_s16(line2y, line0y);
-        int16x8_t dx = vaddq_s16(l1x2, l02);
-
-        int16x8_t dya = vabsq_s16(dy);
-        int16x8_t dxa = vabsq_s16(dx);
-        int16x8_t norm = vaddq_s16(dya, dxa);
-
-        int32x4_t normh = vmovl_s16(vget_high_s16(norm));
-        int32x4_t norml = vmovl_s16(vget_low_s16(norm));
-
-        vst1q_s16(dsty + j, dy);
-        vst1q_s16(dstx + j, dx);
-        vst1q_s32(mag + j + 4, normh);
-        vst1q_s32(mag + j, norml);
-    }
-    if (j != width)
-    {
-        j = width - 8;
-        goto ColFilter3x3CannyL1Loop;
-    }
-}
-template <>
-inline void ColFilter3x3Canny<true>(const s16* src0, const s16* src1, const s16* src2, s16* dstx, s16* dsty, s32* mag, ptrdiff_t width)
-{
-    ptrdiff_t j = 0;
-    for (; j <= width - 8; j += 8)
-    {
-        ColFilter3x3CannyL2Loop:
-        int16x8_t line0x = vld1q_s16(src0 + j);
-        int16x8_t line1x = vld1q_s16(src1 + j);
-        int16x8_t line2x = vld1q_s16(src2 + j);
-        int16x8_t line0y = vld1q_s16(src0 + j + width);
-        int16x8_t line2y = vld1q_s16(src2 + j + width);
-
-        int16x8_t l02 = vaddq_s16(line0x, line2x);
-        int16x8_t l1x2 = vshlq_n_s16(line1x, 1);
-        int16x8_t dy = vsubq_s16(line2y, line0y);
-        int16x8_t dx = vaddq_s16(l1x2, l02);
-
-        int32x4_t norml = vmull_s16(vget_low_s16(dx), vget_low_s16(dx));
-        int32x4_t normh = vmull_s16(vget_high_s16(dy), vget_high_s16(dy));
-
-        norml = vmlal_s16(norml, vget_low_s16(dy), vget_low_s16(dy));
-        normh = vmlal_s16(normh, vget_high_s16(dx), vget_high_s16(dx));
-
-        vst1q_s16(dsty + j, dy);
-        vst1q_s16(dstx + j, dx);
-        vst1q_s32(mag + j, norml);
-        vst1q_s32(mag + j + 4, normh);
-    }
-    if (j != width)
-    {
-        j = width - 8;
-        goto ColFilter3x3CannyL2Loop;
-    }
-}
-
-template <bool L2gradient>
-inline void NormCanny(const ptrdiff_t colscn, s16* _dx, s16* _dy, s32* _norm)
-{
-    ptrdiff_t j = 0;
-    if (colscn >= 8)
-    {
-        int16x8_t vx = vld1q_s16(_dx);
-        int16x8_t vy = vld1q_s16(_dy);
-        for (; j <= colscn - 16; j+=8)
-        {
-            internal::prefetch(_dx);
-            internal::prefetch(_dy);
-
-            int16x8_t vx2 = vld1q_s16(_dx + j + 8);
-            int16x8_t vy2 = vld1q_s16(_dy + j + 8);
-
-            int16x8_t vabsx = vabsq_s16(vx);
-            int16x8_t vabsy = vabsq_s16(vy);
-
-            int16x8_t norm = vaddq_s16(vabsx, vabsy);
-
-            int32x4_t normh = vmovl_s16(vget_high_s16(norm));
-            int32x4_t norml = vmovl_s16(vget_low_s16(norm));
-
-            vst1q_s32(_norm + j + 4, normh);
-            vst1q_s32(_norm + j + 0, norml);
-
-            vx = vx2;
-            vy = vy2;
-        }
-        int16x8_t vabsx = vabsq_s16(vx);
-        int16x8_t vabsy = vabsq_s16(vy);
-
-        int16x8_t norm = vaddq_s16(vabsx, vabsy);
-
-        int32x4_t normh = vmovl_s16(vget_high_s16(norm));
-        int32x4_t norml = vmovl_s16(vget_low_s16(norm));
-
-        vst1q_s32(_norm + j + 4, normh);
-        vst1q_s32(_norm + j + 0, norml);
-    }
-    for (; j < colscn; j++)
-        _norm[j] = std::abs(s32(_dx[j])) + std::abs(s32(_dy[j]));
-}
-
-template <>
-inline void NormCanny<true>(const ptrdiff_t colscn, s16* _dx, s16* _dy, s32* _norm)
-{
-    ptrdiff_t j = 0;
-    if (colscn >= 8)
-    {
-        int16x8_t vx = vld1q_s16(_dx);
-        int16x8_t vy = vld1q_s16(_dy);
-
-        for (; j <= colscn - 16; j+=8)
-        {
-            internal::prefetch(_dx);
-            internal::prefetch(_dy);
-
-            int16x8_t vxnext = vld1q_s16(_dx + j + 8);
-            int16x8_t vynext = vld1q_s16(_dy + j + 8);
-
-            int32x4_t norml = vmull_s16(vget_low_s16(vx), vget_low_s16(vx));
-            int32x4_t normh = vmull_s16(vget_high_s16(vy), vget_high_s16(vy));
-
-            norml = vmlal_s16(norml, vget_low_s16(vy), vget_low_s16(vy));
-            normh = vmlal_s16(normh, vget_high_s16(vx), vget_high_s16(vx));
-
-            vst1q_s32(_norm + j + 0, norml);
-            vst1q_s32(_norm + j + 4, normh);
-
-            vx = vxnext;
-            vy = vynext;
-        }
-        int32x4_t norml = vmull_s16(vget_low_s16(vx), vget_low_s16(vx));
-        int32x4_t normh = vmull_s16(vget_high_s16(vy), vget_high_s16(vy));
-
-        norml = vmlal_s16(norml, vget_low_s16(vy), vget_low_s16(vy));
-        normh = vmlal_s16(normh, vget_high_s16(vx), vget_high_s16(vx));
-
-        vst1q_s32(_norm + j + 0, norml);
-        vst1q_s32(_norm + j + 4, normh);
-    }
-    for (; j < colscn; j++)
-        _norm[j] = s32(_dx[j])*_dx[j] + s32(_dy[j])*_dy[j];
-}
-
-template <bool L2gradient>
-inline void prepareThresh(f64 low_thresh, f64 high_thresh,
-                          s32 &low, s32 &high)
-{
-    if (low_thresh > high_thresh)
-        std::swap(low_thresh, high_thresh);
-#if defined __GNUC__
-    low = (s32)low_thresh;
-    high = (s32)high_thresh;
-    low -= (low > low_thresh);
-    high -= (high > high_thresh);
-#else
-    low = internal::round(low_thresh);
-    high = internal::round(high_thresh);
-    f32 ldiff = (f32)(low_thresh - low);
-    f32 hdiff = (f32)(high_thresh - high);
-    low -= (ldiff < 0);
-    high -= (hdiff < 0);
-#endif
-}
-template <>
-inline void prepareThresh<true>(f64 low_thresh, f64 high_thresh,
-                                s32 &low, s32 &high)
-{
-    if (low_thresh > high_thresh)
-        std::swap(low_thresh, high_thresh);
-    if (low_thresh > 0) low_thresh *= low_thresh;
-    if (high_thresh > 0) high_thresh *= high_thresh;
-#if defined __GNUC__
-    low = (s32)low_thresh;
-    high = (s32)high_thresh;
-    low -= (low > low_thresh);
-    high -= (high > high_thresh);
-#else
-    low = internal::round(low_thresh);
-    high = internal::round(high_thresh);
-    f32 ldiff = (f32)(low_thresh - low);
-    f32 hdiff = (f32)(high_thresh - high);
-    low -= (ldiff < 0);
-    high -= (hdiff < 0);
-#endif
-}
-
-template <bool L2gradient, bool externalSobel>
-struct _normEstimator
-{
-    ptrdiff_t magstep;
-    ptrdiff_t dxOffset;
-    ptrdiff_t dyOffset;
-    ptrdiff_t shxOffset;
-    ptrdiff_t shyOffset;
-    std::vector<u8> buffer;
-    const ptrdiff_t offsetk;
-    ptrdiff_t borderyt, borderyb;
-    RowFilter3x3Canny sobelRow;
-
-    inline _normEstimator(const Size2D &size, s32, Margin borderMargin,
-                          ptrdiff_t &mapstep, s32** mag_buf, u8* &map):
-                          offsetk(1),
-                          sobelRow(std::max<ptrdiff_t>(0, offsetk - (ptrdiff_t)borderMargin.left),
-                                   std::max<ptrdiff_t>(0, offsetk - (ptrdiff_t)borderMargin.right))
-    {
-        mapstep = size.width + 2;
-        magstep = size.width + 2 + size.width * (4 * sizeof(s16)/sizeof(s32));
-        dxOffset = mapstep * sizeof(s32)/sizeof(s16);
-        dyOffset = dxOffset + size.width * 1;
-        shxOffset = dxOffset + size.width * 2;
-        shyOffset = dxOffset + size.width * 3;
-        buffer.resize( (size.width+2)*(size.height+2) + magstep*3*sizeof(s32) );
-        mag_buf[0] = (s32*)&buffer[0];
-        mag_buf[1] = mag_buf[0] + magstep;
-        mag_buf[2] = mag_buf[1] + magstep;
-        memset(mag_buf[0], 0, mapstep * sizeof(s32));
-
-        map = (u8*)(mag_buf[2] + magstep);
-        memset(map, 1, mapstep);
-        memset(map + mapstep*(size.height + 1), 1, mapstep);
-        borderyt = std::max<ptrdiff_t>(0, offsetk - (ptrdiff_t)borderMargin.top);
-        borderyb = std::max<ptrdiff_t>(0, offsetk - (ptrdiff_t)borderMargin.bottom);
-    }
-    inline void firstRow(const Size2D &size, s32,
-                         const u8 *srcBase, ptrdiff_t srcStride,
-                         s16*, ptrdiff_t,
-                         s16*, ptrdiff_t,
-                         s32** mag_buf)
-    {
-        //sobelH row #0
-        const u8* _src = internal::getRowPtr(srcBase, srcStride, 0);
-        sobelRow(_src, ((s16*)mag_buf[0]) + shxOffset, ((s16*)mag_buf[0]) + shyOffset, size.width);
-        //sobelH row #1
-        _src = internal::getRowPtr(srcBase, srcStride, 1);
-        sobelRow(_src, ((s16*)mag_buf[1]) + shxOffset, ((s16*)mag_buf[1]) + shyOffset, size.width);
-
-        mag_buf[1][0] = mag_buf[1][size.width+1] = 0;
-        if (borderyt == 0)
-        {
-            //sobelH row #-1
-            _src = internal::getRowPtr(srcBase, srcStride, -1);
-            sobelRow(_src, ((s16*)mag_buf[2]) + shxOffset, ((s16*)mag_buf[2]) + shyOffset, size.width);
-
-            ColFilter3x3Canny<L2gradient>( ((s16*)mag_buf[2]) + shxOffset, ((s16*)mag_buf[0]) + shxOffset, ((s16*)mag_buf[1]) + shxOffset,
-                                           ((s16*)mag_buf[1]) + dxOffset,  ((s16*)mag_buf[1]) + dyOffset, mag_buf[1] + 1, size.width);
-        }
-        else
-        {
-            ColFilter3x3Canny<L2gradient>( ((s16*)mag_buf[0]) + shxOffset, ((s16*)mag_buf[0]) + shxOffset, ((s16*)mag_buf[1]) + shxOffset,
-                                           ((s16*)mag_buf[1]) + dxOffset,  ((s16*)mag_buf[1]) + dyOffset, mag_buf[1] + 1, size.width);
-        }
-    }
-    inline void nextRow(const Size2D &size, s32,
-                        const u8 *srcBase, ptrdiff_t srcStride,
-                        s16*, ptrdiff_t,
-                        s16*, ptrdiff_t,
-                        const ptrdiff_t &mapstep, s32** mag_buf,
-                        size_t i, const s16* &_x, const s16* &_y)
-    {
-        mag_buf[2][0] = mag_buf[2][size.width+1] = 0;
-        if (i < size.height - borderyb)
-        {
-            const u8* _src = internal::getRowPtr(srcBase, srcStride, i+1);
-            //sobelH row #i+1
-            sobelRow(_src, ((s16*)mag_buf[2]) + shxOffset, ((s16*)mag_buf[2]) + shyOffset, size.width);
-
-            ColFilter3x3Canny<L2gradient>( ((s16*)mag_buf[0]) + shxOffset, ((s16*)mag_buf[1]) + shxOffset, ((s16*)mag_buf[2]) + shxOffset,
-                                           ((s16*)mag_buf[2]) + dxOffset,  ((s16*)mag_buf[2]) + dyOffset, mag_buf[2] + 1, size.width);
-        }
-        else if (i < size.height)
-        {
-            ColFilter3x3Canny<L2gradient>( ((s16*)mag_buf[0]) + shxOffset, ((s16*)mag_buf[1]) + shxOffset, ((s16*)mag_buf[1]) + shxOffset,
-                                           ((s16*)mag_buf[2]) + dxOffset,  ((s16*)mag_buf[2]) + dyOffset, mag_buf[2] + 1, size.width);
-        }
-        else
-            memset(mag_buf[2], 0, mapstep*sizeof(s32));
-        _x = ((s16*)mag_buf[1]) + dxOffset;
-        _y = ((s16*)mag_buf[1]) + dyOffset;
-    }
-};
-template <bool L2gradient>
-struct _normEstimator<L2gradient, true>
-{
-    std::vector<u8> buffer;
-
-    inline _normEstimator(const Size2D &size, s32 cn, Margin,
-                          ptrdiff_t &mapstep, s32** mag_buf, u8* &map)
-    {
-        mapstep = size.width + 2;
-        buffer.resize( (size.width+2)*(size.height+2) + cn*mapstep*3*sizeof(s32) );
-        mag_buf[0] = (s32*)&buffer[0];
-        mag_buf[1] = mag_buf[0] + mapstep*cn;
-        mag_buf[2] = mag_buf[1] + mapstep*cn;
-        memset(mag_buf[0], 0, /* cn* */mapstep * sizeof(s32));
-
-        map = (u8*)(mag_buf[2] + mapstep*cn);
-        memset(map, 1, mapstep);
-        memset(map + mapstep*(size.height + 1), 1, mapstep);
-    }
-    inline void firstRow(const Size2D &size, s32 cn,
-                         const u8 *, ptrdiff_t,
-                         s16* dxBase, ptrdiff_t dxStride,
-                         s16* dyBase, ptrdiff_t dyStride,
-                         s32** mag_buf)
-    {
-        s32* _norm = mag_buf[1] + 1;
-
-        s16* _dx = internal::getRowPtr(dxBase, dxStride, 0);
-        s16* _dy = internal::getRowPtr(dyBase, dyStride, 0);
-
-        NormCanny<L2gradient>(size.width*cn, _dx, _dy, _norm);
-
-        if(cn > 1)
-        {
-            for(size_t j = 0, jn = 0; j < size.width; ++j, jn += cn)
-            {
-                size_t maxIdx = jn;
-                for(s32 k = 1; k < cn; ++k)
-                    if(_norm[jn + k] > _norm[maxIdx]) maxIdx = jn + k;
-                _norm[j] = _norm[maxIdx];
-                _dx[j] = _dx[maxIdx];
-                _dy[j] = _dy[maxIdx];
-            }
-        }
-
-        _norm[-1] = _norm[size.width] = 0;
-    }
-    inline void nextRow(const Size2D &size, s32 cn,
-                        const u8 *, ptrdiff_t,
-                        s16* dxBase, ptrdiff_t dxStride,
-                        s16* dyBase, ptrdiff_t dyStride,
-                        const ptrdiff_t &mapstep, s32** mag_buf,
-                        size_t i, const s16* &_x, const s16* &_y)
-    {
-        s32* _norm = mag_buf[(i > 0) + 1] + 1;
-        if (i < size.height)
-        {
-            s16* _dx = internal::getRowPtr(dxBase, dxStride, i);
-            s16* _dy = internal::getRowPtr(dyBase, dyStride, i);
-
-            NormCanny<L2gradient>(size.width*cn, _dx, _dy, _norm);
-
-            if(cn > 1)
-            {
-                for(size_t j = 0, jn = 0; j < size.width; ++j, jn += cn)
-                {
-                    size_t maxIdx = jn;
-                    for(s32 k = 1; k < cn; ++k)
-                        if(_norm[jn + k] > _norm[maxIdx]) maxIdx = jn + k;
-                    _norm[j] = _norm[maxIdx];
-                    _dx[j] = _dx[maxIdx];
-                    _dy[j] = _dy[maxIdx];
-                }
-            }
-
-            _norm[-1] = _norm[size.width] = 0;
-        }
-        else
-            memset(_norm-1, 0, /* cn* */mapstep*sizeof(s32));
-
-        _x = internal::getRowPtr(dxBase, dxStride, i-1);
-        _y = internal::getRowPtr(dyBase, dyStride, i-1);
-    }
-};
-
-template <bool L2gradient, bool externalSobel>
-inline void Canny3x3(const Size2D &size, s32 cn,
-                     const u8 * srcBase, ptrdiff_t srcStride,
-                     u8 * dstBase, ptrdiff_t dstStride,
-                     s16 * dxBase, ptrdiff_t dxStride,
-                     s16 * dyBase, ptrdiff_t dyStride,
-                     f64 low_thresh, f64 high_thresh,
-                     Margin borderMargin)
-{
-    s32 low, high;
-    prepareThresh<L2gradient>(low_thresh, high_thresh, low, high);
-
-    ptrdiff_t mapstep;
-    s32* mag_buf[3];
-    u8* map;
-    _normEstimator<L2gradient, externalSobel> normEstimator(size, cn, borderMargin, mapstep, mag_buf, map);
-
-    size_t maxsize = std::max<size_t>( 1u << 10, size.width * size.height / 10 );
-    std::vector<u8*> stack( maxsize );
-    u8 **stack_top = &stack[0];
-    u8 **stack_bottom = &stack[0];
-
-    /* sector numbers
-       (Top-Left Origin)
-
-        1   2   3
-         *  *  *
-          * * *
-        0*******0
-          * * *
-         *  *  *
-        3   2   1
-    */
-
-    #define CANNY_PUSH(d)    *(d) = u8(2), *stack_top++ = (d)
-    #define CANNY_POP(d)     (d) = *--stack_top
-
-    //i == 0
-    normEstimator.firstRow(size, cn, srcBase, srcStride, dxBase, dxStride, dyBase, dyStride, mag_buf);
-    // calculate magnitude and angle of gradient, perform non-maxima supression.
-    // fill the map with one of the following values:
-    //   0 - the pixel might belong to an edge
-    //   1 - the pixel can not belong to an edge
-    //   2 - the pixel does belong to an edge
-    for (size_t i = 1; i <= size.height; i++)
-    {
-        const s16 *_x, *_y;
-        normEstimator.nextRow(size, cn, srcBase, srcStride, dxBase, dxStride, dyBase, dyStride, mapstep, mag_buf, i, _x, _y);
-
-        u8* _map = map + mapstep*i + 1;
-        _map[-1] = _map[size.width] = 1;
-
-        s32* _mag = mag_buf[1] + 1; // take the central row
-        ptrdiff_t magstep1 = mag_buf[2] - mag_buf[1];
-        ptrdiff_t magstep2 = mag_buf[0] - mag_buf[1];
-
-        if ((stack_top - stack_bottom) + size.width > maxsize)
-        {
-            ptrdiff_t sz = (ptrdiff_t)(stack_top - stack_bottom);
-            maxsize = maxsize * 3/2;
-            stack.resize(maxsize);
-            stack_bottom = &stack[0];
-            stack_top = stack_bottom + sz;
-        }
-
-        s32 prev_flag = 0;
-        for (ptrdiff_t j = 0; j < (ptrdiff_t)size.width; j++)
-        {
-            #define CANNY_SHIFT 15
-            const s32 TG22 = (s32)(0.4142135623730950488016887242097*(1<<CANNY_SHIFT) + 0.5);
-
-            s32 m = _mag[j];
-
-            if (m > low)
-            {
-                s32 xs = _x[j];
-                s32 ys = _y[j];
-                s32 x = abs(xs);
-                s32 y = abs(ys) << CANNY_SHIFT;
-
-                s32 tg22x = x * TG22;
-
-                if (y < tg22x)
-                {
-                    if (m > _mag[j-1] && m >= _mag[j+1]) goto __push;
-                }
-                else
-                {
-                    s32 tg67x = tg22x + (x << (CANNY_SHIFT+1));
-                    if (y > tg67x)
-                    {
-                        if (m > _mag[j+magstep2] && m >= _mag[j+magstep1]) goto __push;
-                    }
-                    else
-                    {
-                        s32 s = (xs ^ ys) < 0 ? -1 : 1;
-                        if(m > _mag[j+magstep2-s] && m > _mag[j+magstep1+s]) goto __push;
-                    }
-                }
-            }
-            prev_flag = 0;
-            _map[j] = u8(1);
-            continue;
-            __push:
-            if (!prev_flag && m > high && _map[j-mapstep] != 2)
-            {
-                CANNY_PUSH(_map + j);
-                prev_flag = 1;
-            }
-            else
-                _map[j] = 0;
-        }
-
-        // scroll the ring buffer
-        _mag = mag_buf[0];
-        mag_buf[0] = mag_buf[1];
-        mag_buf[1] = mag_buf[2];
-        mag_buf[2] = _mag;
-    }
-
-    // now track the edges (hysteresis thresholding)
-    while (stack_top > stack_bottom)
-    {
-        u8* m;
-        if ((size_t)(stack_top - stack_bottom) + 8u > maxsize)
-        {
-            ptrdiff_t sz = (ptrdiff_t)(stack_top - stack_bottom);
-            maxsize = maxsize * 3/2;
-            stack.resize(maxsize);
-            stack_bottom = &stack[0];
-            stack_top = stack_bottom + sz;
-        }
-
-        CANNY_POP(m);
-
-        if (!m[-1])         CANNY_PUSH(m - 1);
-        if (!m[1])          CANNY_PUSH(m + 1);
-        if (!m[-mapstep-1]) CANNY_PUSH(m - mapstep - 1);
-        if (!m[-mapstep])   CANNY_PUSH(m - mapstep);
-        if (!m[-mapstep+1]) CANNY_PUSH(m - mapstep + 1);
-        if (!m[mapstep-1])  CANNY_PUSH(m + mapstep - 1);
-        if (!m[mapstep])    CANNY_PUSH(m + mapstep);
-        if (!m[mapstep+1])  CANNY_PUSH(m + mapstep + 1);
-    }
-
-    // the final pass, form the final image
-    uint8x16_t v2 = vmovq_n_u8(2);
-    const u8* ptrmap = map + mapstep + 1;
-    for (size_t i = 0; i < size.height; i++, ptrmap += mapstep)
-    {
-        u8* _dst = internal::getRowPtr(dstBase, dstStride, i);
-        ptrdiff_t j = 0;
-        for (; j < (ptrdiff_t)size.width - 16; j += 16)
-        {
-            internal::prefetch(ptrmap);
-            uint8x16_t vmap = vld1q_u8(ptrmap + j);
-            uint8x16_t vdst = vceqq_u8(vmap, v2);
-            vst1q_u8(_dst+j, vdst);
-        }
-        for (; j < (ptrdiff_t)size.width; j++)
-            _dst[j] = (u8)-(ptrmap[j] >> 1);
-    }
-}
-
-} // namespace
-#endif
-
-bool isCanny3x3Supported(const Size2D &size)
-{
-    return isSupportedConfiguration() &&
-           size.height >= 2 && size.width >= 9;
-}
-
-void Canny3x3L1(const Size2D &size,
-                const u8 * srcBase, ptrdiff_t srcStride,
-                u8 * dstBase, ptrdiff_t dstStride,
-                f64 low_thresh, f64 high_thresh,
-                Margin borderMargin)
-{
-    internal::assertSupportedConfiguration(isCanny3x3Supported(size));
-#ifdef CAROTENE_NEON
-    Canny3x3<false, false>(size, 1,
-                           srcBase, srcStride,
-                           dstBase, dstStride,
-                           NULL, 0,
-                           NULL, 0,
-                           low_thresh, high_thresh,
-                           borderMargin);
-#else
-    (void)size;
-    (void)srcBase;
-    (void)srcStride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)low_thresh;
-    (void)high_thresh;
-    (void)borderMargin;
-#endif
-}
-
-void Canny3x3L2(const Size2D &size,
-                const u8 * srcBase, ptrdiff_t srcStride,
-                u8 * dstBase, ptrdiff_t dstStride,
-                f64 low_thresh, f64 high_thresh,
-                Margin borderMargin)
-{
-    internal::assertSupportedConfiguration(isCanny3x3Supported(size));
-#ifdef CAROTENE_NEON
-    Canny3x3<true, false>(size, 1,
-                          srcBase, srcStride,
-                          dstBase, dstStride,
-                          NULL, 0,
-                          NULL, 0,
-                          low_thresh, high_thresh,
-                          borderMargin);
-#else
-    (void)size;
-    (void)srcBase;
-    (void)srcStride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)low_thresh;
-    (void)high_thresh;
-    (void)borderMargin;
-#endif
-}
-
-void Canny3x3L1(const Size2D &size, s32 cn,
-                     s16 * dxBase, ptrdiff_t dxStride,
-                     s16 * dyBase, ptrdiff_t dyStride,
-                     u8 * dstBase, ptrdiff_t dstStride,
-                     f64 low_thresh, f64 high_thresh)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    Canny3x3<false, true>(size, cn,
-                          NULL, 0,
-                          dstBase, dstStride,
-                          dxBase, dxStride,
-                          dyBase, dyStride,
-                          low_thresh, high_thresh,
-                          Margin());
-#else
-    (void)size;
-    (void)cn;
-    (void)dstBase;
-    (void)dstStride;
-    (void)dxBase;
-    (void)dxStride;
-    (void)dyBase;
-    (void)dyStride;
-    (void)low_thresh;
-    (void)high_thresh;
-#endif
-}
-
-void Canny3x3L2(const Size2D &size, s32 cn,
-                     s16 * dxBase, ptrdiff_t dxStride,
-                     s16 * dyBase, ptrdiff_t dyStride,
-                     u8 * dstBase, ptrdiff_t dstStride,
-                     f64 low_thresh, f64 high_thresh)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    Canny3x3<true, true>(size, cn,
-                         NULL, 0,
-                         dstBase, dstStride,
-                         dxBase, dxStride,
-                         dyBase, dyStride,
-                         low_thresh, high_thresh,
-                         Margin());
-#else
-    (void)size;
-    (void)cn;
-    (void)dstBase;
-    (void)dstStride;
-    (void)dxBase;
-    (void)dxStride;
-    (void)dyBase;
-    (void)dyStride;
-    (void)low_thresh;
-    (void)high_thresh;
-#endif
-}
-
-} // namespace CAROTENE_NS
--- a/3rdparty/carotene/src/channel_extract.cpp
+++ b/3rdparty/carotene/src/channel_extract.cpp
@ -1,486 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2014-2015, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-#include "common.hpp"
-#include "vtransform.hpp"
-
-namespace CAROTENE_NS {
-
-void extract2(const Size2D &size,
-              const u8 * srcBase, ptrdiff_t srcStride,
-              u8 * dstBase, ptrdiff_t dstStride,
-              u32 coi)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-#ifndef ANDROID
-    size_t roiw32 = size.width >= 31 ? size.width - 31 : 0;
-#endif
-    size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
-
-    for (size_t i = 0u; i < size.height; ++i)
-    {
-        const u8 * src = internal::getRowPtr(srcBase, srcStride, i);
-        u8 * dst = internal::getRowPtr(dstBase, dstStride, i);
-        size_t sj = 0u, dj = 0u;
-
-#ifndef ANDROID
-        for (; dj < roiw32; sj += 64, dj += 32)
-        {
-            internal::prefetch(src + sj);
-
-            uint8x16x2_t v_src = vld2q_u8(src + sj);
-            vst1q_u8(dst + dj, v_src.val[coi]);
-
-            v_src = vld2q_u8(src + sj + 32);
-            vst1q_u8(dst + dj + 16, v_src.val[coi]);
-        }
-#endif
-
-        for (; dj < roiw8; sj += 16, dj += 8)
-        {
-            uint8x8x2_t v_src = vld2_u8(src + sj);
-            vst1_u8(dst + dj, v_src.val[coi]);
-        }
-
-        for (; dj < size.width; sj += 2, ++dj)
-        {
-            dst[dj] = src[sj + coi];
-        }
-    }
-#else
-    (void)size;
-    (void)srcBase;
-    (void)srcStride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)coi;
-#endif
-}
-
-void extract3(const Size2D &size,
-              const u8 * srcBase, ptrdiff_t srcStride,
-              u8 * dstBase, ptrdiff_t dstStride,
-              u32 coi)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-#ifndef ANDROID
-    size_t roiw32 = size.width >= 31 ? size.width - 31 : 0;
-#endif
-    size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
-
-    for (size_t i = 0u; i < size.height; ++i)
-    {
-        const u8 * src = internal::getRowPtr(srcBase, srcStride, i);
-        u8 * dst = internal::getRowPtr(dstBase, dstStride, i);
-        size_t sj = 0u, dj = 0u;
-
-#ifndef ANDROID
-        for (; dj < roiw32; sj += 96, dj += 32)
-        {
-            internal::prefetch(src + sj);
-
-            uint8x16x3_t v_src = vld3q_u8(src + sj);
-            vst1q_u8(dst + dj, v_src.val[coi]);
-
-            v_src = vld3q_u8(src + sj + 48);
-            vst1q_u8(dst + dj + 16, v_src.val[coi]);
-        }
-#endif
-
-        for (; dj < roiw8; sj += 24, dj += 8)
-        {
-            uint8x8x3_t v_src = vld3_u8(src + sj);
-            vst1_u8(dst + dj, v_src.val[coi]);
-        }
-
-        for (; dj < size.width; sj += 3, ++dj)
-        {
-            dst[dj] = src[sj + coi];
-        }
-    }
-#else
-    (void)size;
-    (void)srcBase;
-    (void)srcStride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)coi;
-#endif
-}
-
-void extract4(const Size2D &size,
-              const u8 * srcBase, ptrdiff_t srcStride,
-              u8 * dstBase, ptrdiff_t dstStride,
-              u32 coi)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-#ifndef ANDROID
-    size_t roiw32 = size.width >= 31 ? size.width - 31 : 0;
-#endif
-    size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
-
-    for (size_t i = 0u; i < size.height; ++i)
-    {
-        const u8 * src = internal::getRowPtr(srcBase, srcStride, i);
-        u8 * dst = internal::getRowPtr(dstBase, dstStride, i);
-        size_t sj = 0u, dj = 0u;
-
-#ifndef ANDROID
-        for (; dj < roiw32; sj += 128, dj += 32)
-        {
-            internal::prefetch(src + sj);
-
-            uint8x16x4_t v_src = vld4q_u8(src + sj);
-            vst1q_u8(dst + dj, v_src.val[coi]);
-
-            v_src = vld4q_u8(src + sj + 64);
-            vst1q_u8(dst + dj + 16, v_src.val[coi]);
-        }
-#endif
-
-        for (; dj < roiw8; sj += 32, dj += 8)
-        {
-            uint8x8x4_t v_src = vld4_u8(src + sj);
-            vst1_u8(dst + dj, v_src.val[coi]);
-        }
-
-        for (; dj < size.width; sj += 4, ++dj)
-        {
-            dst[dj] = src[sj + coi];
-        }
-    }
-#else
-    (void)size;
-    (void)srcBase;
-    (void)srcStride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)coi;
-#endif
-}
-
-#define FILL_LINES2(macro,type) \
-            macro##_LINE(type,0) \
-            macro##_LINE(type,1)
-#define FILL_LINES3(macro,type) \
-            FILL_LINES2(macro,type) \
-            macro##_LINE(type,2)
-#define FILL_LINES4(macro,type) \
-            FILL_LINES3(macro,type) \
-            macro##_LINE(type,3)
-
-#define FARG_LINE(type, n) , type * dst##n##Base, ptrdiff_t dst##n##Stride
-
-#ifdef CAROTENE_NEON
-
-#define VROW_LINE(type, n) type * dst##n = internal::getRowPtr(dst##n##Base, dst##n##Stride, i);
-#define VST1Q_LINE(type, n) vst1q_##type(dst##n + dj, v_src.val[n]);
-#define VST1_LINE(type, n) vst1_##type(dst##n + dj, v_src.val[n]);
-#define SST_LINE(type, n) dst##n[dj] = src[sj + n];
-
-#define MUL2(val) (val << 1)
-#define MUL3(val) (MUL2(val) + val)
-#define MUL4(val) (val << 2)
-
-#define CONTDST2 srcStride == dst0Stride && \
-                 srcStride == dst1Stride &&
-#define CONTDST3 srcStride == dst0Stride && \
-                 srcStride == dst1Stride && \
-                 srcStride == dst2Stride &&
-#define CONTDST4 srcStride == dst0Stride && \
-                 srcStride == dst1Stride && \
-                 srcStride == dst2Stride && \
-                 srcStride == dst3Stride &&
-
-#if __GNUC__ == 4 && __GNUC_MINOR__ < 7
-
-#define SPLIT_ASM2(sgn, bits) __asm__ ( \
-                                          "vld2." #bits " {d0, d2}, [%[in0]]            \n\t" \
-                                          "vld2." #bits " {d1, d3}, [%[in1]]            \n\t" \
-                                          "vst1." #bits " {d0-d1}, [%[out0]]            \n\t" \
-                                          "vst1." #bits " {d2-d3}, [%[out1]]            \n\t" \
-                                          : \
-                                          : [out0] "r" (dst0 + dj), [out1] "r" (dst1 + dj), \
-                                            [in0]  "r" (src + sj), [in1]  "r" (src + sj + MUL2(8)/sizeof(sgn##bits)) \
-                                          : "d0","d1","d2","d3" \
-                                      );
-#define SPLIT_ASM3(sgn, bits) __asm__ ( \
-                                          "vld3." #bits " {d0, d2, d4}, [%[in0]]        \n\t" \
-                                          "vld3." #bits " {d1, d3, d5}, [%[in1]]        \n\t" \
-                                          "vst1." #bits " {d0-d1}, [%[out0]]            \n\t" \
-                                          "vst1." #bits " {d2-d3}, [%[out1]]            \n\t" \
-                                          "vst1." #bits " {d4-d5}, [%[out2]]            \n\t" \
-                                          : \
-                                          : [out0] "r" (dst0 + dj), [out1] "r" (dst1 + dj), [out2] "r" (dst2 + dj), \
-                                            [in0]  "r" (src + sj), [in1]  "r" (src + sj + MUL3(8)/sizeof(sgn##bits)) \
-                                          : "d0","d1","d2","d3","d4","d5" \
-                                      );
-#define SPLIT_ASM4(sgn, bits) __asm__ ( \
-                                          "vld4." #bits " {d0, d2, d4, d6}, [%[in0]]    \n\t" \
-                                          "vld4." #bits " {d1, d3, d5, d7}, [%[in1]]    \n\t" \
-                                          "vst1." #bits " {d0-d1}, [%[out0]]            \n\t" \
-                                          "vst1." #bits " {d2-d3}, [%[out1]]            \n\t" \
-                                          "vst1." #bits " {d4-d5}, [%[out2]]            \n\t" \
-                                          "vst1." #bits " {d6-d7}, [%[out3]]            \n\t" \
-                                          : \
-                                          : [out0] "r" (dst0 + dj), [out1] "r" (dst1 + dj), [out2] "r" (dst2 + dj), [out3] "r" (dst3 + dj), \
-                                            [in0]  "r" (src + sj), [in1]  "r" (src + sj + MUL4(8)/sizeof(sgn##bits)) \
-                                          : "d0","d1","d2","d3","d4","d5","d6","d7" \
-                                      );
-
-#define SPLIT_QUAD(sgn, bits, n) { \
-                                     internal::prefetch(src + sj); \
-                                     SPLIT_ASM##n(sgn, bits) \
-                                 }
-
-#else
-
-#define SPLIT_QUAD(sgn, bits, n) { \
-                                     internal::prefetch(src + sj); \
-                                     vec128 v_src = vld##n##q_##sgn##bits(src + sj); \
-                                     FILL_LINES##n(VST1Q, sgn##bits) \
-                                 }
-
-#endif // __GNUC__ == 4 && __GNUC_MINOR__ < 7
-
-#define SPLIT(sgn,bits,n) void split##n(const Size2D &_size,                                            \
-                                    const sgn##bits * srcBase, ptrdiff_t srcStride                      \
-                                    FILL_LINES##n(FARG, sgn##bits) )                                    \
-{                                                                                                       \
-    internal::assertSupportedConfiguration();                                                           \
-    Size2D size(_size);                                                                                 \
-    if (CONTDST##n                                                                                      \
-        dst0Stride == (ptrdiff_t)(size.width))                                                          \
-    {                                                                                                   \
-        size.width *= size.height;                                                                      \
-        size.height = 1;                                                                                \
-    }                                                                                                   \
-    typedef internal::VecTraits<sgn##bits, n>::vec128 vec128;                                           \
-    size_t roiw16 = size.width >= (16/sizeof(sgn##bits)-1) ? size.width - (16/sizeof(sgn##bits)-1) : 0; \
-    typedef internal::VecTraits<sgn##bits, n>::vec64 vec64;                                             \
-    size_t roiw8 = size.width >= (8/sizeof(sgn##bits)-1) ? size.width - (8/sizeof(sgn##bits)-1) : 0;    \
-                                                                                                        \
-    for (size_t i = 0u; i < size.height; ++i)                                                           \
-    {                                                                                                   \
-        const sgn##bits * src = internal::getRowPtr(srcBase, srcStride, i);                             \
-        FILL_LINES##n(VROW, sgn##bits)                                                                  \
-        size_t sj = 0u, dj = 0u;                                                                        \
-                                                                                                        \
-        for (; dj < roiw16; sj += MUL##n(16)/sizeof(sgn##bits), dj += 16/sizeof(sgn##bits))             \
-            SPLIT_QUAD(sgn, bits, n)                                                                    \
-                                                                                                        \
-        if (dj < roiw8)                                                                                 \
-        {                                                                                               \
-            vec64 v_src = vld##n##_##sgn##bits(src + sj);                                               \
-            FILL_LINES##n(VST1, sgn##bits)                                                              \
-            sj += MUL##n(8)/sizeof(sgn##bits);                                                          \
-            dj += 8/sizeof(sgn##bits);                                                                  \
-        }                                                                                               \
-                                                                                                        \
-        for (; dj < size.width; sj += n, ++dj)                                                          \
-        {                                                                                               \
-            FILL_LINES##n(SST, sgn##bits)                                                               \
-        }                                                                                               \
-    }                                                                                                   \
-}
-
-#define SPLIT64(sgn,n) void split##n(const Size2D &_size,                                               \
-                                     const sgn##64 * srcBase, ptrdiff_t srcStride                       \
-                                     FILL_LINES##n(FARG, sgn##64) )                                     \
-{                                                                                                       \
-    internal::assertSupportedConfiguration();                                                           \
-    Size2D size(_size);                                                                                 \
-    if (CONTDST##n                                                                                      \
-        dst0Stride == (ptrdiff_t)(size.width))                                                          \
-    {                                                                                                   \
-        size.width *= size.height;                                                                      \
-        size.height = 1;                                                                                \
-    }                                                                                                   \
-    typedef internal::VecTraits<sgn##64, n>::vec64 vec64;                                               \
-                                                                                                        \
-    for (size_t i = 0u; i < size.height; ++i)                                                           \
-    {                                                                                                   \
-        const sgn##64 * src = internal::getRowPtr(srcBase, srcStride, i);                               \
-        FILL_LINES##n(VROW, sgn##64)                                                                    \
-        size_t sj = 0u, dj = 0u;                                                                        \
-                                                                                                        \
-        for (; dj < size.width; sj += n, ++dj)                                                          \
-        {                                                                                               \
-            vec64 v_src = vld##n##_##sgn##64(src + sj);                                                 \
-            FILL_LINES##n(VST1, sgn##64)                                                                \
-        }                                                                                               \
-    }                                                                                                   \
-}
-
-#if __GNUC__ == 4 && __GNUC_MINOR__ < 7
-
-#define ALPHA_QUAD(sgn, bits) { \
-                                  internal::prefetch(src + sj); \
-                                  __asm__ ( \
-                                      "vld4." #bits " {d0, d2, d4, d6}, [%[in0]]    \n\t" \
-                                      "vld4." #bits " {d1, d3, d5, d7}, [%[in1]]    \n\t" \
-                                      "vst3." #bits " {d0, d2, d4}, [%[out3_1]]     \n\t" \
-                                      "vst3." #bits " {d1, d3, d5}, [%[out3_2]]     \n\t" \
-                                      "vst1." #bits " {d6-d7}, [%[out1]]            \n\t" \
-                                      : \
-                                      : [out3_1] "r" (dst3 + d3j), [out3_2] "r" (dst3 + d3j + 24/sizeof(sgn##bits)), [out1] "r" (dst1 + d1j), \
-                                        [in0]  "r" (src + sj), [in1]  "r" (src + sj + 32/sizeof(sgn##bits)) \
-                                      : "d0","d1","d2","d3","d4","d5","d6","d7" \
-                                  ); \
-                              }
-
-#else
-
-#define ALPHA_QUAD(sgn, bits) { \
-                                  internal::prefetch(src + sj); \
-                                  union { vec128_4 v4; vec128_3 v3; } vals; \
-                                  vals.v4 = vld4q_##sgn##bits(src + sj); \
-                                  vst3q_##sgn##bits(dst3 + d3j, vals.v3); \
-                                  vst1q_##sgn##bits(dst1 + d1j, vals.v4.val[3]); \
-                              }
-
-#endif // __GNUC__ == 4 && __GNUC_MINOR__ < 7
-
-#define SPLIT4ALPHA(sgn,bits) void split4(const Size2D &_size,                                          \
-                                          const sgn##bits * srcBase, ptrdiff_t srcStride,               \
-                                          sgn##bits * dst3Base, ptrdiff_t dst3Stride,                   \
-                                          sgn##bits * dst1Base, ptrdiff_t dst1Stride)                   \
-{                                                                                                       \
-    internal::assertSupportedConfiguration();                                                           \
-    Size2D size(_size);                                                                                 \
-    if (srcStride == dst3Stride &&                                                                      \
-        srcStride == dst1Stride &&                                                                      \
-        srcStride == (ptrdiff_t)(size.width))                                                           \
-    {                                                                                                   \
-        size.width *= size.height;                                                                      \
-        size.height = 1;                                                                                \
-    }                                                                                                   \
-    typedef internal::VecTraits<sgn##bits, 4>::vec128 vec128_4;                                         \
-    typedef internal::VecTraits<sgn##bits, 3>::vec128 vec128_3;                                         \
-    size_t roiw16 = size.width >= (16/sizeof(sgn##bits)-1) ? size.width - (16/sizeof(sgn##bits)-1) : 0; \
-    typedef internal::VecTraits<sgn##bits, 4>::vec64 vec64_4;                                           \
-    typedef internal::VecTraits<sgn##bits, 3>::vec64 vec64_3;                                           \
-    size_t roiw8 = size.width >= (8/sizeof(sgn##bits)-1) ? size.width - (8/sizeof(sgn##bits)-1) : 0;    \
-                                                                                                        \
-    for (size_t i = 0u; i < size.height; ++i)                                                           \
-    {                                                                                                   \
-        const sgn##bits * src = internal::getRowPtr(srcBase, srcStride, i);                             \
-        sgn##bits * dst3 = internal::getRowPtr(dst3Base, dst3Stride, i);                                \
-        sgn##bits * dst1 = internal::getRowPtr(dst1Base, dst1Stride, i);                                \
-        size_t sj = 0u, d3j = 0u, d1j = 0u;                                                             \
-                                                                                                        \
-        for (; d1j < roiw16; sj += MUL4(16)/sizeof(sgn##bits), d3j += MUL3(16)/sizeof(sgn##bits),       \
-                                                               d1j += 16/sizeof(sgn##bits))             \
-            ALPHA_QUAD(sgn, bits)                                                                       \
-                                                                                                        \
-        if (d1j < roiw8)                                                                                \
-        {                                                                                               \
-            union { vec64_4 v4; vec64_3 v3; } vals;                                                     \
-            vals.v4 = vld4_##sgn##bits(src + sj);                                                       \
-            vst3_u8(dst3 + d3j, vals.v3);                                                               \
-            vst1_u8(dst1 + d1j, vals.v4.val[3]);                                                        \
-            sj += MUL4(8)/sizeof(sgn##bits);                                                            \
-            d3j += MUL3(8)/sizeof(sgn##bits);                                                           \
-            d1j += 8/sizeof(sgn##bits);                                                                 \
-        }                                                                                               \
-                                                                                                        \
-        for (; d1j < size.width; sj += 4, d3j += 3, ++d1j)                                              \
-        {                                                                                               \
-            dst3[d3j+0] = src[sj + 0];                                                                  \
-            dst3[d3j+1] = src[sj + 1];                                                                  \
-            dst3[d3j+2] = src[sj + 2];                                                                  \
-            dst1[d1j]   = src[sj + 3];                                                                  \
-        }                                                                                               \
-    }                                                                                                   \
-}
-
-#else
-
-#define VOID_LINE(type, n) (void)dst##n##Base; (void)dst##n##Stride;
-
-#define SPLIT(sgn,bits,n) void split##n(const Size2D &size,                                          \
-                                    const sgn##bits * srcBase, ptrdiff_t srcStride                   \
-                                    FILL_LINES##n(FARG, sgn##bits) )                                 \
-{                                                                                                    \
-    internal::assertSupportedConfiguration();                                                        \
-    (void)size;                                                                                      \
-    (void)srcBase;                                                                                   \
-    (void)srcStride;                                                                                 \
-    FILL_LINES##n(VOID, sgn##bits)                                                                   \
-}
-
-#define SPLIT64(sgn,n) SPLIT(sgn,64,n)
-
-#define SPLIT4ALPHA(sgn,bits) void split4(const Size2D &size,                                        \
-                                          const sgn##bits * srcBase, ptrdiff_t srcStride,            \
-                                          sgn##bits * dst3Base, ptrdiff_t dst3Stride,                \
-                                          sgn##bits * dst1Base, ptrdiff_t dst1Stride)                \
-{                                                                                                    \
-    internal::assertSupportedConfiguration();                                                        \
-    (void)size;                                                                                      \
-    (void)srcBase;                                                                                   \
-    (void)srcStride;                                                                                 \
-    (void)dst3Base;                                                                                  \
-    (void)dst3Stride;                                                                                \
-    (void)dst1Base;                                                                                  \
-    (void)dst1Stride;                                                                                \
-}
-
-#endif //CAROTENE_NEON
-
-SPLIT(u, 8,2)
-SPLIT(u, 8,3)
-SPLIT(u, 8,4)
-SPLIT(u,16,2)
-SPLIT(u,16,3)
-SPLIT(u,16,4)
-SPLIT(s,32,2)
-SPLIT(s,32,3)
-SPLIT(s,32,4)
-
-SPLIT64(s, 2)
-SPLIT64(s, 3)
-SPLIT64(s, 4)
-
-SPLIT4ALPHA(u,8)
-
-} // namespace CAROTENE_NS
--- a/3rdparty/carotene/src/channels_combine.cpp
+++ b/3rdparty/carotene/src/channels_combine.cpp
@ -1,389 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2014-2015, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-#include "common.hpp"
-#include "vtransform.hpp"
-
-namespace CAROTENE_NS {
-
-#define FILL_LINES2(macro,type) \
-            macro##_LINE(type,0) \
-            macro##_LINE(type,1)
-#define FILL_LINES3(macro,type) \
-            FILL_LINES2(macro,type) \
-            macro##_LINE(type,2)
-#define FILL_LINES4(macro,type) \
-            FILL_LINES3(macro,type) \
-            macro##_LINE(type,3)
-
-#define  FARG_LINE(type, n) , const type * src##n##Base, ptrdiff_t src##n##Stride
-
-#ifdef CAROTENE_NEON
-
-#define  VROW_LINE(type, n) const type * src##n = internal::getRowPtr(src##n##Base, src##n##Stride, i);
-#define  PREF_LINE(type, n) internal::prefetch(src##n + sj);
-#define VLD1Q_LINE(type, n) v_dst.val[n] = vld1q_##type(src##n + sj);
-#define  PRLD_LINE(type, n) internal::prefetch(src##n + sj); v_dst.val[n] = vld1q_##type(src##n + sj);
-#define  VLD1_LINE(type, n) v_dst.val[n] = vld1_##type(src##n + sj);
-#define   SLD_LINE(type, n) dst[dj + n] = src##n[sj];
-
-#define MUL2(val) (val << 1)
-#define MUL3(val) (MUL2(val) + val)
-#define MUL4(val) (val << 2)
-
-#define CONTSRC2 dstStride == src0Stride && \
-                 dstStride == src1Stride &&
-#define CONTSRC3 dstStride == src0Stride && \
-                 dstStride == src1Stride && \
-                 dstStride == src2Stride &&
-#define CONTSRC4 dstStride == src0Stride && \
-                 dstStride == src1Stride && \
-                 dstStride == src2Stride && \
-                 dstStride == src3Stride &&
-
-#if __GNUC__ == 4 && __GNUC_MINOR__ < 7
-
-#define MERGE_ASM2(sgn, bits) __asm__ ( \
-                                          "vld1." #bits " {d0-d1}, [%[in0]]             \n\t" \
-                                          "vld1." #bits " {d2-d3}, [%[in1]]             \n\t" \
-                                          "vst2." #bits " {d0, d2}, [%[out0]]           \n\t" \
-                                          "vst2." #bits " {d1, d3}, [%[out1]]           \n\t" \
-                                          : \
-                                          : [in0] "r" (src0 + sj), [in1] "r" (src1 + sj), \
-                                            [out0]  "r" (dst + dj), [out1]  "r" (dst + dj + MUL2(8)/sizeof(sgn##bits)) \
-                                          : "d0","d1","d2","d3" \
-                                      );
-#define MERGE_ASM3(sgn, bits) __asm__ ( \
-                                          "vld1." #bits " {d0-d1}, [%[in0]]             \n\t" \
-                                          "vld1." #bits " {d2-d3}, [%[in1]]             \n\t" \
-                                          "vld1." #bits " {d4-d5}, [%[in2]]             \n\t" \
-                                          "vst3." #bits " {d0, d2, d4}, [%[out0]]       \n\t" \
-                                          "vst3." #bits " {d1, d3, d5}, [%[out1]]       \n\t" \
-                                          : \
-                                          : [in0] "r" (src0 + sj), [in1] "r" (src1 + sj), [in2] "r" (src2 + sj), \
-                                            [out0]  "r" (dst + dj), [out1]  "r" (dst + dj + MUL3(8)/sizeof(sgn##bits)) \
-                                          : "d0","d1","d2","d3","d4","d5" \
-                                      );
-#define MERGE_ASM4(sgn, bits) __asm__ ( \
-                                          "vld1." #bits " {d0-d1}, [%[in0]]             \n\t" \
-                                          "vld1." #bits " {d2-d3}, [%[in1]]             \n\t" \
-                                          "vld1." #bits " {d4-d5}, [%[in2]]             \n\t" \
-                                          "vld1." #bits " {d6-d7}, [%[in3]]             \n\t" \
-                                          "vst4." #bits " {d0, d2, d4, d6}, [%[out0]]   \n\t" \
-                                          "vst4." #bits " {d1, d3, d5, d7}, [%[out1]]   \n\t" \
-                                          : \
-                                          : [in0] "r" (src0 + sj), [in1] "r" (src1 + sj), [in2] "r" (src2 + sj), [in3] "r" (src3 + sj), \
-                                            [out0]  "r" (dst + dj), [out1]  "r" (dst + dj + MUL4(8)/sizeof(sgn##bits)) \
-                                          : "d0","d1","d2","d3","d4","d5","d6","d7" \
-                                      );
-
-#define MERGE_QUAD(sgn, bits, n) { \
-                                     FILL_LINES##n(PREF, sgn##bits) \
-                                     MERGE_ASM##n(sgn, bits) \
-                                 }
-
-#else
-
-#define MERGE_QUAD(sgn, bits, n) { \
-                                     vec128 v_dst; \
-                                     /*FILL_LINES##n(PREF, sgn##bits) \
-                                     FILL_LINES##n(VLD1Q, sgn##bits)*/ \
-                                     FILL_LINES##n(PRLD, sgn##bits) \
-                                     vst##n##q_##sgn##bits(dst + dj, v_dst); \
-                                 }
-
-#endif // __GNUC__ == 4 && __GNUC_MINOR__ < 7
-
-#define COMBINE(sgn,bits,n) void combine##n(const Size2D &_size                                             \
-                                        FILL_LINES##n(FARG, sgn##bits),                                     \
-                                        sgn##bits * dstBase, ptrdiff_t dstStride)                           \
-{                                                                                                           \
-    internal::assertSupportedConfiguration();                                                               \
-    Size2D size(_size);                                                                                     \
-    if (CONTSRC##n                                                                                          \
-        dstStride == (ptrdiff_t)(size.width))                                                               \
-    {                                                                                                       \
-        size.width *= size.height;                                                                          \
-        size.height = 1;                                                                                    \
-    }                                                                                                       \
-    typedef internal::VecTraits<sgn##bits, n>::vec128 vec128;                                               \
-    size_t roiw16 = size.width >= (16/sizeof(sgn##bits) - 1) ? size.width - (16/sizeof(sgn##bits) - 1) : 0; \
-    typedef internal::VecTraits<sgn##bits, n>::vec64 vec64;                                                 \
-    size_t roiw8 = size.width >= (8/sizeof(sgn##bits) - 1) ? size.width - (8/sizeof(sgn##bits) - 1) : 0;    \
-                                                                                                            \
-    for (size_t i = 0u; i < size.height; ++i)                                                               \
-    {                                                                                                       \
-        FILL_LINES##n(VROW, sgn##bits)                                                                      \
-        sgn##bits * dst = internal::getRowPtr(dstBase, dstStride, i);                                       \
-        size_t sj = 0u, dj = 0u;                                                                            \
-                                                                                                            \
-        for (; sj < roiw16; sj += 16/sizeof(sgn##bits), dj += MUL##n(16)/sizeof(sgn##bits))                 \
-            MERGE_QUAD(sgn, bits, n)                                                                        \
-                                                                                                            \
-        if ( sj < roiw8 )                                                                                   \
-        {                                                                                                   \
-            vec64 v_dst;                                                                                    \
-            FILL_LINES##n(VLD1, sgn##bits)                                                                  \
-            vst##n##_##sgn##bits(dst + dj, v_dst);                                                          \
-            sj += 8/sizeof(sgn##bits); dj += MUL##n(8)/sizeof(sgn##bits);                                   \
-        }                                                                                                   \
-                                                                                                            \
-        for (; sj < size.width; ++sj, dj += n)                                                              \
-        {                                                                                                   \
-            FILL_LINES##n(SLD, sgn##bits)                                                                   \
-        }                                                                                                   \
-    }                                                                                                       \
-}
-
-#define COMBINE64(sgn,n) void combine##n(const Size2D &_size                                                \
-                                               FILL_LINES##n(FARG, sgn##64),                                \
-                                               sgn##64 * dstBase, ptrdiff_t dstStride)                      \
-{                                                                                                           \
-    internal::assertSupportedConfiguration();                                                               \
-    Size2D size(_size);                                                                                     \
-    if (CONTSRC##n                                                                                          \
-        dstStride == (ptrdiff_t)(size.width))                                                               \
-    {                                                                                                       \
-        size.width *= size.height;                                                                          \
-        size.height = 1;                                                                                    \
-    }                                                                                                       \
-    typedef internal::VecTraits<sgn##64, n>::vec64 vec64;                                                   \
-                                                                                                            \
-    for (size_t i = 0u; i < size.height; ++i)                                                               \
-    {                                                                                                       \
-        FILL_LINES##n(VROW, sgn##64)                                                                        \
-        sgn##64 * dst = internal::getRowPtr(dstBase, dstStride, i);                                         \
-        size_t sj = 0u, dj = 0u;                                                                            \
-                                                                                                            \
-        for (; sj < size.width; ++sj, dj += n)                                                              \
-        {                                                                                                   \
-            vec64 v_dst;                                                                                    \
-            FILL_LINES##n(VLD1, sgn##64)                                                                    \
-            vst##n##_##sgn##64(dst + dj, v_dst);                                                            \
-            /*FILL_LINES##n(SLD, sgn##64)*/                                                                 \
-        }                                                                                                   \
-    }                                                                                                       \
-}
-
-#else
-
-#define  VOID_LINE(type, n) (void)src##n##Base; (void)src##n##Stride;
-
-#define COMBINE(sgn,bits,n) void combine##n(const Size2D &size                                              \
-                                        FILL_LINES##n(FARG, sgn##bits),                                     \
-                                        sgn##bits * dstBase, ptrdiff_t dstStride)                           \
-{                                                                                                           \
-    internal::assertSupportedConfiguration();                                                               \
-    (void)size;                                                                                             \
-    FILL_LINES##n(VOID, sgn##bits)                                                                          \
-    (void)dstBase;                                                                                          \
-    (void)dstStride;                                                                                        \
-}
-#define COMBINE64(sgn,n) COMBINE(sgn,64,n)
-
-#endif //CAROTENE_NEON
-
-COMBINE(u, 8,2)
-COMBINE(u, 8,3)
-COMBINE(u, 8,4)
-COMBINE(u,16,2)
-COMBINE(u,16,3)
-COMBINE(u,16,4)
-COMBINE(s,32,2)
-COMBINE(s,32,3)
-COMBINE(s,32,4)
-COMBINE64(s, 2)
-COMBINE64(s, 3)
-COMBINE64(s, 4)
-
-void combineYUYV(const Size2D &size,
-                 const u8 * srcyBase, ptrdiff_t srcyStride,
-                 const u8 * srcuBase, ptrdiff_t srcuStride,
-                 const u8 * srcvBase, ptrdiff_t srcvStride,
-                 u8 * dstBase, ptrdiff_t dstStride)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-#ifndef ANDROID
-    size_t roiw32 = size.width >= 31 ? size.width - 31 : 0;
-#endif
-    size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
-
-    for (size_t i = 0u; i < size.height; i += 1)
-    {
-        const u8 * srcy = internal::getRowPtr(srcyBase, srcyStride, i);
-        const u8 * srcu = internal::getRowPtr(srcuBase, srcuStride, i);
-        const u8 * srcv = internal::getRowPtr(srcvBase, srcvStride, i);
-        u8 * dst = internal::getRowPtr(dstBase, dstStride, i);
-        size_t syj = 0u, sj = 0u, dj = 0u;
-
-#ifndef ANDROID
-        for (; sj < roiw32; sj += 32, syj += 64, dj += 128)
-        {
-            internal::prefetch(srcy + syj);
-            internal::prefetch(srcu + sj);
-            internal::prefetch(srcv + sj);
-
-            uint8x16x2_t v_y = vld2q_u8(srcy + syj);
-            uint8x16x4_t v_dst;
-            v_dst.val[0] = v_y.val[0];
-            v_dst.val[1] = vld1q_u8(srcu + sj);
-            v_dst.val[2] = v_y.val[1];
-            v_dst.val[3] = vld1q_u8(srcv + sj);
-            vst4q_u8(dst + dj, v_dst);
-
-            v_y = vld2q_u8(srcy + syj + 32);
-            v_dst.val[0] = v_y.val[0];
-            v_dst.val[1] = vld1q_u8(srcu + sj + 16);
-            v_dst.val[2] = v_y.val[1];
-            v_dst.val[3] = vld1q_u8(srcv + sj + 16);
-            vst4q_u8(dst + dj + 64, v_dst);
-        }
-#endif
-
-        for (; sj < roiw8; sj += 8, syj += 16, dj += 32)
-        {
-            uint8x8x2_t v_y = vld2_u8(srcy + syj);
-            uint8x8x4_t v_dst;
-            v_dst.val[0] = v_y.val[0];
-            v_dst.val[1] = vld1_u8(srcu + sj);
-            v_dst.val[2] = v_y.val[1];
-            v_dst.val[3] = vld1_u8(srcv + sj);
-            vst4_u8(dst + dj, v_dst);
-        }
-
-        for (; sj < size.width; ++sj, syj += 2, dj += 4)
-        {
-            dst[dj] = srcy[syj];
-            dst[dj + 1] = srcu[sj];
-            dst[dj + 2] = srcy[syj + 1];
-            dst[dj + 3] = srcv[sj];
-        }
-    }
-#else
-    (void)size;
-    (void)srcyBase;
-    (void)srcyStride;
-    (void)srcuBase;
-    (void)srcuStride;
-    (void)srcvBase;
-    (void)srcvStride;
-    (void)dstBase;
-    (void)dstStride;
-#endif
-}
-
-void combineUYVY(const Size2D &size,
-                 const u8 * srcyBase, ptrdiff_t srcyStride,
-                 const u8 * srcuBase, ptrdiff_t srcuStride,
-                 const u8 * srcvBase, ptrdiff_t srcvStride,
-                 u8 * dstBase, ptrdiff_t dstStride)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-#ifndef ANDROID
-    size_t roiw32 = size.width >= 31 ? size.width - 31 : 0;
-#endif
-    size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
-
-    for (size_t i = 0u; i < size.height; ++i)
-    {
-        const u8 * srcy = internal::getRowPtr(srcyBase, srcyStride, i);
-        const u8 * srcu = internal::getRowPtr(srcuBase, srcuStride, i);
-        const u8 * srcv = internal::getRowPtr(srcvBase, srcvStride, i);
-        u8 * dst = internal::getRowPtr(dstBase, dstStride, i);
-        size_t syj = 0u, sj = 0u, dj = 0u;
-
-#ifndef ANDROID
-        for (; sj < roiw32; sj += 32, syj += 64, dj += 128)
-        {
-            internal::prefetch(srcy + syj);
-            internal::prefetch(srcu + sj);
-            internal::prefetch(srcv + sj);
-
-            uint8x16x2_t v_y = vld2q_u8(srcy + syj);
-            uint8x16x4_t v_dst;
-            v_dst.val[0] = vld1q_u8(srcu + sj);
-            v_dst.val[1] = v_y.val[0];
-            v_dst.val[2] = vld1q_u8(srcv + sj);
-            v_dst.val[3] = v_y.val[1];
-            vst4q_u8(dst + dj, v_dst);
-
-            v_y = vld2q_u8(srcy + syj + 32);
-            v_dst.val[0] = vld1q_u8(srcu + sj + 16);
-            v_dst.val[1] = v_y.val[0];
-            v_dst.val[2] = vld1q_u8(srcv + sj + 16);
-            v_dst.val[3] = v_y.val[1];
-            vst4q_u8(dst + dj + 64, v_dst);
-        }
-#endif
-
-        for (; sj < roiw8; sj += 8, syj += 16, dj += 32)
-        {
-            uint8x8x2_t v_y = vld2_u8(srcy + syj);
-            uint8x8x4_t v_dst;
-            v_dst.val[0] = vld1_u8(srcu + sj);
-            v_dst.val[1] = v_y.val[0];
-            v_dst.val[2] = vld1_u8(srcv + sj);
-            v_dst.val[3] = v_y.val[1];
-            vst4_u8(dst + dj, v_dst);
-        }
-
-        for (; sj < size.width; ++sj, syj += 2, dj += 4)
-        {
-            dst[dj] = srcu[sj];
-            dst[dj + 1] = srcy[syj];
-            dst[dj + 2] = srcv[sj];
-            dst[dj + 3] = srcy[syj + 1];
-        }
-    }
-#else
-    (void)size;
-    (void)srcyBase;
-    (void)srcyStride;
-    (void)srcuBase;
-    (void)srcuStride;
-    (void)srcvBase;
-    (void)srcvStride;
-    (void)dstBase;
-    (void)dstStride;
-#endif
-}
-
-} // namespace CAROTENE_NS
--- a/3rdparty/carotene/src/cmp.cpp
+++ b/3rdparty/carotene/src/cmp.cpp
@ -1,340 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2014-2015, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-#include "common.hpp"
-#include "vtransform.hpp"
-
-namespace CAROTENE_NS {
-
-#ifdef CAROTENE_NEON
-
-namespace {
-
-inline void vnst(u8* dst, uint8x16_t v1, uint8x16_t v2) { vst1q_u8(dst, v1); vst1q_u8(dst+16, v2); }
-inline void vnst(u8* dst, uint16x8_t v1, uint16x8_t v2) { vst1q_u8(dst, vcombine_u8(vmovn_u16(v1), vmovn_u16(v2))); }
-inline void vnst(u8* dst, uint32x4_t v1, uint32x4_t v2) { vst1_u8(dst, vmovn_u16(vcombine_u16(vmovn_u32(v1), vmovn_u32(v2)))); }
-
-template <typename Op, int elsize> struct vtail
-{
-    static inline void compare(const typename Op::type * src0, const typename Op::type * src1,
-                               u8 * dst, const Op & op,
-                               size_t &x, size_t width)
-    {
-        //do nothing since there couldn't be enough data
-        (void)src0;
-        (void)src1;
-        (void)dst;
-        (void)op;
-        (void)x;
-        (void)width;
-    }
-};
-template <typename Op> struct vtail<Op, 2>
-{
-    static inline void compare(const typename Op::type * src0, const typename Op::type * src1,
-                               u8 * dst, const Op & op,
-                               size_t &x, size_t width)
-    {
-        typedef typename Op::type type;
-        typedef typename internal::VecTraits<type>::vec128 vec128;
-        typedef typename internal::VecTraits<type>::unsign::vec128 uvec128;
-        //There no more than 15 elements in the tail, so we could handle 8 element vector only once
-        if( x + 8 < width)
-        {
-            vec128  v_src0, v_src1;
-            uvec128 v_dst;
-
-            v_src0 = internal::vld1q(src0 + x);
-            v_src1 = internal::vld1q(src1 + x);
-            op(v_src0, v_src1, v_dst);
-            internal::vst1(dst + x, internal::vmovn(v_dst));
-            x+=8;
-        }
-    }
-};
-template <typename Op> struct vtail<Op, 1>
-{
-    static inline void compare(const typename Op::type * src0, const typename Op::type * src1,
-                               u8 * dst, const Op & op,
-                               size_t &x, size_t width)
-    {
-        typedef typename Op::type type;
-        typedef typename internal::VecTraits<type>::vec128 vec128;
-        typedef typename internal::VecTraits<type>::unsign::vec128 uvec128;
-        typedef typename internal::VecTraits<type>::vec64 vec64;
-        typedef typename internal::VecTraits<type>::unsign::vec64 uvec64;
-        //There no more than 31 elements in the tail, so we could handle once 16+8 or 16 or 8 elements
-        if( x + 16 < width)
-        {
-            vec128  v_src0, v_src1;
-            uvec128 v_dst;
-
-            v_src0 = internal::vld1q(src0 + x);
-            v_src1 = internal::vld1q(src1 + x);
-            op(v_src0, v_src1, v_dst);
-            internal::vst1q(dst + x, v_dst);
-            x+=16;
-        }
-        if( x + 8 < width)
-        {
-            vec64  v_src0, v_src1;
-            uvec64 v_dst;
-
-            v_src0 = internal::vld1(src0 + x);
-            v_src1 = internal::vld1(src1 + x);
-            op(v_src0, v_src1, v_dst);
-            internal::vst1(dst + x, v_dst);
-            x+=8;
-        }
-    }
-};
-
-template <typename Op>
-void vcompare(Size2D size,
-              const typename Op::type * src0Base, ptrdiff_t src0Stride,
-              const typename Op::type * src1Base, ptrdiff_t src1Stride,
-              u8 * dstBase, ptrdiff_t dstStride, const Op & op)
-{
-    typedef typename Op::type type;
-    typedef typename internal::VecTraits<type>::vec128 vec128;
-    typedef typename internal::VecTraits<type>::unsign::vec128 uvec128;
-
-    if (src0Stride == src1Stride && src0Stride == dstStride &&
-        src0Stride == (ptrdiff_t)(size.width * sizeof(type)))
-    {
-        size.width *= size.height;
-        size.height = 1;
-    }
-
-    const u32 step_base = 32 / sizeof(type);
-    size_t roiw_base = size.width >= (step_base - 1) ? size.width - step_base + 1 : 0;
-
-    for (size_t y = 0; y < size.height; ++y)
-    {
-        const type * src0 = internal::getRowPtr(src0Base, src0Stride, y);
-        const type * src1 = internal::getRowPtr(src1Base, src1Stride, y);
-        u8 * dst = internal::getRowPtr(dstBase, dstStride, y);
-        size_t x = 0;
-
-        for( ; x < roiw_base; x += step_base )
-        {
-            internal::prefetch(src0 + x);
-            internal::prefetch(src1 + x);
-
-            vec128 v_src00 = internal::vld1q(src0 + x), v_src01 = internal::vld1q(src0 + x + 16 / sizeof(type));
-            vec128 v_src10 = internal::vld1q(src1 + x), v_src11 = internal::vld1q(src1 + x + 16 / sizeof(type));
-            uvec128 v_dst0;
-            uvec128 v_dst1;
-
-            op(v_src00, v_src10, v_dst0);
-            op(v_src01, v_src11, v_dst1);
-
-            vnst(dst + x, v_dst0, v_dst1);
-        }
-
-        vtail<Op, sizeof(type)>::compare(src0, src1, dst, op, x, size.width);
-
-        for (; x < size.width; ++x)
-        {
-            op(src0 + x, src1 + x, dst + x);
-        }
-    }
-}
-
-template<typename T>
-struct OpCmpEQ
-{
-    typedef T type;
-
-    void operator() (const typename internal::VecTraits<T>::vec128 & v_src0, const typename internal::VecTraits<T>::vec128 & v_src1,
-              typename internal::VecTraits<T>::unsign::vec128 & v_dst) const
-    {
-        v_dst = internal::vceqq(v_src0, v_src1);
-    }
-
-    void operator() (const typename internal::VecTraits<T>::vec64 & v_src0, const typename internal::VecTraits<T>::vec64 & v_src1,
-              typename internal::VecTraits<T>::unsign::vec64 & v_dst) const
-    {
-        v_dst = internal::vceq(v_src0, v_src1);
-    }
-
-    void operator() (const T * src0, const T * src1, u8 * dst) const
-    {
-        dst[0] = src0[0] == src1[0] ? 255 : 0;
-    }
-};
-
-template<typename T>
-struct OpCmpNE
-{
-    typedef T type;
-
-    void operator() (const typename internal::VecTraits<T>::vec128 & v_src0, const typename internal::VecTraits<T>::vec128 & v_src1,
-              typename internal::VecTraits<T>::unsign::vec128 & v_dst) const
-    {
-        v_dst = internal::vmvnq(internal::vceqq(v_src0, v_src1));
-    }
-
-    void operator() (const typename internal::VecTraits<T>::vec64 & v_src0, const typename internal::VecTraits<T>::vec64 & v_src1,
-              typename internal::VecTraits<T>::unsign::vec64 & v_dst) const
-    {
-        v_dst = internal::vmvn(internal::vceq(v_src0, v_src1));
-    }
-
-    void operator() (const T * src0, const T * src1, u8 * dst) const
-    {
-        dst[0] = src0[0] == src1[0] ? 0 : 255;
-    }
-};
-
-template<typename T>
-struct OpCmpGT
-{
-    typedef T type;
-
-    void operator() (const typename internal::VecTraits<T>::vec128 & v_src0, const typename internal::VecTraits<T>::vec128 & v_src1,
-              typename internal::VecTraits<T>::unsign::vec128 & v_dst) const
-    {
-        v_dst = internal::vcgtq(v_src0, v_src1);
-    }
-
-    void operator() (const typename internal::VecTraits<T>::vec64 & v_src0, const typename internal::VecTraits<T>::vec64 & v_src1,
-              typename internal::VecTraits<T>::unsign::vec64 & v_dst) const
-    {
-        v_dst = internal::vcgt(v_src0, v_src1);
-    }
-
-    void operator() (const T * src0, const T * src1, u8 * dst) const
-    {
-        dst[0] = src0[0] > src1[0] ? 255 : 0;
-    }
-};
-
-template<typename T>
-struct OpCmpGE
-{
-    typedef T type;
-
-    void operator() (const typename internal::VecTraits<T>::vec128 & v_src0, const typename internal::VecTraits<T>::vec128 & v_src1,
-              typename internal::VecTraits<T>::unsign::vec128 & v_dst) const
-    {
-        v_dst = internal::vcgeq(v_src0, v_src1);
-    }
-
-    void operator() (const typename internal::VecTraits<T>::vec64 & v_src0, const typename internal::VecTraits<T>::vec64 & v_src1,
-              typename internal::VecTraits<T>::unsign::vec64 & v_dst) const
-    {
-        v_dst = internal::vcge(v_src0, v_src1);
-    }
-
-    void operator() (const T * src0, const T * src1, u8 * dst) const
-    {
-        dst[0] = src0[0] >= src1[0] ? 255 : 0;
-    }
-};
-
-}
-
-#define IMPL_CMPOP(op, type)                              \
-void cmp##op(const Size2D &size,                          \
-             const type * src0Base, ptrdiff_t src0Stride, \
-             const type * src1Base, ptrdiff_t src1Stride, \
-                       u8 *dstBase, ptrdiff_t dstStride)  \
-{                                                         \
-    internal::assertSupportedConfiguration();             \
-    vcompare(size,                                        \
-             src0Base, src0Stride,                        \
-             src1Base, src1Stride,                        \
-             dstBase, dstStride,                          \
-             OpCmp##op<type>());                          \
-}
-
-#else
-
-#define IMPL_CMPOP(op, type)                              \
-void cmp##op(const Size2D &size,                          \
-             const type * src0Base, ptrdiff_t src0Stride, \
-             const type * src1Base, ptrdiff_t src1Stride, \
-             u8 *dstBase, ptrdiff_t dstStride)            \
-{                                                         \
-    internal::assertSupportedConfiguration();             \
-    (void)size;                                           \
-    (void)src0Base;                                       \
-    (void)src0Stride;                                     \
-    (void)src1Base;                                       \
-    (void)src1Stride;                                     \
-    (void)dstBase;                                        \
-    (void)dstStride;                                      \
-}
-
-#endif
-
-IMPL_CMPOP(EQ, u8)
-IMPL_CMPOP(EQ, s8)
-IMPL_CMPOP(EQ, u16)
-IMPL_CMPOP(EQ, s16)
-IMPL_CMPOP(EQ, u32)
-IMPL_CMPOP(EQ, s32)
-IMPL_CMPOP(EQ, f32)
-
-IMPL_CMPOP(NE, u8)
-IMPL_CMPOP(NE, s8)
-IMPL_CMPOP(NE, u16)
-IMPL_CMPOP(NE, s16)
-IMPL_CMPOP(NE, u32)
-IMPL_CMPOP(NE, s32)
-IMPL_CMPOP(NE, f32)
-
-IMPL_CMPOP(GT, u8)
-IMPL_CMPOP(GT, s8)
-IMPL_CMPOP(GT, u16)
-IMPL_CMPOP(GT, s16)
-IMPL_CMPOP(GT, u32)
-IMPL_CMPOP(GT, s32)
-IMPL_CMPOP(GT, f32)
-
-IMPL_CMPOP(GE, u8)
-IMPL_CMPOP(GE, s8)
-IMPL_CMPOP(GE, u16)
-IMPL_CMPOP(GE, s16)
-IMPL_CMPOP(GE, u32)
-IMPL_CMPOP(GE, s32)
-IMPL_CMPOP(GE, f32)
-
-} // namespace CAROTENE_NS
--- a/3rdparty/carotene/src/colorconvert.cpp
+++ b/3rdparty/carotene/src/colorconvert.cpp
--- a/3rdparty/carotene/src/common.cpp
+++ b/3rdparty/carotene/src/common.cpp
@ -1,108 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2014, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-#include <cstdlib>
-#include <iostream>
-
-#include "common.hpp"
-
-namespace CAROTENE_NS {
-
-bool isSupportedConfiguration()
-{
-#ifdef CAROTENE_NEON
-    return true;
-#else
-    return false;
-#endif
-}
-
-namespace internal {
-
-void assertSupportedConfiguration(bool parametersSupported)
-{
-    if (!isSupportedConfiguration()) {
-        std::cerr << "internal error: attempted to use an unavailable function" << std::endl;
-        std::abort();
-    }
-
-    if (!parametersSupported) {
-        std::cerr << "internal error: attempted to use a function with unsupported parameters" << std::endl;
-        std::abort();
-    }
-}
-
-ptrdiff_t borderInterpolate(ptrdiff_t _p, size_t _len, BORDER_MODE borderType, size_t startMargin, size_t endMargin)
-{
-    ptrdiff_t p = _p + (ptrdiff_t)startMargin;
-    size_t len = _len + startMargin + endMargin;
-    if( (size_t)p < len )
-        return _p;
-    else if( borderType == BORDER_MODE_REPLICATE )
-        p = p < 0 ? 0 : (ptrdiff_t)len - 1;
-    else if( borderType == BORDER_MODE_REFLECT || borderType == BORDER_MODE_REFLECT101 )
-    {
-        s32 delta = borderType == BORDER_MODE_REFLECT101;
-        if( len == 1 )
-            return 0;
-        do
-        {
-            if( p < 0 )
-                p = -p - 1 + delta;
-            else
-                p = (ptrdiff_t)len - 1 - (p - (ptrdiff_t)len) - delta;
-        }
-        while( (size_t)p >= len );
-    }
-    else if( borderType == BORDER_MODE_WRAP )
-    {
-        if( p < 0 )
-            p -= ((p-(ptrdiff_t)len+1)/(ptrdiff_t)len)*(ptrdiff_t)len;
-        if( p >= (ptrdiff_t)len )
-            p %= (ptrdiff_t)len;
-    }
-    else if( borderType == BORDER_MODE_CONSTANT )
-        p = -1;
-    else
-        internal::assertSupportedConfiguration(false);
-    return p - (ptrdiff_t)startMargin;
-}
-
-} // namespace internal
-} // namespace CAROTENE_NS
--- a/3rdparty/carotene/src/common.hpp
+++ b/3rdparty/carotene/src/common.hpp
@ -1,96 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2014-2015, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-#ifndef CAROTENE_SRC_COMMON_HPP
-#define CAROTENE_SRC_COMMON_HPP
-
-#include <cstddef>
-#include <algorithm>
-
-#if defined WITH_NEON && (defined __ARM_NEON__ || defined __ARM_NEON)
-#define CAROTENE_NEON
-#endif
-
-#ifdef CAROTENE_NEON
-#include <arm_neon.h>
-#include "intrinsics.hpp"
-#endif
-
-#include <carotene/functions.hpp>
-#include "saturate_cast.hpp"
-
-namespace CAROTENE_NS { namespace internal {
-
-inline void prefetch(const void *ptr, size_t offset = 32*10)
-{
-#if defined __GNUC__
-    __builtin_prefetch(reinterpret_cast<const char*>(ptr) + offset);
-#elif defined _MSC_VER && defined CAROTENE_NEON
-    __prefetch(reinterpret_cast<const char*>(ptr) + offset);
-#else
-    (void)ptr;
-    (void)offset;
-#endif
-}
-
-template <typename T>
-inline T *getRowPtr(T *base, ptrdiff_t stride, size_t row)
-{
-    char *baseRaw = const_cast<char *>(reinterpret_cast<const char *>(base));
-    return reinterpret_cast<T *>(baseRaw + ptrdiff_t(row) * stride);
-}
-
-void assertSupportedConfiguration(bool parametersSupported = true);
-
-ptrdiff_t borderInterpolate(ptrdiff_t _p, size_t _len, BORDER_MODE borderType, size_t startMargin = 0, size_t endMargin = 0);
-
-/*!
- *  Aligns pointer by the certain number of bytes
- *
- *  This small inline function aligns the pointer by the certain number of bytes by shifting
- *  it forward by 0 or a positive offset.
- */
-template<typename T> inline T* alignPtr(T* ptr, size_t n=sizeof(T))
-{
-    return (T*)(((size_t)ptr + n-1) & -n);
-}
-
-}}
-
-#endif
--- a/3rdparty/carotene/src/convert.cpp
+++ b/3rdparty/carotene/src/convert.cpp
--- a/3rdparty/carotene/src/convert_depth.cpp
+++ b/3rdparty/carotene/src/convert_depth.cpp
@ -1,399 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2014, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-#include "common.hpp"
-
-#include <cstring>
-
-namespace CAROTENE_NS {
-
-#ifdef CAROTENE_NEON
-
-namespace {
-
-template <int shift>
-void lshiftConst(const Size2D &size,
-                 const u8 * srcBase, ptrdiff_t srcStride,
-                 s16 * dstBase, ptrdiff_t dstStride)
-{
-    size_t roiw16 = size.width >= 15 ? size.width - 15 : 0;
-    size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
-
-    for (size_t i = 0; i < size.height; ++i)
-    {
-        const u8 * src = internal::getRowPtr(srcBase, srcStride, i);
-        s16 * dst = internal::getRowPtr(dstBase, dstStride, i);
-        size_t j = 0;
-
-        for (; j < roiw16; j += 16)
-        {
-            internal::prefetch(src + j);
-            uint8x16_t v_src = vld1q_u8(src + j);
-            int16x8_t v_dst0 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(v_src)));
-            int16x8_t v_dst1 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(v_src)));
-
-            vst1q_s16(dst + j, vshlq_n_s16(v_dst0, shift));
-            vst1q_s16(dst + j + 8, vshlq_n_s16(v_dst1, shift));
-        }
-        for (; j < roiw8; j += 8)
-        {
-            int16x8_t v_dst = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(src + j)));
-            vst1q_s16(dst + j, vshlq_n_s16(v_dst, shift));
-        }
-
-        for (; j < size.width; j++)
-        {
-            dst[j] = ((s16)src[j] << shift);
-        }
-    }
-}
-
-template <>
-void lshiftConst<0>(const Size2D &size,
-                    const u8 * srcBase, ptrdiff_t srcStride,
-                    s16 * dstBase, ptrdiff_t dstStride)
-{
-    size_t roiw16 = size.width >= 15 ? size.width - 15 : 0;
-    size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
-
-    for (size_t i = 0; i < size.height; ++i)
-    {
-        const u8 * src = internal::getRowPtr(srcBase, srcStride, i);
-        s16 * dst = internal::getRowPtr(dstBase, dstStride, i);
-        size_t j = 0;
-
-        for (; j < roiw16; j += 16)
-        {
-            internal::prefetch(src + j);
-            uint8x16_t v_src = vld1q_u8(src + j);
-            int16x8_t v_dst0 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(v_src)));
-            int16x8_t v_dst1 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(v_src)));
-
-            vst1q_s16(dst + j, v_dst0);
-            vst1q_s16(dst + j + 8, v_dst1);
-        }
-        for (; j < roiw8; j += 8)
-        {
-            int16x8_t v_dst = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(src + j)));
-            vst1q_s16(dst + j, v_dst);
-        }
-
-        for (; j < size.width; j++)
-        {
-            dst[j] = (s16)src[j];
-        }
-    }
-}
-
-template <int shift>
-void rshiftConst(const Size2D &size,
-                 const s16 * srcBase, ptrdiff_t srcStride,
-                 u8 * dstBase, ptrdiff_t dstStride,
-                 CONVERT_POLICY cpolicy)
-{
-    size_t roiw16 = size.width >= 15 ? size.width - 15 : 0;
-    size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
-
-    for (size_t i = 0; i < size.height; ++i)
-    {
-        const s16 * src = internal::getRowPtr(srcBase, srcStride, i);
-        u8 * dst = internal::getRowPtr(dstBase, dstStride, i);
-        size_t j = 0;
-
-        if (cpolicy == CONVERT_POLICY_SATURATE)
-        {
-            for (; j < roiw16; j += 16)
-            {
-                internal::prefetch(src + j);
-                int16x8_t v_src0 = vshrq_n_s16(vld1q_s16(src + j), shift),
-                          v_src1 = vshrq_n_s16(vld1q_s16(src + j + 8), shift);
-                uint8x16_t v_dst = vcombine_u8(vqmovun_s16(v_src0),
-                                               vqmovun_s16(v_src1));
-                vst1q_u8(dst + j, v_dst);
-            }
-            for (; j < roiw8; j += 8)
-            {
-                int16x8_t v_src = vshrq_n_s16(vld1q_s16(src + j), shift);
-                vst1_u8(dst + j, vqmovun_s16(v_src));
-            }
-
-            for (; j < size.width; j++)
-            {
-                dst[j] = internal::saturate_cast<u8>((src[j] >> shift));
-            }
-        }
-        else // CONVERT_POLICY_WRAP
-        {
-            for (; j < roiw16; j += 16)
-            {
-                internal::prefetch(src + j);
-                int16x8_t v_src0 = vshrq_n_s16(vld1q_s16(src + j), shift),
-                          v_src1 = vshrq_n_s16(vld1q_s16(src + j + 8), shift);
-                int8x16_t v_dst = vcombine_s8(vmovn_s16(v_src0),
-                                              vmovn_s16(v_src1));
-                vst1q_u8(dst + j, vreinterpretq_u8_s8(v_dst));
-            }
-            for (; j < roiw8; j += 8)
-            {
-                int16x8_t v_src = vshrq_n_s16(vld1q_s16(src + j), shift);
-                vst1_u8(dst + j, vreinterpret_u8_s8(vmovn_s16(v_src)));
-            }
-
-            for (; j < size.width; j++)
-            {
-                dst[j] = (u8)((src[j] >> shift));
-            }
-        }
-    }
-}
-
-template <>
-void rshiftConst<0>(const Size2D &size,
-                    const s16 * srcBase, ptrdiff_t srcStride,
-                    u8 * dstBase, ptrdiff_t dstStride,
-                    CONVERT_POLICY cpolicy)
-{
-    size_t roiw16 = size.width >= 15 ? size.width - 15 : 0;
-    size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
-
-    for (size_t i = 0; i < size.height; ++i)
-    {
-        const s16 * src = internal::getRowPtr(srcBase, srcStride, i);
-        u8 * dst = internal::getRowPtr(dstBase, dstStride, i);
-        size_t j = 0;
-
-        if (cpolicy == CONVERT_POLICY_SATURATE)
-        {
-            for (; j < roiw16; j += 16)
-            {
-                internal::prefetch(src + j);
-                int16x8_t v_src0 = vld1q_s16(src + j), v_src1 = vld1q_s16(src + j + 8);
-                uint8x16_t v_dst = vcombine_u8(vqmovun_s16(v_src0), vqmovun_s16(v_src1));
-                vst1q_u8(dst + j, v_dst);
-            }
-            for (; j < roiw8; j += 8)
-            {
-                int16x8_t v_src = vld1q_s16(src + j);
-                vst1_u8(dst + j, vqmovun_s16(v_src));
-            }
-
-            for (; j < size.width; j++)
-            {
-                dst[j] = internal::saturate_cast<u8>(src[j]);
-            }
-        }
-        else // CONVERT_POLICY_WRAP
-        {
-            for (; j < roiw16; j += 16)
-            {
-                internal::prefetch(src + j);
-                int16x8_t v_src0 = vld1q_s16(src + j), v_src1 = vld1q_s16(src + j + 8);
-                int8x16_t v_dst = vcombine_s8(vmovn_s16(v_src0), vmovn_s16(v_src1));
-                vst1q_u8(dst + j, vreinterpretq_u8_s8(v_dst));
-            }
-            for (; j < roiw8; j += 8)
-            {
-                int16x8_t v_src = vld1q_s16(src + j);
-                vst1_u8(dst + j, vreinterpret_u8_s8(vmovn_s16(v_src)));
-            }
-
-            for (; j < size.width; j++)
-            {
-                dst[j] = (u8)src[j];
-            }
-        }
-    }
-}
-
-typedef void (* lshiftConstFunc)(const Size2D &size,
-                                const u8 * srcBase, ptrdiff_t srcStride,
-                                s16 * dstBase, ptrdiff_t dstStride);
-
-typedef void (* rshiftConstFunc)(const Size2D &size,
-                                const s16 * srcBase, ptrdiff_t srcStride,
-                                u8 * dstBase, ptrdiff_t dstStride,
-                                CONVERT_POLICY cpolicy);
-
-} // namespace
-
-#endif
-
-void lshift(const Size2D &size,
-            const u8 * srcBase, ptrdiff_t srcStride,
-            s16 * dstBase, ptrdiff_t dstStride,
-            u32 shift)
-{
-    internal::assertSupportedConfiguration();
-
-#ifdef CAROTENE_NEON
-    if (shift >= 16u)
-    {
-        for (size_t i = 0; i < size.height; ++i)
-        {
-            s16 * dst = internal::getRowPtr(dstBase, dstStride, i);
-            std::memset(dst, 0, sizeof(s16) * size.width);
-        }
-        return;
-    }
-
-    // this ugly contruction is needed to avoid:
-    // /usr/lib/gcc/arm-linux-gnueabihf/4.8/include/arm_neon.h:3581:59: error: argument must be a constant
-    // return (int16x8_t)__builtin_neon_vshl_nv8hi (__a, __b, 1);
-
-    lshiftConstFunc funcs[16] =
-    {
-        lshiftConst<0>,
-        lshiftConst<1>,
-        lshiftConst<2>,
-        lshiftConst<3>,
-        lshiftConst<4>,
-        lshiftConst<5>,
-        lshiftConst<6>,
-        lshiftConst<7>,
-        lshiftConst<8>,
-        lshiftConst<9>,
-        lshiftConst<10>,
-        lshiftConst<11>,
-        lshiftConst<12>,
-        lshiftConst<13>,
-        lshiftConst<14>,
-        lshiftConst<15>
-    }, func = funcs[shift];
-
-    func(size, srcBase, srcStride, dstBase, dstStride);
-#else
-    (void)size;
-    (void)srcBase;
-    (void)srcStride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)shift;
-#endif
-}
-
-void rshift(const Size2D &size,
-            const s16 * srcBase, ptrdiff_t srcStride,
-            u8 * dstBase, ptrdiff_t dstStride,
-            u32 shift, CONVERT_POLICY cpolicy)
-{
-    internal::assertSupportedConfiguration();
-
-#ifdef CAROTENE_NEON
-    if (shift >= 16)
-    {
-        if (cpolicy == CONVERT_POLICY_WRAP)
-        {
-            size_t roiw16 = size.width >= 15 ? size.width - 15 : 0;
-            size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
-            int16x8_t v_zero = vdupq_n_s16(0);
-
-            for (size_t i = 0; i < size.height; ++i)
-            {
-                const s16 * src = internal::getRowPtr(srcBase, srcStride, i);
-                u8 * dst = internal::getRowPtr(dstBase, dstStride, i);
-                size_t j = 0;
-
-                for (; j < roiw16; j += 16)
-                {
-                    internal::prefetch(src + j);
-                    int16x8_t v_src0 = vld1q_s16(src + j), v_src1 = vld1q_s16(src + j + 8);
-                    uint8x16_t v_dst = vcombine_u8(vmovn_u16(vcltq_s16(v_src0, v_zero)),
-                                                   vmovn_u16(vcltq_s16(v_src1, v_zero)));
-                    vst1q_u8(dst + j, v_dst);
-                }
-                for (; j < roiw8; j += 8)
-                {
-                    int16x8_t v_src = vld1q_s16(src + j);
-                    vst1_u8(dst + j, vmovn_u16(vcltq_s16(v_src, v_zero)));
-                }
-
-                for (; j < size.width; j++)
-                {
-                    dst[j] = src[j] >= 0 ? 0 : 255;
-                }
-            }
-        }
-        else
-        {
-            for (size_t i = 0; i < size.height; ++i)
-            {
-                u8 * dst = internal::getRowPtr(dstBase, dstStride, i);
-                std::memset(dst, 0, sizeof(u8) * size.width);
-            }
-        }
-        return;
-    }
-
-    // this ugly contruction is needed to avoid:
-    // /usr/lib/gcc/arm-linux-gnueabihf/4.8/include/arm_neon.h:3581:59: error: argument must be a constant
-    // return (int16x8_t)__builtin_neon_vshr_nv8hi (__a, __b, 1);
-
-    rshiftConstFunc funcs[16] =
-    {
-        rshiftConst<0>,
-        rshiftConst<1>,
-        rshiftConst<2>,
-        rshiftConst<3>,
-        rshiftConst<4>,
-        rshiftConst<5>,
-        rshiftConst<6>,
-        rshiftConst<7>,
-        rshiftConst<8>,
-        rshiftConst<9>,
-        rshiftConst<10>,
-        rshiftConst<11>,
-        rshiftConst<12>,
-        rshiftConst<13>,
-        rshiftConst<14>,
-        rshiftConst<15>
-    }, func = funcs[shift];
-
-    func(size, srcBase, srcStride, dstBase, dstStride, cpolicy);
-#else
-    (void)size;
-    (void)srcBase;
-    (void)srcStride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)shift;
-    (void)cpolicy;
-#endif
-}
-
-} // namespace CAROTENE_NS
--- a/3rdparty/carotene/src/convert_scale.cpp
+++ b/3rdparty/carotene/src/convert_scale.cpp
--- a/3rdparty/carotene/src/convolution.cpp
+++ b/3rdparty/carotene/src/convolution.cpp
@ -1,340 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2014, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-#include "common.hpp"
-#include "saturate_cast.hpp"
-
-namespace CAROTENE_NS {
-
-bool isConvolutionSupported(const Size2D &size, const Size2D &ksize,
-                            BORDER_MODE border)
-{
-    return isSupportedConfiguration() && size.width >= 8 &&
-        (border == BORDER_MODE_CONSTANT ||
-            border == BORDER_MODE_REPLICATE) &&
-        (ksize.width == 3) && (ksize.height == 3);
-}
-
-#ifdef CAROTENE_NEON
-
-namespace {
-
-template <int shift>
-int32x4_t vshrq_s32(int32x4_t value)
-{
-    return vshrq_n_s32(value, shift);
-}
-
-template <>
-int32x4_t vshrq_s32<0>(int32x4_t value)
-{
-    return value;
-}
-
-} // namespace
-
-typedef int32x4_t (* vshrq_s32_func)(int32x4_t value);
-
-#endif
-
-void convolution(const Size2D &size,
-                 const u8 * srcBase, ptrdiff_t srcStride,
-                 u8 * dstBase, ptrdiff_t dstStride,
-                 BORDER_MODE border, u8 borderValue,
-                 const Size2D & ksize, s16 * kernelBase, u32 scale)
-{
-    internal::assertSupportedConfiguration(isConvolutionSupported(size, ksize, border));
-#ifdef CAROTENE_NEON
-    const uint8x8_t v_zero_u8 = vdup_n_u8(0);
-    const uint8x8_t v_border = vdup_n_u8(borderValue);
-    const int32x4_t v_zero_s32 = vdupq_n_s32(0);
-
-    uint8x8_t tprev[3] = { v_zero_u8, v_zero_u8, v_zero_u8 },
-              tcurr[3] = { v_zero_u8, v_zero_u8, v_zero_u8 },
-              tnext[3] = { v_zero_u8, v_zero_u8, v_zero_u8 };
-    uint8x8_t t0 = v_zero_u8, t1 = v_zero_u8, t2 = v_zero_u8;
-
-    ptrdiff_t width = (ptrdiff_t)size.width, height = (ptrdiff_t)size.height;
-    static const vshrq_s32_func vshrq_s32_a[33] =
-    {
-        vshrq_s32<0>,
-        vshrq_s32<1>,
-        vshrq_s32<2>,
-        vshrq_s32<3>,
-        vshrq_s32<4>,
-        vshrq_s32<5>,
-        vshrq_s32<6>,
-        vshrq_s32<7>,
-        vshrq_s32<8>,
-        vshrq_s32<9>,
-        vshrq_s32<10>,
-        vshrq_s32<11>,
-        vshrq_s32<12>,
-        vshrq_s32<13>,
-        vshrq_s32<14>,
-        vshrq_s32<15>,
-        vshrq_s32<16>,
-        vshrq_s32<17>,
-        vshrq_s32<18>,
-        vshrq_s32<19>,
-        vshrq_s32<20>,
-        vshrq_s32<21>,
-        vshrq_s32<22>,
-        vshrq_s32<23>,
-        vshrq_s32<24>,
-        vshrq_s32<25>,
-        vshrq_s32<26>,
-        vshrq_s32<27>,
-        vshrq_s32<28>,
-        vshrq_s32<29>,
-        vshrq_s32<30>,
-        vshrq_s32<31>,
-        vshrq_s32<32>
-    };
-    vshrq_s32_func vshrq_s32_p = vshrq_s32_a[scale];
-
-    for (ptrdiff_t y = 0; y < height; ++y)
-    {
-        const u8 * srow0 = y == 0 && border == BORDER_MODE_CONSTANT ? NULL : internal::getRowPtr(srcBase, srcStride, std::max<ptrdiff_t>(y - 1, 0));
-        const u8 * srow1 = internal::getRowPtr(srcBase, srcStride, y);
-        const u8 * srow2 = y + 1 == height && border == BORDER_MODE_CONSTANT ? NULL : internal::getRowPtr(srcBase, srcStride, std::min(y + 1, height - 1));
-        u8 * drow = internal::getRowPtr(dstBase, dstStride, y);
-
-        u8 prevx[3] = { 0, 0, 0 },
-           currx[3] = { 0, 0, 0 },
-           nextx[3] = { 0, 0, 0 };
-        ptrdiff_t x = 0;
-        const ptrdiff_t bwidth = y + 2 < height ? width : (width - 8);
-
-        // perform vertical convolution
-        for ( ; x <= bwidth; x += 8)
-        {
-            internal::prefetch(srow0 + x);
-            internal::prefetch(srow1 + x);
-            internal::prefetch(srow2 + x);
-
-            uint8x8_t x0 = !srow0 ? v_border : vld1_u8(srow0 + x);
-            uint8x8_t x1 = vld1_u8(srow1 + x);
-            uint8x8_t x2 = !srow2 ? v_border : vld1_u8(srow2 + x);
-
-            // calculate values for plain CPU part below if needed
-            if (x + 8 >= bwidth)
-            {
-                ptrdiff_t x3 = x == width ? width - 1 : x;
-                ptrdiff_t x4 = border == BORDER_MODE_CONSTANT ? x3 - 1 : std::max<ptrdiff_t>(x3 - 1, 0);
-
-                if (border == BORDER_MODE_CONSTANT && x4 < 0)
-                    prevx[0] = prevx[1] = prevx[2] = borderValue;
-                else
-                {
-                    prevx[0] = srow0 ? srow0[x4] : borderValue;
-                    prevx[1] =         srow1[x4]              ;
-                    prevx[2] = srow2 ? srow2[x4] : borderValue;
-                }
-
-                currx[0] = srow0 ? srow0[x3] : borderValue;
-                currx[1] =         srow1[x3]              ;
-                currx[2] = srow2 ? srow2[x3] : borderValue;
-            }
-
-            // make shift
-            if (x)
-            {
-                tprev[0] = tcurr[0];
-                tcurr[0] = tnext[0];
-
-                tprev[1] = tcurr[1];
-                tcurr[1] = tnext[1];
-
-                tprev[2] = tcurr[2];
-                tcurr[2] = tnext[2];
-            }
-
-            tnext[0] = x0;
-            tnext[1] = x1;
-            tnext[2] = x2;
-
-            // make extrapolation for the first elements
-            if (!x)
-            {
-                // make border
-                if (border == BORDER_MODE_CONSTANT)
-                    tcurr[0] = tcurr[1] = tcurr[2] = v_border;
-                else if (border == BORDER_MODE_REPLICATE)
-                {
-                    tcurr[0] = vdup_n_u8(vget_lane_u8(tnext[0], 0));
-                    tcurr[1] = vdup_n_u8(vget_lane_u8(tnext[1], 0));
-                    tcurr[2] = vdup_n_u8(vget_lane_u8(tnext[2], 0));
-                }
-
-                continue;
-            }
-
-            int32x4_t v_dst0 = v_zero_s32, v_dst1 = v_zero_s32;
-
-            {
-                // combine 3 "shifted" vectors
-                t0 = vext_u8(tprev[0], tcurr[0], 7);
-                t1 = tcurr[0];
-                t2 = vext_u8(tcurr[0], tnext[0], 1);
-
-                int16x8_t t0_16s = vreinterpretq_s16_u16(vmovl_u8(t0));
-                int16x8_t t1_16s = vreinterpretq_s16_u16(vmovl_u8(t1));
-                int16x8_t t2_16s = vreinterpretq_s16_u16(vmovl_u8(t2));
-
-                v_dst0 = vmlal_n_s16(v_dst0, vget_low_s16(t0_16s), kernelBase[8]);
-                v_dst0 = vmlal_n_s16(v_dst0, vget_low_s16(t1_16s), kernelBase[7]);
-                v_dst0 = vmlal_n_s16(v_dst0, vget_low_s16(t2_16s), kernelBase[6]);
-
-                v_dst1 = vmlal_n_s16(v_dst1, vget_high_s16(t0_16s), kernelBase[8]);
-                v_dst1 = vmlal_n_s16(v_dst1, vget_high_s16(t1_16s), kernelBase[7]);
-                v_dst1 = vmlal_n_s16(v_dst1, vget_high_s16(t2_16s), kernelBase[6]);
-            }
-
-            {
-                // combine 3 "shifted" vectors
-                t0 = vext_u8(tprev[1], tcurr[1], 7);
-                t1 = tcurr[1];
-                t2 = vext_u8(tcurr[1], tnext[1], 1);
-
-                int16x8_t t0_16s = vreinterpretq_s16_u16(vmovl_u8(t0));
-                int16x8_t t1_16s = vreinterpretq_s16_u16(vmovl_u8(t1));
-                int16x8_t t2_16s = vreinterpretq_s16_u16(vmovl_u8(t2));
-
-                v_dst0 = vmlal_n_s16(v_dst0, vget_low_s16(t0_16s), kernelBase[5]);
-                v_dst0 = vmlal_n_s16(v_dst0, vget_low_s16(t1_16s), kernelBase[4]);
-                v_dst0 = vmlal_n_s16(v_dst0, vget_low_s16(t2_16s), kernelBase[3]);
-
-                v_dst1 = vmlal_n_s16(v_dst1, vget_high_s16(t0_16s), kernelBase[5]);
-                v_dst1 = vmlal_n_s16(v_dst1, vget_high_s16(t1_16s), kernelBase[4]);
-                v_dst1 = vmlal_n_s16(v_dst1, vget_high_s16(t2_16s), kernelBase[3]);
-            }
-
-            {
-                // combine 3 "shifted" vectors
-                t0 = vext_u8(tprev[2], tcurr[2], 7);
-                t1 = tcurr[2];
-                t2 = vext_u8(tcurr[2], tnext[2], 1);
-
-                int16x8_t t0_16s = vreinterpretq_s16_u16(vmovl_u8(t0));
-                int16x8_t t1_16s = vreinterpretq_s16_u16(vmovl_u8(t1));
-                int16x8_t t2_16s = vreinterpretq_s16_u16(vmovl_u8(t2));
-
-                v_dst0 = vmlal_n_s16(v_dst0, vget_low_s16(t0_16s), kernelBase[2]);
-                v_dst0 = vmlal_n_s16(v_dst0, vget_low_s16(t1_16s), kernelBase[1]);
-                v_dst0 = vmlal_n_s16(v_dst0, vget_low_s16(t2_16s), kernelBase[0]);
-
-                v_dst1 = vmlal_n_s16(v_dst1, vget_high_s16(t0_16s), kernelBase[2]);
-                v_dst1 = vmlal_n_s16(v_dst1, vget_high_s16(t1_16s), kernelBase[1]);
-                v_dst1 = vmlal_n_s16(v_dst1, vget_high_s16(t2_16s), kernelBase[0]);
-            }
-
-
-            // make scale
-            v_dst0 = vshrq_s32_p(v_dst0);
-            v_dst1 = vshrq_s32_p(v_dst1);
-
-            // and add them
-            vst1_u8(drow + x - 8, vqmovn_u16(vcombine_u16(vqmovun_s32(v_dst0),
-                                                          vqmovun_s32(v_dst1))));
-        }
-
-        x -= 8;
-        if (x == width)
-            --x;
-
-        for ( ; x < width; ++x)
-        {
-            // make extrapolation for the last elements
-            if (x + 1 >= width)
-            {
-                if (border == BORDER_MODE_CONSTANT)
-                {
-                    nextx[0] = borderValue;
-                    nextx[1] = borderValue;
-                    nextx[2] = borderValue;
-                }
-                else if (border == BORDER_MODE_REPLICATE)
-                {
-                    nextx[0] = srow0[x];
-                    nextx[1] = srow1[x];
-                    nextx[2] = srow2[x];
-                }
-            }
-            else
-            {
-                nextx[0] = srow0 ? srow0[x + 1] : borderValue;
-                nextx[1] =         srow1[x + 1]              ;
-                nextx[2] = srow2 ? srow2[x + 1] : borderValue;
-            }
-
-            s32 val = 0;
-            for (s32 _y = 0; _y < 3; ++_y)
-                val += prevx[_y] * kernelBase[(2 - _y) * 3 + 2] +
-                       currx[_y] * kernelBase[(2 - _y) * 3 + 1] +
-                       nextx[_y] * kernelBase[(2 - _y) * 3 + 0];
-
-            drow[x] = internal::saturate_cast<u8>(val >> scale);
-
-            // make shift
-            prevx[0] = currx[0];
-            currx[0] = nextx[0];
-
-            prevx[1] = currx[1];
-            currx[1] = nextx[1];
-
-            prevx[2] = currx[2];
-            currx[2] = nextx[2];
-        }
-    }
-#else
-    (void)size;
-    (void)srcBase;
-    (void)srcStride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)border;
-    (void)borderValue;
-    (void)ksize;
-    (void)kernelBase;
-    (void)scale;
-#endif
-}
-
-} // namespace CAROTENE_NS
--- a/3rdparty/carotene/src/count_nonzero.cpp
+++ b/3rdparty/carotene/src/count_nonzero.cpp
@ -1,430 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2012-2015, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-#include "common.hpp"
-
-#include <limits>
-
-namespace CAROTENE_NS {
-
-s32 countNonZero(const Size2D &_size,
-                 const u8 * srcBase, ptrdiff_t srcStride)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    Size2D size(_size);
-    if (srcStride == (ptrdiff_t)(size.width))
-    {
-        size.width *= size.height;
-        size.height = 1;
-    }
-    size_t roiw16 = size.width & ~15u;
-    s32 result = 0;
-    for(size_t k = 0; k < size.height; ++k)
-    {
-        const u8* src = internal::getRowPtr( srcBase,  srcStride, k);
-        size_t i = 0;
-
-        #define COUNTNONZERO8U_BLOCK_SIZE (16*255)
-        uint8x16_t vc1 = vmovq_n_u8(1);
-        for (; i < roiw16;)
-        {
-            size_t lim = std::min(i + COUNTNONZERO8U_BLOCK_SIZE, size.width) - 16;
-            uint8x16_t vs = vmovq_n_u8(0);
-
-            for (; i <= lim; i+= 16)
-            {
-                internal::prefetch(src + i);
-                uint8x16_t vln = vld1q_u8(src + i);
-                uint8x16_t vnz = vminq_u8(vln, vc1);
-                vs = vaddq_u8(vs, vnz);
-            }
-
-            uint32x4_t vs4 = vpaddlq_u16(vpaddlq_u8(vs));
-            uint32x2_t vs2 = vadd_u32(vget_low_u32(vs4), vget_high_u32(vs4));
-
-            s32 s[2];
-            vst1_u32((u32*)s, vs2);
-
-            if (s[0] < 0 || s[1] < 0)//saturate in case of overflow ~ 2GB of non-zeros...
-            {
-                return 0x7fFFffFF;
-            }
-            result += (s[0] += s[1]);
-            if (s[0] < 0 || result < 0)
-            {
-                return 0x7fFFffFF;
-            }
-        }
-        for (; i < size.width; i++)
-            result += (src[i] != 0)?1:0;
-        if (result < 0)//saturate in case of overflow ~ 2GB of non-zeros...
-        {
-            return 0x7fFFffFF;
-        }
-    }
-    return result;
-#else
-    (void)_size;
-    (void)srcBase;
-    (void)srcStride;
-
-    return 0;
-#endif
-}
-
-s32 countNonZero(const Size2D &_size,
-                 const u16 * srcBase, ptrdiff_t srcStride)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    Size2D size(_size);
-    if (srcStride == (ptrdiff_t)(size.width))
-    {
-        size.width *= size.height;
-        size.height = 1;
-    }
-    size_t roiw8 = size.width & ~7u;
-    s32 result = 0;
-    for(size_t k = 0; k < size.height; ++k)
-    {
-        const u16* src = internal::getRowPtr( srcBase,  srcStride, k);
-        size_t i = 0;
-
-        #define COUNTNONZERO16U_BLOCK_SIZE (8*(256*256-1))
-        uint16x8_t vc1 = vmovq_n_u16(1);
-        for (; i < roiw8;)
-        {
-            size_t lim = std::min(i + COUNTNONZERO16U_BLOCK_SIZE, size.width) - 8;
-            uint16x8_t vs = vmovq_n_u16(0);
-
-            for (; i <= lim; i+= 8)
-            {
-                internal::prefetch(src + i);
-                uint16x8_t vln = vld1q_u16(src + i);
-                uint16x8_t vnz = vminq_u16(vln, vc1);
-                vs = vaddq_u16(vs, vnz);
-            }
-
-            uint32x4_t vs4 = vpaddlq_u16(vs);
-            uint32x2_t vs2 = vadd_u32(vget_low_u32(vs4), vget_high_u32(vs4));
-
-            s32 s[2];
-            vst1_u32((u32*)s, vs2);
-
-            if (s[0] < 0 || s[1] < 0)//saturate in case of overflow ~ 4GB of non-zeros...
-            {
-                return 0x7fFFffFF;
-            }
-            result += (s[0] += s[1]);
-            if (s[0] < 0 || result < 0)
-            {
-                return 0x7fFFffFF;
-            }
-        }
-        for (; i < size.width; i++)
-            result += (src[i] != 0)?1:0;
-        if (result < 0)//saturate in case of overflow ~ 4GB of non-zeros...
-        {
-            return 0x7fFFffFF;
-        }
-    }
-    return result;
-#else
-    (void)_size;
-    (void)srcBase;
-    (void)srcStride;
-
-    return 0;
-#endif
-}
-
-s32 countNonZero(const Size2D &_size,
-                 const s32 * srcBase, ptrdiff_t srcStride)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    Size2D size(_size);
-    if (srcStride == (ptrdiff_t)(size.width))
-    {
-        size.width *= size.height;
-        size.height = 1;
-    }
-    size_t roiw4 = size.width & ~3u;
-    s32 result = 0;
-    for(size_t k = 0; k < size.height; ++k)
-    {
-        const u32* src = (const u32*)internal::getRowPtr( srcBase,  srcStride, k);
-        u32 i = 0;
-
-        uint32x4_t vc1 = vmovq_n_u32(1);
-        uint32x4_t vs = vmovq_n_u32(0);
-
-        for (; i < roiw4; i += 4 )
-        {
-            internal::prefetch(src + i);
-            uint32x4_t vln = vld1q_u32(src + i);
-            uint32x4_t vnz = vminq_u32(vln, vc1);
-            vs = vqaddq_u32(vs, vnz);
-        }
-
-        uint32x2_t vs2 = vqadd_u32(vget_low_u32(vs), vget_high_u32(vs));
-
-        s32 s[2];
-        vst1_u32((u32*)s, vs2);
-
-        if (s[0] < 0 || s[1] < 0)//saturate in case of overflow ~ 8GB of non-zeros...
-        {
-            return 0x7fFFffFF;
-        }
-        result += (s[0] += s[1]);
-        if (s[0] < 0 || result < 0)
-        {
-            return 0x7fFFffFF;
-        }
-
-        for (; i < size.width; i++)
-            result += (src[i] != 0)?1:0;
-        if (result < 0)//saturate in case of overflow ~ 8GB of non-zeros...
-        {
-            return 0x7fFFffFF;
-        }
-    }
-    return result;
-#else
-    (void)_size;
-    (void)srcBase;
-    (void)srcStride;
-
-    return 0;
-#endif
-}
-
-s32 countNonZero(const Size2D &_size,
-                 const f32 * srcBase, ptrdiff_t srcStride)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    Size2D size(_size);
-    if (srcStride == (ptrdiff_t)(size.width))
-    {
-        size.width *= size.height;
-        size.height = 1;
-    }
-    size_t roiw4 = size.width & ~3u;
-    s32 result = 0;
-    for(size_t k = 0; k < size.height; ++k)
-    {
-        const f32* src = internal::getRowPtr( srcBase,  srcStride, k);
-        size_t i = 0;
-
-        float32x4_t vc0 = vmovq_n_f32(0);
-        int32x4_t vs = vmovq_n_s32(0);
-
-        for (; i < roiw4; i += 4 )
-        {
-            internal::prefetch(src + i);
-            float32x4_t vln = vld1q_f32(src + i);
-            int32x4_t vnz = vreinterpretq_s32_u32(vmvnq_u32(vceqq_f32(vln, vc0)));
-            vs = vqaddq_s32(vs, vnz);
-        }
-
-        int32x2_t vs2 = vqneg_s32(vqadd_s32(vget_low_s32(vs), vget_high_s32(vs)));
-
-        int s[2];
-        vst1_s32(s, vs2);
-
-        result += (s[0] += s[1]);
-        if (s[0] < 0 || result < 0)//case of overflow ~ 8GB of non-zeros...
-        {
-            return 0x7fFFffFF;
-        }
-
-        for (; i < size.width; i++)
-            result += (src[i] < std::numeric_limits<float>::min() && src[i] > -std::numeric_limits<float>::min())?0:1;
-
-        if (result < 0)
-        {
-            return 0x7fFFffFF;
-        }
-    }
-    return result;
-#else
-    (void)_size;
-    (void)srcBase;
-    (void)srcStride;
-
-    return 0;
-#endif
-}
-
-s32 countNonZero(const Size2D &_size,
-                 const f64 * srcBase, ptrdiff_t srcStride)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    Size2D size(_size);
-    if (srcStride == (ptrdiff_t)(size.width))
-    {
-        size.width *= size.height;
-        size.height = 1;
-    }
-    size_t roiw8 = size.width & ~7u;
-    size_t roiw4 = size.width & ~3u;
-    size_t roiw2 = size.width & ~1u;
-    uint64x2_t vmask1 = vdupq_n_u64(0x7fFFffFFffFFffFFULL); //will treat denormals as non-zero
-    uint32x4_t vc0 = vmovq_n_u32(0);
-
-    s32 result = 0;
-    for(size_t k = 0; k < size.height; ++k)
-    {
-        const f64* src = internal::getRowPtr( srcBase,  srcStride, k);
-        size_t i = 0;
-
-        int32x2_t vs1 = vmov_n_s32(0);
-        int32x2_t vs2 = vmov_n_s32(0);
-        int32x2_t vs3 = vmov_n_s32(0);
-        int32x2_t vs4 = vmov_n_s32(0);
-
-        for (; i < roiw8; i += 8 )
-        {
-            internal::prefetch(src + i + 6);
-            uint64x2_t vln1 = vld1q_u64((const u64*)(src + i));
-            uint64x2_t vln2 = vld1q_u64((const u64*)(src + i + 2));
-            uint64x2_t vln3 = vld1q_u64((const u64*)(src + i + 4));
-            uint64x2_t vln4 = vld1q_u64((const u64*)(src + i + 6));
-
-            uint64x2_t vm1 = vandq_u64(vln1, vmask1);
-            uint64x2_t vm2 = vandq_u64(vln2, vmask1);
-            uint64x2_t vm3 = vandq_u64(vln3, vmask1);
-            uint64x2_t vm4 = vandq_u64(vln4, vmask1);
-
-            uint32x4_t vequ1 = vceqq_u32(vreinterpretq_u32_u64(vm1), vc0);
-            uint32x4_t vequ2 = vceqq_u32(vreinterpretq_u32_u64(vm2), vc0);
-            uint32x4_t vequ3 = vceqq_u32(vreinterpretq_u32_u64(vm3), vc0);
-            uint32x4_t vequ4 = vceqq_u32(vreinterpretq_u32_u64(vm4), vc0);
-
-            uint32x4_t vlx1 = vmvnq_u32(vequ1);
-            uint32x4_t vlx2 = vmvnq_u32(vequ2);
-            uint32x4_t vlx3 = vmvnq_u32(vequ3);
-            uint32x4_t vlx4 = vmvnq_u32(vequ4);
-
-            int32x2_t vnz1 = vreinterpret_s32_u32(vpmax_u32(vget_low_u32(vlx1), vget_high_u32(vlx1)));
-            int32x2_t vnz2 = vreinterpret_s32_u32(vpmax_u32(vget_low_u32(vlx2), vget_high_u32(vlx2)));
-            int32x2_t vnz3 = vreinterpret_s32_u32(vpmax_u32(vget_low_u32(vlx3), vget_high_u32(vlx3)));
-            int32x2_t vnz4 = vreinterpret_s32_u32(vpmax_u32(vget_low_u32(vlx4), vget_high_u32(vlx4)));
-
-            vs1 = vqadd_s32(vs1, vnz1);
-            vs2 = vqadd_s32(vs2, vnz2);
-            vs3 = vqadd_s32(vs3, vnz3);
-            vs4 = vqadd_s32(vs4, vnz4);
-        }
-
-        if (i < roiw4)
-        {
-            internal::prefetch(src + i + 2);
-            uint64x2_t vln1 = vld1q_u64((const u64*)(src + i));
-            uint64x2_t vln2 = vld1q_u64((const u64*)(src + i + 2));
-
-            uint64x2_t vm1 = vandq_u64(vln1, vmask1);
-            uint64x2_t vm2 = vandq_u64(vln2, vmask1);
-
-            uint32x4_t vequ1 = vceqq_u32(vreinterpretq_u32_u64(vm1), vc0);
-            uint32x4_t vequ2 = vceqq_u32(vreinterpretq_u32_u64(vm2), vc0);
-
-            uint32x4_t vlx1 = vmvnq_u32(vequ1);
-            uint32x4_t vlx2 = vmvnq_u32(vequ2);
-
-            int32x2_t vnz1 = vreinterpret_s32_u32(vpmax_u32(vget_low_u32(vlx1), vget_high_u32(vlx1)));
-            int32x2_t vnz2 = vreinterpret_s32_u32(vpmax_u32(vget_low_u32(vlx2), vget_high_u32(vlx2)));
-
-            vs1 = vqadd_s32(vs1, vnz1);
-            vs2 = vqadd_s32(vs2, vnz2);
-            i += 4;
-        }
-
-        if (i < roiw2)
-        {
-            internal::prefetch(src + i);
-            uint64x2_t vln1 = vld1q_u64((const u64*)(src + i));
-
-            uint64x2_t vm1 = vandq_u64(vln1, vmask1);
-
-            uint32x4_t vequ1 = vceqq_u32(vreinterpretq_u32_u64(vm1), vc0);
-
-            uint32x4_t vlx1 = vmvnq_u32(vequ1);
-
-            int32x2_t vnz1 = vreinterpret_s32_u32(vpmax_u32(vget_low_u32(vlx1), vget_high_u32(vlx1)));
-
-            vs1 = vqadd_s32(vs1, vnz1);
-            i += 2;
-        }
-
-        vs1 = vqadd_s32(vs1, vs2);
-        vs3 = vqadd_s32(vs3, vs4);
-        vs1 = vqadd_s32(vs1, vs3);
-        int32x2_t vsneg = vqneg_s32(vs1);
-
-        s32 s[2];
-        vst1_s32(s, vsneg);
-
-        result += (s[0] += s[1]);
-        if (s[0] < 0 || result < 0)//case of overflow ~ 16GB of non-zeros...
-        {
-            return 0x7fFFffFF;
-        }
-
-        for (; i < size.width; i++)
-            result += (src[i] < std::numeric_limits<double>::min() && src[i] > -std::numeric_limits<double>::min())?0:1;
-        if (result < 0)
-        {
-            return 0x7fFFffFF;
-        }
-    }
-    return result;
-#else
-    (void)_size;
-    (void)srcBase;
-    (void)srcStride;
-
-    return 0;
-#endif
-}
-
-} // namespace CAROTENE_NS
--- a/3rdparty/carotene/src/div.cpp
+++ b/3rdparty/carotene/src/div.cpp
@ -1,694 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2016, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-#include "common.hpp"
-#include "vtransform.hpp"
-
-#include <cstring>
-#include <cfloat>
-#include <cmath>
-#include <limits>
-
-namespace CAROTENE_NS {
-
-namespace {
-
-#ifdef CAROTENE_NEON
-
-template <typename T>
-inline T divSaturateQ(const T &v1, const T &v2, const float scale)
-{
-    return internal::vcombine(internal::vqmovn(divSaturateQ(internal::vmovl(internal::vget_low(v1)),
-                                                            internal::vmovl(internal::vget_low(v2)), scale)),
-                              internal::vqmovn(divSaturateQ(internal::vmovl(internal::vget_high(v1)),
-                                                            internal::vmovl(internal::vget_high(v2)), scale))
-                             );
-}
-template <>
-inline int32x4_t divSaturateQ<int32x4_t>(const int32x4_t &v1, const int32x4_t &v2, const float scale)
-{ return vcvtq_s32_f32(vmulq_f32(vmulq_n_f32(vcvtq_f32_s32(v1), scale), internal::vrecpq_f32(vcvtq_f32_s32(v2)))); }
-template <>
-inline uint32x4_t divSaturateQ<uint32x4_t>(const uint32x4_t &v1, const uint32x4_t &v2, const float scale)
-{ return vcvtq_u32_f32(vmulq_f32(vmulq_n_f32(vcvtq_f32_u32(v1), scale), internal::vrecpq_f32(vcvtq_f32_u32(v2)))); }
-
-template <typename T>
-inline T divSaturate(const T &v1, const T &v2, const float scale)
-{
-    return internal::vqmovn(divSaturateQ(internal::vmovl(v1), internal::vmovl(v2), scale));
-}
-template <>
-inline int32x2_t divSaturate<int32x2_t>(const int32x2_t &v1, const int32x2_t &v2, const float scale)
-{ return vcvt_s32_f32(vmul_f32(vmul_n_f32(vcvt_f32_s32(v1), scale), internal::vrecp_f32(vcvt_f32_s32(v2)))); }
-template <>
-inline uint32x2_t divSaturate<uint32x2_t>(const uint32x2_t &v1, const uint32x2_t &v2, const float scale)
-{ return vcvt_u32_f32(vmul_f32(vmul_n_f32(vcvt_f32_u32(v1), scale), internal::vrecp_f32(vcvt_f32_u32(v2)))); }
-
-
-template <typename T>
-inline T divWrapQ(const T &v1, const T &v2, const float scale)
-{
-    return internal::vcombine(internal::vmovn(divWrapQ(internal::vmovl(internal::vget_low(v1)),
-                                                       internal::vmovl(internal::vget_low(v2)), scale)),
-                              internal::vmovn(divWrapQ(internal::vmovl(internal::vget_high(v1)),
-                                                       internal::vmovl(internal::vget_high(v2)), scale))
-                             );
-}
-template <>
-inline int32x4_t divWrapQ<int32x4_t>(const int32x4_t &v1, const int32x4_t &v2, const float scale)
-{ return vcvtq_s32_f32(vmulq_f32(vmulq_n_f32(vcvtq_f32_s32(v1), scale), internal::vrecpq_f32(vcvtq_f32_s32(v2)))); }
-template <>
-inline uint32x4_t divWrapQ<uint32x4_t>(const uint32x4_t &v1, const uint32x4_t &v2, const float scale)
-{ return vcvtq_u32_f32(vmulq_f32(vmulq_n_f32(vcvtq_f32_u32(v1), scale), internal::vrecpq_f32(vcvtq_f32_u32(v2)))); }
-
-template <typename T>
-inline T divWrap(const T &v1, const T &v2, const float scale)
-{
-    return internal::vmovn(divWrapQ(internal::vmovl(v1), internal::vmovl(v2), scale));
-}
-template <>
-inline int32x2_t divWrap<int32x2_t>(const int32x2_t &v1, const int32x2_t &v2, const float scale)
-{ return vcvt_s32_f32(vmul_f32(vmul_n_f32(vcvt_f32_s32(v1), scale), internal::vrecp_f32(vcvt_f32_s32(v2)))); }
-template <>
-inline uint32x2_t divWrap<uint32x2_t>(const uint32x2_t &v1, const uint32x2_t &v2, const float scale)
-{ return vcvt_u32_f32(vmul_f32(vmul_n_f32(vcvt_f32_u32(v1), scale), internal::vrecp_f32(vcvt_f32_u32(v2)))); }
-
-inline  uint8x16_t vtstq(const uint8x16_t  & v0, const uint8x16_t  & v1) { return vtstq_u8 (v0, v1); }
-inline  uint16x8_t vtstq(const uint16x8_t  & v0, const uint16x8_t  & v1) { return vtstq_u16(v0, v1); }
-inline  uint32x4_t vtstq(const uint32x4_t  & v0, const uint32x4_t  & v1) { return vtstq_u32(v0, v1); }
-inline   int8x16_t vtstq(const int8x16_t   & v0, const int8x16_t   & v1) { return vreinterpretq_s8_u8  (vtstq_s8 (v0, v1)); }
-inline   int16x8_t vtstq(const int16x8_t   & v0, const int16x8_t   & v1) { return vreinterpretq_s16_u16(vtstq_s16(v0, v1)); }
-inline   int32x4_t vtstq(const int32x4_t   & v0, const int32x4_t   & v1) { return vreinterpretq_s32_u32(vtstq_s32(v0, v1)); }
-
-inline   uint8x8_t vtst(const uint8x8_t   & v0, const uint8x8_t   & v1) { return vtst_u8 (v0, v1); }
-inline  uint16x4_t vtst(const uint16x4_t  & v0, const uint16x4_t  & v1) { return vtst_u16(v0, v1); }
-inline  uint32x2_t vtst(const uint32x2_t  & v0, const uint32x2_t  & v1) { return vtst_u32(v0, v1); }
-inline    int8x8_t vtst(const int8x8_t    & v0, const int8x8_t    & v1) { return vreinterpret_s8_u8  (vtst_s8 (v0, v1)); }
-inline   int16x4_t vtst(const int16x4_t   & v0, const int16x4_t   & v1) { return vreinterpret_s16_u16(vtst_s16(v0, v1)); }
-inline   int32x2_t vtst(const int32x2_t   & v0, const int32x2_t   & v1) { return vreinterpret_s32_u32(vtst_s32(v0, v1)); }
-#endif
-
-template <typename T>
-void div(const Size2D &size,
-         const T * src0Base, ptrdiff_t src0Stride,
-         const T * src1Base, ptrdiff_t src1Stride,
-         T * dstBase, ptrdiff_t dstStride,
-         f32 scale,
-         CONVERT_POLICY cpolicy)
-{
-    internal::assertSupportedConfiguration();
-
-#ifdef CAROTENE_NEON
-    typedef typename internal::VecTraits<T>::vec128 vec128;
-    typedef typename internal::VecTraits<T>::vec64 vec64;
-
-    if (scale == 0.0f ||
-        (std::numeric_limits<T>::is_integer &&
-         (scale * std::numeric_limits<T>::max()) <  1.0f &&
-         (scale * std::numeric_limits<T>::max()) > -1.0f))
-    {
-        for (size_t y = 0; y < size.height; ++y)
-        {
-            T * dst = internal::getRowPtr(dstBase, dstStride, y);
-            std::memset(dst, 0, sizeof(T) * size.width);
-        }
-        return;
-    }
-
-    const size_t step128 = 16 / sizeof(T);
-    size_t roiw128 = size.width >= (step128 - 1) ? size.width - step128 + 1 : 0;
-    const size_t step64 = 8 / sizeof(T);
-    size_t roiw64 = size.width >= (step64 - 1) ? size.width - step64 + 1 : 0;
-
-    for (size_t i = 0; i < size.height; ++i)
-    {
-        const T * src0 = internal::getRowPtr(src0Base, src0Stride, i);
-        const T * src1 = internal::getRowPtr(src1Base, src1Stride, i);
-        T * dst = internal::getRowPtr(dstBase, dstStride, i);
-        size_t j = 0;
-
-        if (cpolicy == CONVERT_POLICY_SATURATE)
-        {
-            for (; j < roiw128; j += step128)
-            {
-                internal::prefetch(src0 + j);
-                internal::prefetch(src1 + j);
-
-                vec128 v_src0 = internal::vld1q(src0 + j);
-                vec128 v_src1 = internal::vld1q(src1 + j);
-
-                vec128 v_mask = vtstq(v_src1,v_src1);
-                internal::vst1q(dst + j, internal::vandq(v_mask, divSaturateQ(v_src0, v_src1, scale)));
-            }
-            for (; j < roiw64; j += step64)
-            {
-                vec64 v_src0 = internal::vld1(src0 + j);
-                vec64 v_src1 = internal::vld1(src1 + j);
-
-                vec64 v_mask = vtst(v_src1,v_src1);
-                internal::vst1(dst + j, internal::vand(v_mask,divSaturate(v_src0, v_src1, scale)));
-            }
-            for (; j < size.width; j++)
-            {
-                dst[j] = src1[j] ? internal::saturate_cast<T>(scale * src0[j] / src1[j]) : 0;
-            }
-        }
-        else // CONVERT_POLICY_WRAP
-        {
-            for (; j < roiw128; j += step128)
-            {
-                internal::prefetch(src0 + j);
-                internal::prefetch(src1 + j);
-
-                vec128 v_src0 = internal::vld1q(src0 + j);
-                vec128 v_src1 = internal::vld1q(src1 + j);
-
-                vec128 v_mask = vtstq(v_src1,v_src1);
-                internal::vst1q(dst + j, internal::vandq(v_mask, divWrapQ(v_src0, v_src1, scale)));
-            }
-            for (; j < roiw64; j += step64)
-            {
-                vec64 v_src0 = internal::vld1(src0 + j);
-                vec64 v_src1 = internal::vld1(src1 + j);
-
-                vec64 v_mask = vtst(v_src1,v_src1);
-                internal::vst1(dst + j, internal::vand(v_mask,divWrap(v_src0, v_src1, scale)));
-            }
-            for (; j < size.width; j++)
-            {
-                dst[j] = src1[j] ? (T)((s32)trunc(scale * src0[j] / src1[j])) : 0;
-            }
-        }
-    }
-#else
-    (void)size;
-    (void)src0Base;
-    (void)src0Stride;
-    (void)src1Base;
-    (void)src1Stride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)cpolicy;
-    (void)scale;
-#endif
-}
-
-#ifdef CAROTENE_NEON
-
-template <typename T>
-inline T recipSaturateQ(const T &v2, const float scale)
-{
-    return internal::vcombine(internal::vqmovn(recipSaturateQ(internal::vmovl(internal::vget_low(v2)), scale)),
-                              internal::vqmovn(recipSaturateQ(internal::vmovl(internal::vget_high(v2)), scale))
-                             );
-}
-template <>
-inline int32x4_t recipSaturateQ<int32x4_t>(const int32x4_t &v2, const float scale)
-{ return vcvtq_s32_f32(vmulq_n_f32(internal::vrecpq_f32(vcvtq_f32_s32(v2)), scale)); }
-template <>
-inline uint32x4_t recipSaturateQ<uint32x4_t>(const uint32x4_t &v2, const float scale)
-{ return vcvtq_u32_f32(vmulq_n_f32(internal::vrecpq_f32(vcvtq_f32_u32(v2)), scale)); }
-
-template <typename T>
-inline T recipSaturate(const T &v2, const float scale)
-{
-    return internal::vqmovn(recipSaturateQ(internal::vmovl(v2), scale));
-}
-template <>
-inline int32x2_t recipSaturate<int32x2_t>(const int32x2_t &v2, const float scale)
-{ return vcvt_s32_f32(vmul_n_f32(internal::vrecp_f32(vcvt_f32_s32(v2)), scale)); }
-template <>
-inline uint32x2_t recipSaturate<uint32x2_t>(const uint32x2_t &v2, const float scale)
-{ return vcvt_u32_f32(vmul_n_f32(internal::vrecp_f32(vcvt_f32_u32(v2)), scale)); }
-
-
-template <typename T>
-inline T recipWrapQ(const T &v2, const float scale)
-{
-    return internal::vcombine(internal::vmovn(recipWrapQ(internal::vmovl(internal::vget_low(v2)), scale)),
-                              internal::vmovn(recipWrapQ(internal::vmovl(internal::vget_high(v2)), scale))
-                             );
-}
-template <>
-inline int32x4_t recipWrapQ<int32x4_t>(const int32x4_t &v2, const float scale)
-{ return vcvtq_s32_f32(vmulq_n_f32(internal::vrecpq_f32(vcvtq_f32_s32(v2)), scale)); }
-template <>
-inline uint32x4_t recipWrapQ<uint32x4_t>(const uint32x4_t &v2, const float scale)
-{ return vcvtq_u32_f32(vmulq_n_f32(internal::vrecpq_f32(vcvtq_f32_u32(v2)), scale)); }
-
-template <typename T>
-inline T recipWrap(const T &v2, const float scale)
-{
-    return internal::vmovn(recipWrapQ(internal::vmovl(v2), scale));
-}
-template <>
-inline int32x2_t recipWrap<int32x2_t>(const int32x2_t &v2, const float scale)
-{ return vcvt_s32_f32(vmul_n_f32(internal::vrecp_f32(vcvt_f32_s32(v2)), scale)); }
-template <>
-inline uint32x2_t recipWrap<uint32x2_t>(const uint32x2_t &v2, const float scale)
-{ return vcvt_u32_f32(vmul_n_f32(internal::vrecp_f32(vcvt_f32_u32(v2)), scale)); }
-#endif
-
-template <typename T>
-void recip(const Size2D &size,
-           const T * src1Base, ptrdiff_t src1Stride,
-           T * dstBase, ptrdiff_t dstStride,
-           f32 scale,
-           CONVERT_POLICY cpolicy)
-{
-    internal::assertSupportedConfiguration();
-
-#ifdef CAROTENE_NEON
-    typedef typename internal::VecTraits<T>::vec128 vec128;
-    typedef typename internal::VecTraits<T>::vec64 vec64;
-
-    if (scale == 0.0f ||
-        (std::numeric_limits<T>::is_integer &&
-         scale <  1.0f &&
-         scale > -1.0f))
-    {
-        for (size_t y = 0; y < size.height; ++y)
-        {
-            T * dst = internal::getRowPtr(dstBase, dstStride, y);
-            std::memset(dst, 0, sizeof(T) * size.width);
-        }
-        return;
-    }
-
-    const size_t step128 = 16 / sizeof(T);
-    size_t roiw128 = size.width >= (step128 - 1) ? size.width - step128 + 1 : 0;
-    const size_t step64 = 8 / sizeof(T);
-    size_t roiw64 = size.width >= (step64 - 1) ? size.width - step64 + 1 : 0;
-
-    for (size_t i = 0; i < size.height; ++i)
-    {
-        const T * src1 = internal::getRowPtr(src1Base, src1Stride, i);
-        T * dst = internal::getRowPtr(dstBase, dstStride, i);
-        size_t j = 0;
-
-        if (cpolicy == CONVERT_POLICY_SATURATE)
-        {
-            for (; j < roiw128; j += step128)
-            {
-                internal::prefetch(src1 + j);
-
-                vec128 v_src1 = internal::vld1q(src1 + j);
-
-                vec128 v_mask = vtstq(v_src1,v_src1);
-                internal::vst1q(dst + j, internal::vandq(v_mask, recipSaturateQ(v_src1, scale)));
-            }
-            for (; j < roiw64; j += step64)
-            {
-                vec64 v_src1 = internal::vld1(src1 + j);
-
-                vec64 v_mask = vtst(v_src1,v_src1);
-                internal::vst1(dst + j, internal::vand(v_mask, recipSaturate(v_src1, scale)));
-            }
-            for (; j < size.width; j++)
-            {
-                dst[j] = src1[j] ? internal::saturate_cast<T>(scale / src1[j]) : 0;
-            }
-        }
-        else // CONVERT_POLICY_WRAP
-        {
-            for (; j < roiw128; j += step128)
-            {
-                internal::prefetch(src1 + j);
-
-                vec128 v_src1 = internal::vld1q(src1 + j);
-
-                vec128 v_mask = vtstq(v_src1,v_src1);
-                internal::vst1q(dst + j, internal::vandq(v_mask, recipWrapQ(v_src1, scale)));
-            }
-            for (; j < roiw64; j += step64)
-            {
-                vec64 v_src1 = internal::vld1(src1 + j);
-
-                vec64 v_mask = vtst(v_src1,v_src1);
-                internal::vst1(dst + j, internal::vand(v_mask, recipWrap(v_src1, scale)));
-            }
-            for (; j < size.width; j++)
-            {
-                dst[j] = src1[j] ? (T)((s32)trunc(scale / src1[j])) : 0;
-            }
-        }
-    }
-#else
-    (void)size;
-    (void)src1Base;
-    (void)src1Stride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)cpolicy;
-    (void)scale;
-#endif
-}
-
-}
-
-void div(const Size2D &size,
-         const u8 * src0Base, ptrdiff_t src0Stride,
-         const u8 * src1Base, ptrdiff_t src1Stride,
-         u8 * dstBase, ptrdiff_t dstStride,
-         f32 scale,
-         CONVERT_POLICY cpolicy)
-{
-    div<u8>(size, src0Base, src0Stride, src1Base, src1Stride, dstBase, dstStride, scale, cpolicy);
-}
-
-void div(const Size2D &size,
-         const s8 * src0Base, ptrdiff_t src0Stride,
-         const s8 * src1Base, ptrdiff_t src1Stride,
-         s8 * dstBase, ptrdiff_t dstStride,
-         f32 scale,
-         CONVERT_POLICY cpolicy)
-{
-    div<s8>(size, src0Base, src0Stride, src1Base, src1Stride, dstBase, dstStride, scale, cpolicy);
-}
-
-void div(const Size2D &size,
-         const u16 * src0Base, ptrdiff_t src0Stride,
-         const u16 * src1Base, ptrdiff_t src1Stride,
-         u16 * dstBase, ptrdiff_t dstStride,
-         f32 scale,
-         CONVERT_POLICY cpolicy)
-{
-    div<u16>(size, src0Base, src0Stride, src1Base, src1Stride, dstBase, dstStride, scale, cpolicy);
-}
-
-void div(const Size2D &size,
-         const s16 * src0Base, ptrdiff_t src0Stride,
-         const s16 * src1Base, ptrdiff_t src1Stride,
-         s16 * dstBase, ptrdiff_t dstStride,
-         f32 scale,
-         CONVERT_POLICY cpolicy)
-{
-    div<s16>(size, src0Base, src0Stride, src1Base, src1Stride, dstBase, dstStride, scale, cpolicy);
-}
-
-void div(const Size2D &size,
-         const s32 * src0Base, ptrdiff_t src0Stride,
-         const s32 * src1Base, ptrdiff_t src1Stride,
-         s32 * dstBase, ptrdiff_t dstStride,
-         f32 scale,
-         CONVERT_POLICY cpolicy)
-{
-    div<s32>(size, src0Base, src0Stride, src1Base, src1Stride, dstBase, dstStride, scale, cpolicy);
-}
-
-void div(const Size2D &size,
-         const f32 * src0Base, ptrdiff_t src0Stride,
-         const f32 * src1Base, ptrdiff_t src1Stride,
-         f32 * dstBase, ptrdiff_t dstStride,
-         f32 scale)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    if (scale == 0.0f)
-    {
-        for (size_t y = 0; y < size.height; ++y)
-        {
-            f32 * dst = internal::getRowPtr(dstBase, dstStride, y);
-            std::memset(dst, 0, sizeof(f32) * size.width);
-        }
-        return;
-    }
-
-    float32x4_t v_zero = vdupq_n_f32(0.0f);
-
-    size_t roiw128 = size.width >= 3 ? size.width - 3 : 0;
-    size_t roiw64 = size.width >= 1 ? size.width - 1 : 0;
-
-    if (std::fabs(scale - 1.0f) < FLT_EPSILON)
-    {
-        for (size_t i = 0; i < size.height; ++i)
-        {
-            const f32 * src0 = internal::getRowPtr(src0Base, src0Stride, i);
-            const f32 * src1 = internal::getRowPtr(src1Base, src1Stride, i);
-            f32 * dst = internal::getRowPtr(dstBase, dstStride, i);
-            size_t j = 0;
-
-            for (; j < roiw128; j += 4)
-            {
-                internal::prefetch(src0 + j);
-                internal::prefetch(src1 + j);
-
-                float32x4_t v_src0 = vld1q_f32(src0 + j);
-                float32x4_t v_src1 = vld1q_f32(src1 + j);
-
-                uint32x4_t v_mask = vceqq_f32(v_src1,v_zero);
-                vst1q_f32(dst + j, vreinterpretq_f32_u32(vbicq_u32(
-                                   vreinterpretq_u32_f32(vmulq_f32(v_src0, internal::vrecpq_f32(v_src1))), v_mask)));
-            }
-
-            for (; j < roiw64; j += 2)
-            {
-                float32x2_t v_src0 = vld1_f32(src0 + j);
-                float32x2_t v_src1 = vld1_f32(src1 + j);
-
-                uint32x2_t v_mask = vceq_f32(v_src1,vget_low_f32(v_zero));
-                vst1_f32(dst + j, vreinterpret_f32_u32(vbic_u32(
-                                  vreinterpret_u32_f32(vmul_f32(v_src0, internal::vrecp_f32(v_src1))), v_mask)));
-            }
-
-            for (; j < size.width; j++)
-            {
-                dst[j] = src1[j] ? src0[j] / src1[j] : 0.0f;
-            }
-        }
-    }
-    else
-    {
-        for (size_t i = 0; i < size.height; ++i)
-        {
-            const f32 * src0 = internal::getRowPtr(src0Base, src0Stride, i);
-            const f32 * src1 = internal::getRowPtr(src1Base, src1Stride, i);
-            f32 * dst = internal::getRowPtr(dstBase, dstStride, i);
-            size_t j = 0;
-
-            for (; j < roiw128; j += 4)
-            {
-                internal::prefetch(src0 + j);
-                internal::prefetch(src1 + j);
-
-                float32x4_t v_src0 = vld1q_f32(src0 + j);
-                float32x4_t v_src1 = vld1q_f32(src1 + j);
-
-                uint32x4_t v_mask = vceqq_f32(v_src1,v_zero);
-                vst1q_f32(dst + j, vreinterpretq_f32_u32(vbicq_u32(
-                                   vreinterpretq_u32_f32(vmulq_f32(vmulq_n_f32(v_src0, scale),
-                                                         internal::vrecpq_f32(v_src1))), v_mask)));
-            }
-
-            for (; j < roiw64; j += 2)
-            {
-                float32x2_t v_src0 = vld1_f32(src0 + j);
-                float32x2_t v_src1 = vld1_f32(src1 + j);
-
-                uint32x2_t v_mask = vceq_f32(v_src1,vget_low_f32(v_zero));
-                vst1_f32(dst + j, vreinterpret_f32_u32(vbic_u32(
-                                  vreinterpret_u32_f32(vmul_f32(vmul_n_f32(v_src0, scale),
-                                                                internal::vrecp_f32(v_src1))), v_mask)));
-            }
-
-            for (; j < size.width; j++)
-            {
-                dst[j] = src1[j] ? src0[j] * scale / src1[j] : 0.0f;
-            }
-        }
-    }
-#else
-    (void)size;
-    (void)src0Base;
-    (void)src0Stride;
-    (void)src1Base;
-    (void)src1Stride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)scale;
-#endif
-}
-
-void reciprocal(const Size2D &size,
-                const u8 * srcBase, ptrdiff_t srcStride,
-                u8 * dstBase, ptrdiff_t dstStride,
-                f32 scale,
-                CONVERT_POLICY cpolicy)
-{
-    recip<u8>(size, srcBase, srcStride, dstBase, dstStride, scale, cpolicy);
-}
-
-void reciprocal(const Size2D &size,
-                const s8 * srcBase, ptrdiff_t srcStride,
-                s8 * dstBase, ptrdiff_t dstStride,
-                f32 scale,
-                CONVERT_POLICY cpolicy)
-{
-    recip<s8>(size, srcBase, srcStride, dstBase, dstStride, scale, cpolicy);
-}
-
-void reciprocal(const Size2D &size,
-                const u16 * srcBase, ptrdiff_t srcStride,
-                u16 * dstBase, ptrdiff_t dstStride,
-                f32 scale,
-                CONVERT_POLICY cpolicy)
-{
-    recip<u16>(size, srcBase, srcStride, dstBase, dstStride, scale, cpolicy);
-}
-
-void reciprocal(const Size2D &size,
-                const s16 * srcBase, ptrdiff_t srcStride,
-                s16 * dstBase, ptrdiff_t dstStride,
-                f32 scale,
-                CONVERT_POLICY cpolicy)
-{
-    recip<s16>(size, srcBase, srcStride, dstBase, dstStride, scale, cpolicy);
-}
-
-void reciprocal(const Size2D &size,
-                const s32 * srcBase, ptrdiff_t srcStride,
-                s32 * dstBase, ptrdiff_t dstStride,
-                f32 scale,
-                CONVERT_POLICY cpolicy)
-{
-    recip<s32>(size, srcBase, srcStride, dstBase, dstStride, scale, cpolicy);
-}
-
-void reciprocal(const Size2D &size,
-                const f32 * srcBase, ptrdiff_t srcStride,
-                f32 * dstBase, ptrdiff_t dstStride,
-                f32 scale)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    if (scale == 0.0f)
-    {
-        for (size_t y = 0; y < size.height; ++y)
-        {
-            f32 * dst = internal::getRowPtr(dstBase, dstStride, y);
-            std::memset(dst, 0, sizeof(f32) * size.width);
-        }
-        return;
-    }
-
-    float32x4_t v_zero = vdupq_n_f32(0.0f);
-
-    size_t roiw128 = size.width >= 3 ? size.width - 3 : 0;
-    size_t roiw64 = size.width >= 1 ? size.width - 1 : 0;
-
-    if (std::fabs(scale - 1.0f) < FLT_EPSILON)
-    {
-        for (size_t i = 0; i < size.height; ++i)
-        {
-            const f32 * src1 = internal::getRowPtr(srcBase, srcStride, i);
-            f32 * dst = internal::getRowPtr(dstBase, dstStride, i);
-            size_t j = 0;
-
-            for (; j < roiw128; j += 4)
-            {
-                internal::prefetch(src1 + j);
-
-                float32x4_t v_src1 = vld1q_f32(src1 + j);
-
-                uint32x4_t v_mask = vceqq_f32(v_src1,v_zero);
-                vst1q_f32(dst + j, vreinterpretq_f32_u32(vbicq_u32(
-                                   vreinterpretq_u32_f32(internal::vrecpq_f32(v_src1)), v_mask)));
-            }
-
-            for (; j < roiw64; j += 2)
-            {
-                float32x2_t v_src1 = vld1_f32(src1 + j);
-
-                uint32x2_t v_mask = vceq_f32(v_src1,vget_low_f32(v_zero));
-                vst1_f32(dst + j, vreinterpret_f32_u32(vbic_u32(
-                                  vreinterpret_u32_f32(internal::vrecp_f32(v_src1)), v_mask)));
-            }
-
-            for (; j < size.width; j++)
-            {
-                dst[j] = src1[j] ? 1.0f / src1[j] : 0;
-            }
-        }
-    }
-    else
-    {
-        for (size_t i = 0; i < size.height; ++i)
-        {
-            const f32 * src1 = internal::getRowPtr(srcBase, srcStride, i);
-            f32 * dst = internal::getRowPtr(dstBase, dstStride, i);
-            size_t j = 0;
-
-            for (; j < roiw128; j += 4)
-            {
-                internal::prefetch(src1 + j);
-
-                float32x4_t v_src1 = vld1q_f32(src1 + j);
-
-                uint32x4_t v_mask = vceqq_f32(v_src1,v_zero);
-                vst1q_f32(dst + j, vreinterpretq_f32_u32(vbicq_u32(
-                                   vreinterpretq_u32_f32(vmulq_n_f32(internal::vrecpq_f32(v_src1),
-                                                                     scale)),v_mask)));
-            }
-
-            for (; j < roiw64; j += 2)
-            {
-                float32x2_t v_src1 = vld1_f32(src1 + j);
-
-                uint32x2_t v_mask = vceq_f32(v_src1,vget_low_f32(v_zero));
-                vst1_f32(dst + j, vreinterpret_f32_u32(vbic_u32(
-                                  vreinterpret_u32_f32(vmul_n_f32(internal::vrecp_f32(v_src1),
-                                                                  scale)), v_mask)));
-            }
-
-            for (; j < size.width; j++)
-            {
-                dst[j] = src1[j] ? scale / src1[j] : 0;
-            }
-        }
-    }
-#else
-    (void)size;
-    (void)srcBase;
-    (void)srcStride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)scale;
-#endif
-}
-
-} // namespace CAROTENE_NS
--- a/3rdparty/carotene/src/dot_product.cpp
+++ b/3rdparty/carotene/src/dot_product.cpp
@ -1,260 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2012-2015, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-#include "common.hpp"
-
-namespace CAROTENE_NS {
-
-f64 dotProduct(const Size2D &_size,
-               const u8 * src0Base, ptrdiff_t src0Stride,
-               const u8 * src1Base, ptrdiff_t src1Stride)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    Size2D size(_size);
-    if (src0Stride == src1Stride &&
-        src0Stride == (ptrdiff_t)(size.width))
-    {
-        size.width *= size.height;
-        size.height = 1;
-    }
-
-// It is possible to accumulate up to 66051 uchar multiplication results in uint32 without overflow
-// We process 16 elements and accumulate two new elements per step. So we could handle 66051/2*16 elements
-#define DOT_UINT_BLOCKSIZE 66050*8
-    f64 result = 0.0;
-    for (size_t row = 0; row < size.height; ++row)
-    {
-        const u8 * src0 = internal::getRowPtr(src0Base, src0Stride, row);
-        const u8 * src1 = internal::getRowPtr(src1Base, src1Stride, row);
-
-        size_t i = 0;
-        uint64x2_t ws = vmovq_n_u64(0);
-
-        while(i + 16 <= size.width)
-        {
-            size_t lim = std::min(i + DOT_UINT_BLOCKSIZE, size.width) - 16;
-
-            uint32x4_t s1 = vmovq_n_u32(0);
-            uint32x4_t s2 = vmovq_n_u32(0);
-
-            for (; i <= lim; i += 16)
-            {
-                internal::prefetch(src0 + i);
-                internal::prefetch(src1 + i);
-
-                uint8x16_t vs1 = vld1q_u8(src0 + i);
-                uint8x16_t vs2 = vld1q_u8(src1 + i);
-
-                uint16x8_t vdot1 = vmull_u8(vget_low_u8(vs1), vget_low_u8(vs2));
-                uint16x8_t vdot2 = vmull_u8(vget_high_u8(vs1), vget_high_u8(vs2));
-
-                s1 = vpadalq_u16(s1, vdot1);
-                s2 = vpadalq_u16(s2, vdot2);
-            }
-
-            ws = vpadalq_u32(ws, s1);
-            ws = vpadalq_u32(ws, s2);
-        }
-
-        if(i + 8 <= size.width)
-        {
-            uint8x8_t vs1 = vld1_u8(src0 + i);
-            uint8x8_t vs2 = vld1_u8(src1 + i);
-
-            ws = vpadalq_u32(ws, vpaddlq_u16(vmull_u8(vs1, vs2)));
-            i += 8;
-        }
-
-        result += (double)vget_lane_u64(vadd_u64(vget_low_u64(ws), vget_high_u64(ws)), 0);
-
-        for (; i < size.width; ++i)
-            result += s32(src0[i]) * s32(src1[i]);
-    }
-    return result;
-#else
-    (void)_size;
-    (void)src0Base;
-    (void)src0Stride;
-    (void)src1Base;
-    (void)src1Stride;
-
-    return 0;
-#endif
-}
-
-f64 dotProduct(const Size2D &_size,
-               const s8 * src0Base, ptrdiff_t src0Stride,
-               const s8 * src1Base, ptrdiff_t src1Stride)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    Size2D size(_size);
-    if (src0Stride == src1Stride &&
-        src0Stride == (ptrdiff_t)(size.width))
-    {
-        size.width *= size.height;
-        size.height = 1;
-    }
-
-// It is possible to accumulate up to 131071 schar multiplication results in sint32 without overflow
-// We process 16 elements and accumulate two new elements per step. So we could handle 131071/2*16 elements
-#define DOT_INT_BLOCKSIZE 131070*8
-    f64 result = 0.0;
-    for (size_t row = 0; row < size.height; ++row)
-    {
-        const s8 * src0 = internal::getRowPtr(src0Base, src0Stride, row);
-        const s8 * src1 = internal::getRowPtr(src1Base, src1Stride, row);
-
-        size_t i = 0;
-        int64x2_t ws = vmovq_n_s64(0);
-
-        while(i + 16 <= size.width)
-        {
-            size_t lim = std::min(i + DOT_UINT_BLOCKSIZE, size.width) - 16;
-
-            int32x4_t s1 = vmovq_n_s32(0);
-            int32x4_t s2 = vmovq_n_s32(0);
-
-            for (; i <= lim; i += 16)
-            {
-                internal::prefetch(src0 + i);
-                internal::prefetch(src1 + i);
-
-                int8x16_t vs1 = vld1q_s8(src0 + i);
-                int8x16_t vs2 = vld1q_s8(src1 + i);
-
-                int16x8_t vdot1 = vmull_s8(vget_low_s8(vs1), vget_low_s8(vs2));
-                int16x8_t vdot2 = vmull_s8(vget_high_s8(vs1), vget_high_s8(vs2));
-
-                s1 = vpadalq_s16(s1, vdot1);
-                s2 = vpadalq_s16(s2, vdot2);
-            }
-
-            ws = vpadalq_s32(ws, s1);
-            ws = vpadalq_s32(ws, s2);
-        }
-
-        if(i + 8 <= size.width)
-        {
-            int8x8_t vs1 = vld1_s8(src0 + i);
-            int8x8_t vs2 = vld1_s8(src1 + i);
-
-            ws = vpadalq_s32(ws, vpaddlq_s16(vmull_s8(vs1, vs2)));
-            i += 8;
-        }
-
-        result += (double)vget_lane_s64(vadd_s64(vget_low_s64(ws), vget_high_s64(ws)), 0);
-
-        for (; i < size.width; ++i)
-            result += s32(src0[i]) * s32(src1[i]);
-    }
-    return result;
-#else
-    (void)_size;
-    (void)src0Base;
-    (void)src0Stride;
-    (void)src1Base;
-    (void)src1Stride;
-
-    return 0;
-#endif
-}
-
-f64 dotProduct(const Size2D &_size,
-               const f32 * src0Base, ptrdiff_t src0Stride,
-               const f32 * src1Base, ptrdiff_t src1Stride)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    Size2D size(_size);
-    if (src0Stride == src1Stride &&
-        src0Stride == (ptrdiff_t)(size.width * sizeof(f32)))
-    {
-        size.width *= size.height;
-        size.height = 1;
-    }
-
-#define DOT_FLOAT_BLOCKSIZE (1 << 13)
-    f64 result = 0.0;
-    for (size_t row = 0; row < size.height; ++row)
-    {
-        const f32 * src0 = internal::getRowPtr(src0Base, src0Stride, row);
-        const f32 * src1 = internal::getRowPtr(src1Base, src1Stride, row);
-
-        size_t i = 0;
-        while(i + 4 <= size.width)
-        {
-            size_t lim = std::min(i + DOT_FLOAT_BLOCKSIZE, size.width) - 4;
-            float32x4_t v_sum = vdupq_n_f32(0.0f);
-
-            for( ; i <= lim; i += 4 )
-            {
-                internal::prefetch(src0 + i);
-                internal::prefetch(src1 + i);
-                v_sum = vmlaq_f32(v_sum, vld1q_f32(src0 + i), vld1q_f32(src1 + i));
-            }
-
-            float32x2_t vres = vpadd_f32(vget_low_f32(v_sum),vget_high_f32(v_sum));
-            result += vget_lane_f32(vres, 0) + vget_lane_f32(vres, 1);
-        }
-
-        if(i + 2 <= size.width)
-        {
-            float32x2_t vres = vmul_f32(vld1_f32(src0 + i), vld1_f32(src1 + i));
-            result += vget_lane_f32(vres, 0) + vget_lane_f32(vres, 1);
-            i += 2;
-        }
-
-        for (; i < size.width; ++i)
-            result += src0[i] * src1[i];
-    }
-    return result;
-#else
-    (void)_size;
-    (void)src0Base;
-    (void)src0Stride;
-    (void)src1Base;
-    (void)src1Stride;
-
-    return 0;
-#endif
-}
-
-} // namespace CAROTENE_NS
--- a/3rdparty/carotene/src/fast.cpp
+++ b/3rdparty/carotene/src/fast.cpp
@ -1,428 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2012-2015, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-
-/* This is FAST corner detector, contributed to OpenCV by the author, Edward Rosten.
-   Below is the original copyright and the references */
-
-/*
-Copyright (c) 2006, 2008 Edward Rosten
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions
-are met:
-
- *Redistributions of source code must retain the above copyright
-  notice, this list of conditions and the following disclaimer.
-
- *Redistributions in binary form must reproduce the above copyright
-  notice, this list of conditions and the following disclaimer in the
-  documentation and/or other materials provided with the distribution.
-
- *Neither the name of the University of Cambridge nor the names of
-  its contributors may be used to endorse or promote products derived
-  from this software without specific prior written permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
-CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-*/
-
-/*
-The references are:
- * Machine learning for high-speed corner detection,
-   E. Rosten and T. Drummond, ECCV 2006
- * Faster and better: A machine learning approach to corner detection
-   E. Rosten, R. Porter and T. Drummond, PAMI, 2009
-*/
-
-#include "common.hpp"
-
-#include <vector>
-#include <cstring>
-
-namespace CAROTENE_NS {
-
-#ifdef CAROTENE_NEON
-namespace
-{
-
-void makeOffsets(ptrdiff_t pixel[], ptrdiff_t row_stride)
-{
-    pixel[0] = 0 + row_stride * 3;
-    pixel[1] = 1 + row_stride * 3;
-    pixel[2] = 2 + row_stride * 2;
-    pixel[3] = 3 + row_stride * 1;
-    pixel[4] = 3 + row_stride * 0;
-    pixel[5] = 3 + row_stride * -1;
-    pixel[6] = 2 + row_stride * -2;
-    pixel[7] = 1 + row_stride * -3;
-    pixel[8] = 0 + row_stride * -3;
-    pixel[9] = -1 + row_stride * -3;
-    pixel[10] = -2 + row_stride * -2;
-    pixel[11] = -3 + row_stride * -1;
-    pixel[12] = -3 + row_stride * 0;
-    pixel[13] = -3 + row_stride * 1;
-    pixel[14] = -2 + row_stride * 2;
-    pixel[15] = -1 + row_stride * 3;
-}
-
-u8 cornerScore(const u8* ptr, const ptrdiff_t pixel[])
-{
-    const s32 K = 8, N = 16 + K + 1;
-    s32 k, v = ptr[0];
-    s16 d[(N + 7) & ~7];
-    for( k = 0; k < N; k++ )
-        d[k] = (s16)(v - ptr[pixel[k]]);
-
-    int16x8_t q0 = vdupq_n_s16((s16)(-1000));
-    int16x8_t q1 = vdupq_n_s16((s16)(1000));
-
-    int16x8_t d0_7   = vld1q_s16(d +  0);
-    int16x8_t d8_15  = vld1q_s16(d +  8);
-    int16x8_t d16_23 = vld1q_s16(d + 16);
-    int16x8_t d24    = vld1q_s16(d + 24);
-
-    //k == 0
-    int16x8_t v0k0 = vextq_s16(d0_7, d8_15, 1);
-    int16x8_t v1k0 = vextq_s16(d0_7, d8_15, 2);
-    int16x8_t ak0 = vminq_s16(v0k0, v1k0);
-    int16x8_t bk0 = vmaxq_s16(v0k0, v1k0);
-
-    v0k0 = vextq_s16(d0_7, d8_15, 3);
-    ak0 = vminq_s16(ak0, v0k0);
-    bk0 = vmaxq_s16(bk0, v0k0);
-
-    v1k0 = vextq_s16(d0_7, d8_15, 4);
-    ak0 = vminq_s16(ak0, v1k0);
-    bk0 = vmaxq_s16(bk0, v1k0);
-
-    v0k0 = vextq_s16(d0_7, d8_15, 5);
-    ak0 = vminq_s16(ak0, v0k0);
-    bk0 = vmaxq_s16(bk0, v0k0);
-
-    v1k0 = vextq_s16(d0_7, d8_15, 6);
-    ak0 = vminq_s16(ak0, v1k0);
-    bk0 = vmaxq_s16(bk0, v1k0);
-
-    v0k0 = vextq_s16(d0_7, d8_15, 7);
-    ak0 = vminq_s16(ak0, v0k0);
-    bk0 = vmaxq_s16(bk0, v0k0);
-
-    ak0 = vminq_s16(ak0, d8_15);
-    bk0 = vmaxq_s16(bk0, d8_15);
-
-    q0 = vmaxq_s16(q0, vminq_s16(ak0, d0_7));
-    q1 = vminq_s16(q1, vmaxq_s16(bk0, d0_7));
-
-    v1k0 = vextq_s16(d8_15, d16_23, 1);
-    q0 = vmaxq_s16(q0, vminq_s16(ak0, v1k0));
-    q1 = vminq_s16(q1, vmaxq_s16(bk0, v1k0));
-
-    //k == 8
-    int16x8_t v0k8 = v1k0;
-    int16x8_t v1k8 = vextq_s16(d8_15, d16_23, 2);
-    int16x8_t ak8 = vminq_s16(v0k8, v1k8);
-    int16x8_t bk8 = vmaxq_s16(v0k8, v1k8);
-
-    v0k8 = vextq_s16(d8_15, d16_23, 3);
-    ak8 = vminq_s16(ak8, v0k8);
-    bk8 = vmaxq_s16(bk8, v0k8);
-
-    v1k8 = vextq_s16(d8_15, d16_23, 4);
-    ak8 = vminq_s16(ak8, v1k8);
-    bk8 = vmaxq_s16(bk8, v1k8);
-
-    v0k8 = vextq_s16(d8_15, d16_23, 5);
-    ak8 = vminq_s16(ak8, v0k8);
-    bk8 = vmaxq_s16(bk8, v0k8);
-
-    v1k8 = vextq_s16(d8_15, d16_23, 6);
-    ak8 = vminq_s16(ak8, v1k8);
-    bk8 = vmaxq_s16(bk8, v1k8);
-
-    v0k8 = vextq_s16(d8_15, d16_23, 7);
-    ak8 = vminq_s16(ak8, v0k8);
-    bk8 = vmaxq_s16(bk8, v0k8);
-
-    ak8 = vminq_s16(ak8, d16_23);
-    bk8 = vmaxq_s16(bk8, d16_23);
-
-    q0 = vmaxq_s16(q0, vminq_s16(ak8, d8_15));
-    q1 = vminq_s16(q1, vmaxq_s16(bk8, d8_15));
-
-    v1k8 = vextq_s16(d16_23, d24, 1);
-    q0 = vmaxq_s16(q0, vminq_s16(ak8, v1k8));
-    q1 = vminq_s16(q1, vmaxq_s16(bk8, v1k8));
-
-    //fin
-    int16x8_t q = vmaxq_s16(q0, vsubq_s16(vmovq_n_s16(0), q1));
-    int16x4_t q2 = vmax_s16(vget_low_s16(q), vget_high_s16(q));
-    int32x4_t q2w = vmovl_s16(q2);
-    int32x2_t q4 = vmax_s32(vget_low_s32(q2w), vget_high_s32(q2w));
-    int32x2_t q8 = vmax_s32(q4, vreinterpret_s32_s64(vshr_n_s64(vreinterpret_s64_s32(q4), 32)));
-
-    return (u8)(vget_lane_s32(q8, 0) - 1);
-}
-
-} //namespace
-#endif
-
-void FAST(const Size2D &size,
-          u8 *srcBase, ptrdiff_t srcStride,
-          KeypointStore *keypoints,
-          u8 threshold, bool nonmax_suppression)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    //keypoints.clear();
-
-    const s32 K = 8, N = 16 + K + 1;
-    ptrdiff_t i, j, k, pixel[N];
-    makeOffsets(pixel, srcStride);
-    for(k = 16; k < N; k++)
-        pixel[k] = pixel[k - 16];
-
-    uint8x16_t delta = vdupq_n_u8(128);
-    uint8x16_t t = vdupq_n_u8(threshold);
-    uint8x16_t K16 = vdupq_n_u8((u8)K);
-
-    u8 threshold_tab[512];
-    for( i = -255; i <= 255; i++ )
-        threshold_tab[i+255] = (u8)(i < -threshold ? 1 : i > threshold ? 2 : 0);
-
-    std::vector<u8> _buf((size.width+16)*3*(sizeof(ptrdiff_t) + sizeof(u8)) + 128);
-    u8* buf[3];
-    buf[0] = &_buf[0]; buf[1] = buf[0] + size.width; buf[2] = buf[1] + size.width;
-    ptrdiff_t* cpbuf[3];
-    cpbuf[0] = (ptrdiff_t*)internal::alignPtr(buf[2] + size.width, sizeof(ptrdiff_t)) + 1;
-    cpbuf[1] = cpbuf[0] + size.width + 1;
-    cpbuf[2] = cpbuf[1] + size.width + 1;
-    memset(buf[0], 0, size.width*3);
-
-    for(i = 3; i < (ptrdiff_t)size.height-2; i++)
-    {
-        const u8* ptr = internal::getRowPtr(srcBase, srcStride, i) + 3;
-        u8* curr = buf[(i - 3)%3];
-        ptrdiff_t* cornerpos = cpbuf[(i - 3)%3];
-        memset(curr, 0, size.width);
-        ptrdiff_t ncorners = 0;
-
-        if( i < (ptrdiff_t)size.height - 3 )
-        {
-            j = 3;
-
-            for(; j < (ptrdiff_t)size.width - 16 - 3; j += 16, ptr += 16)
-            {
-                internal::prefetch(ptr);
-                internal::prefetch(ptr + pixel[0]);
-                internal::prefetch(ptr + pixel[2]);
-
-                uint8x16_t v0 = vld1q_u8(ptr);
-                int8x16_t v1 = vreinterpretq_s8_u8(veorq_u8(vqsubq_u8(v0, t), delta));
-                int8x16_t v2 = vreinterpretq_s8_u8(veorq_u8(vqaddq_u8(v0, t), delta));
-
-                int8x16_t x0 = vreinterpretq_s8_u8(vsubq_u8(vld1q_u8(ptr + pixel[0]), delta));
-                int8x16_t x1 = vreinterpretq_s8_u8(vsubq_u8(vld1q_u8(ptr + pixel[4]), delta));
-                int8x16_t x2 = vreinterpretq_s8_u8(vsubq_u8(vld1q_u8(ptr + pixel[8]), delta));
-                int8x16_t x3 = vreinterpretq_s8_u8(vsubq_u8(vld1q_u8(ptr + pixel[12]), delta));
-
-                uint8x16_t m0 =   vandq_u8(vcgtq_s8(x0, v2), vcgtq_s8(x1, v2));
-                uint8x16_t m1 =   vandq_u8(vcgtq_s8(v1, x0), vcgtq_s8(v1, x1));
-                m0 = vorrq_u8(m0, vandq_u8(vcgtq_s8(x1, v2), vcgtq_s8(x2, v2)));
-                m1 = vorrq_u8(m1, vandq_u8(vcgtq_s8(v1, x1), vcgtq_s8(v1, x2)));
-                m0 = vorrq_u8(m0, vandq_u8(vcgtq_s8(x2, v2), vcgtq_s8(x3, v2)));
-                m1 = vorrq_u8(m1, vandq_u8(vcgtq_s8(v1, x2), vcgtq_s8(v1, x3)));
-                m0 = vorrq_u8(m0, vandq_u8(vcgtq_s8(x3, v2), vcgtq_s8(x0, v2)));
-                m1 = vorrq_u8(m1, vandq_u8(vcgtq_s8(v1, x3), vcgtq_s8(v1, x0)));
-                m0 = vorrq_u8(m0, m1);
-
-                u64 mask[2];
-                vst1q_u64(mask, vreinterpretq_u64_u8(m0));
-
-                if( mask[0] == 0 )
-                {
-                    if (mask[1] != 0)
-                    {
-                        j -= 8;
-                        ptr -= 8;
-                    }
-                    continue;
-                }
-
-                uint8x16_t c0 = vmovq_n_u8(0);
-                uint8x16_t c1 = vmovq_n_u8(0);
-                uint8x16_t max0 = vmovq_n_u8(0);
-                uint8x16_t max1 = vmovq_n_u8(0);
-                for( k = 0; k < N; k++ )
-                {
-                    int8x16_t x = vreinterpretq_s8_u8(veorq_u8(vld1q_u8(ptr + pixel[k]), delta));
-                    m0 = vcgtq_s8(x, v2);
-                    m1 = vcgtq_s8(v1, x);
-
-                    c0 = vandq_u8(vsubq_u8(c0, m0), m0);
-                    c1 = vandq_u8(vsubq_u8(c1, m1), m1);
-
-                    max0 = vmaxq_u8(max0, c0);
-                    max1 = vmaxq_u8(max1, c1);
-                }
-
-                max0 = vmaxq_u8(max0, max1);
-                u8 m[16];
-                vst1q_u8(m, vcgtq_u8(max0, K16));
-
-                for( k = 0; k < 16; ++k )
-                    if(m[k])
-                    {
-                        cornerpos[ncorners++] = j+k;
-                        if(nonmax_suppression)
-                            curr[j+k] = cornerScore(ptr+k, pixel);
-                    }
-            }
-
-            for( ; j < (s32)size.width - 3; j++, ptr++ )
-            {
-                s32 v = ptr[0];
-                const u8* tab = &threshold_tab[0] - v + 255;
-                s32 d = tab[ptr[pixel[0]]] | tab[ptr[pixel[8]]];
-
-                if( d == 0 )
-                    continue;
-
-                d &= tab[ptr[pixel[2]]] | tab[ptr[pixel[10]]];
-                d &= tab[ptr[pixel[4]]] | tab[ptr[pixel[12]]];
-                d &= tab[ptr[pixel[6]]] | tab[ptr[pixel[14]]];
-
-                if( d == 0 )
-                    continue;
-
-                d &= tab[ptr[pixel[1]]] | tab[ptr[pixel[9]]];
-                d &= tab[ptr[pixel[3]]] | tab[ptr[pixel[11]]];
-                d &= tab[ptr[pixel[5]]] | tab[ptr[pixel[13]]];
-                d &= tab[ptr[pixel[7]]] | tab[ptr[pixel[15]]];
-
-                if( d & 1 )
-                {
-                    s32 vt = v - threshold, count = 0;
-
-                    for( k = 0; k < N; k++ )
-                    {
-                        s32 x = ptr[pixel[k]];
-                        if(x < vt)
-                        {
-                            if( ++count > K )
-                            {
-                                cornerpos[ncorners++] = j;
-                                if(nonmax_suppression)
-                                    curr[j] = cornerScore(ptr, pixel);
-                                break;
-                            }
-                        }
-                        else
-                            count = 0;
-                    }
-                }
-
-                if( d & 2 )
-                {
-                    s32 vt = v + threshold, count = 0;
-
-                    for( k = 0; k < N; k++ )
-                    {
-                        s32 x = ptr[pixel[k]];
-                        if(x > vt)
-                        {
-                            if( ++count > K )
-                            {
-                                cornerpos[ncorners++] = j;
-                                if(nonmax_suppression)
-                                    curr[j] = cornerScore(ptr, pixel);
-                                break;
-                            }
-                        }
-                        else
-                            count = 0;
-                    }
-                }
-            }
-        }
-
-        cornerpos[-1] = ncorners;
-
-        if( i == 3 )
-            continue;
-
-        const u8* prev = buf[(i - 4 + 3)%3];
-        const u8* pprev = buf[(i - 5 + 3)%3];
-        cornerpos = cpbuf[(i - 4 + 3)%3];
-        ncorners = cornerpos[-1];
-
-        for( k = 0; k < ncorners; k++ )
-        {
-            j = cornerpos[k];
-            s32 score = prev[j];
-            if( !nonmax_suppression ||
-                    (score > prev[j+1] && score > prev[j-1] &&
-                     score > pprev[j-1] && score > pprev[j] && score > pprev[j+1] &&
-                     score > curr[j-1] && score > curr[j] && score > curr[j+1]) )
-            {
-                keypoints->push((f32)j, (f32)(i-1), 7.f, -1, (f32)score);
-            }
-        }
-    }
-#else
-    (void)size;
-    (void)srcBase;
-    (void)srcStride;
-    (void)keypoints;
-    (void)threshold;
-    (void)nonmax_suppression;
-#endif
-}
-
-} // namespace CAROTENE_NS
--- a/3rdparty/carotene/src/fill_minmaxloc.cpp
+++ b/3rdparty/carotene/src/fill_minmaxloc.cpp
@ -1,442 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2014, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-#include "common.hpp"
-
-namespace CAROTENE_NS {
-
-#ifdef CAROTENE_NEON
-
-namespace {
-
-template <typename T>
-void process(const T * src, size_t j0, size_t j1, size_t i,
-             T minVal, size_t * minLocPtr, s32 & minLocCount, s32 minLocCapacity,
-             T maxVal, size_t * maxLocPtr, s32 & maxLocCount, s32 maxLocCapacity)
-{
-    for (size_t j = j0; j < j1; ++j)
-    {
-        T val = src[j];
-
-        if (val == maxVal)
-        {
-            if (maxLocCount < maxLocCapacity)
-            {
-                maxLocPtr[maxLocCount] = j;
-                maxLocPtr[maxLocCount + 1] = i;
-            }
-            maxLocCount += 2;
-        }
-
-        if (val == minVal)
-        {
-            if (minLocCount < minLocCapacity)
-            {
-                minLocPtr[minLocCount] = j;
-                minLocPtr[minLocCount + 1] = i;
-            }
-            minLocCount += 2;
-        }
-    }
-}
-
-} // namespace
-
-#endif
-
-void fillMinMaxLocs(const Size2D & size,
-                    const u8 * srcBase, ptrdiff_t srcStride,
-                    u8 minVal, size_t * minLocPtr, s32 & minLocCount, s32 minLocCapacity,
-                    u8 maxVal, size_t * maxLocPtr, s32 & maxLocCount, s32 maxLocCapacity)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    size_t roiw16 = size.width >= 15 ? size.width - 15 : 0;
-    size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
-
-    uint8x16_t v_maxval16 = vdupq_n_u8(maxVal), v_minval16 = vdupq_n_u8(minVal);
-    uint8x8_t v_maxval8 = vdup_n_u8(maxVal), v_minval8 = vdup_n_u8(minVal);
-
-    u64 mask[2] = { 0ul };
-
-    minLocCapacity <<= 1;
-    maxLocCapacity <<= 1;
-
-    for (size_t i = 0; i < size.height; ++i)
-    {
-        const u8 * src = internal::getRowPtr(srcBase, srcStride, i);
-        size_t j = 0;
-
-        for ( ; j < roiw16; j += 16)
-        {
-            internal::prefetch(src + j);
-            uint8x16_t v_src = vld1q_u8(src + j);
-
-            uint8x16_t v_maxmask = vceqq_u8(v_src, v_maxval16);
-            uint8x16_t v_minmask = vceqq_u8(v_src, v_minval16);
-            uint8x16_t v_mask = vorrq_u8(v_maxmask, v_minmask);
-
-            vst1q_u8((u8 *)&mask[0], v_mask);
-
-            if (mask[0])
-                process(src, j, j + 8, i,
-                        minVal, minLocPtr, minLocCount, minLocCapacity,
-                        maxVal, maxLocPtr, maxLocCount, maxLocCapacity);
-            if (mask[1])
-                process(src, j + 8, j + 16, i,
-                        minVal, minLocPtr, minLocCount, minLocCapacity,
-                        maxVal, maxLocPtr, maxLocCount, maxLocCapacity);
-        }
-        for ( ; j < roiw8; j += 8)
-        {
-            uint8x8_t v_src = vld1_u8(src + j);
-
-            uint8x8_t v_maxmask = vceq_u8(v_src, v_maxval8);
-            uint8x8_t v_minmask = vceq_u8(v_src, v_minval8);
-            uint8x8_t v_mask = vorr_u8(v_maxmask, v_minmask);
-
-            vst1_u8((u8 *)&mask[0], v_mask);
-
-            if (mask[0])
-                process(src, j, j + 8, i,
-                        minVal, minLocPtr, minLocCount, minLocCapacity,
-                        maxVal, maxLocPtr, maxLocCount, maxLocCapacity);
-        }
-
-        process(src, j, size.width, i,
-                minVal, minLocPtr, minLocCount, minLocCapacity,
-                maxVal, maxLocPtr, maxLocCount, maxLocCapacity);
-    }
-
-    minLocCount >>= 1;
-    maxLocCount >>= 1;
-#else
-    (void)size;
-    (void)srcBase;
-    (void)srcStride;
-    (void)minVal;
-    (void)minLocPtr;
-    (void)minLocCount;
-    (void)minLocCapacity;
-    (void)maxVal;
-    (void)maxLocPtr;
-    (void)maxLocCount;
-    (void)maxLocCapacity;
-#endif
-}
-
-void fillMinMaxLocs(const Size2D & size,
-                    const u16 * srcBase, ptrdiff_t srcStride,
-                    u16 minVal, size_t * minLocPtr, s32 & minLocCount, s32 minLocCapacity,
-                    u16 maxVal, size_t * maxLocPtr, s32 & maxLocCount, s32 maxLocCapacity)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    size_t roiw16 = size.width >= 15 ? size.width - 15 : 0;
-    size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
-
-    uint16x8_t v_maxval8 = vdupq_n_u16(maxVal),
-               v_minval8 = vdupq_n_u16(minVal);
-    u64 mask[2] = { 0ul };
-
-    minLocCapacity <<= 1;
-    maxLocCapacity <<= 1;
-
-    for (size_t i = 0; i < size.height; ++i)
-    {
-        const u16 * src = internal::getRowPtr(srcBase, srcStride, i);
-        size_t j = 0;
-
-        for ( ; j < roiw16; j += 16)
-        {
-            internal::prefetch(src + j);
-            uint16x8_t v_src0 = vld1q_u16(src + j), v_src1 = vld1q_u16(src + j + 8);
-
-            uint16x8_t v_mask0 = vorrq_u16(vceqq_u16(v_src0, v_maxval8), vceqq_u16(v_src0, v_minval8));
-            uint16x8_t v_mask1 = vorrq_u16(vceqq_u16(v_src1, v_maxval8), vceqq_u16(v_src1, v_minval8));
-
-            vst1q_u8((u8 *)&mask[0], vcombine_u8(vmovn_u16(v_mask0), vmovn_u16(v_mask1)));
-
-            if (mask[0])
-                process(src, j, j + 8, i,
-                        minVal, minLocPtr, minLocCount, minLocCapacity,
-                        maxVal, maxLocPtr, maxLocCount, maxLocCapacity);
-            if (mask[1])
-                process(src, j + 8, j + 16, i,
-                        minVal, minLocPtr, minLocCount, minLocCapacity,
-                        maxVal, maxLocPtr, maxLocCount, maxLocCapacity);
-        }
-        for ( ; j < roiw8; j += 8)
-        {
-            internal::prefetch(src + j);
-            uint16x8_t v_src = vld1q_u16(src + j);
-
-            uint16x8_t v_maxmask = vceqq_u16(v_src, v_maxval8);
-            uint16x8_t v_minmask = vceqq_u16(v_src, v_minval8);
-            uint16x8_t v_mask = vorrq_u16(v_maxmask, v_minmask);
-
-            vst1_u8((u8 *)&mask[0], vmovn_u16(v_mask));
-
-            if (mask[0])
-                process(src, j, j + 8, i,
-                        minVal, minLocPtr, minLocCount, minLocCapacity,
-                        maxVal, maxLocPtr, maxLocCount, maxLocCapacity);
-        }
-
-        process(src, j, size.width, i,
-                minVal, minLocPtr, minLocCount, minLocCapacity,
-                maxVal, maxLocPtr, maxLocCount, maxLocCapacity);
-    }
-
-    minLocCount >>= 1;
-    maxLocCount >>= 1;
-#else
-    (void)size;
-    (void)srcBase;
-    (void)srcStride;
-    (void)minVal;
-    (void)minLocPtr;
-    (void)minLocCount;
-    (void)minLocCapacity;
-    (void)maxVal;
-    (void)maxLocPtr;
-    (void)maxLocCount;
-    (void)maxLocCapacity;
-#endif
-}
-
-void fillMinMaxLocs(const Size2D & size,
-                    const s16 * srcBase, ptrdiff_t srcStride,
-                    s16 minVal, size_t * minLocPtr, s32 & minLocCount, s32 minLocCapacity,
-                    s16 maxVal, size_t * maxLocPtr, s32 & maxLocCount, s32 maxLocCapacity)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    size_t roiw16 = size.width >= 15 ? size.width - 15 : 0;
-    size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
-
-    int16x8_t v_maxval8 = vdupq_n_s16(maxVal),
-              v_minval8 = vdupq_n_s16(minVal);
-    u64 mask[2] = { 0ul };
-
-    minLocCapacity <<= 1;
-    maxLocCapacity <<= 1;
-
-    for (size_t i = 0; i < size.height; ++i)
-    {
-        const s16 * src = internal::getRowPtr(srcBase, srcStride, i);
-        size_t j = 0;
-
-        for ( ; j < roiw16; j += 16)
-        {
-            internal::prefetch(src + j);
-            int16x8_t v_src0 = vld1q_s16(src + j), v_src1 = vld1q_s16(src + j + 8);
-
-            uint16x8_t v_mask0 = vorrq_u16(vceqq_s16(v_src0, v_maxval8), vceqq_s16(v_src0, v_minval8));
-            uint16x8_t v_mask1 = vorrq_u16(vceqq_s16(v_src1, v_maxval8), vceqq_s16(v_src1, v_minval8));
-
-            vst1q_u8((u8 *)&mask[0], vcombine_u8(vmovn_u16(v_mask0), vmovn_u16(v_mask1)));
-
-            if (mask[0])
-                process(src, j, j + 8, i,
-                        minVal, minLocPtr, minLocCount, minLocCapacity,
-                        maxVal, maxLocPtr, maxLocCount, maxLocCapacity);
-            if (mask[1])
-                process(src, j + 8, j + 16, i,
-                        minVal, minLocPtr, minLocCount, minLocCapacity,
-                        maxVal, maxLocPtr, maxLocCount, maxLocCapacity);
-        }
-        for ( ; j < roiw8; j += 8)
-        {
-            internal::prefetch(src + j);
-            int16x8_t v_src = vld1q_s16(src + j);
-
-            uint16x8_t v_maxmask = vceqq_s16(v_src, v_maxval8);
-            uint16x8_t v_minmask = vceqq_s16(v_src, v_minval8);
-            uint16x8_t v_mask = vorrq_u16(v_maxmask, v_minmask);
-
-            vst1_u8((u8 *)&mask[0], vmovn_u16(v_mask));
-
-            if (mask[0])
-                process(src, j, j + 8, i,
-                        minVal, minLocPtr, minLocCount, minLocCapacity,
-                        maxVal, maxLocPtr, maxLocCount, maxLocCapacity);
-        }
-
-        process(src, j, size.width, i,
-                minVal, minLocPtr, minLocCount, minLocCapacity,
-                maxVal, maxLocPtr, maxLocCount, maxLocCapacity);
-    }
-
-    minLocCount >>= 1;
-    maxLocCount >>= 1;
-#else
-    (void)size;
-    (void)srcBase;
-    (void)srcStride;
-    (void)minVal;
-    (void)minLocPtr;
-    (void)minLocCount;
-    (void)minLocCapacity;
-    (void)maxVal;
-    (void)maxLocPtr;
-    (void)maxLocCount;
-    (void)maxLocCapacity;
-#endif
-}
-
-void fillMinMaxLocs(const Size2D & size,
-                    const s32 * srcBase, ptrdiff_t srcStride,
-                    s32 minVal, size_t * minLocPtr, s32 & minLocCount, s32 minLocCapacity,
-                    s32 maxVal, size_t * maxLocPtr, s32 & maxLocCount, s32 maxLocCapacity)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
-
-    int32x4_t v_maxval4 = vdupq_n_s32(maxVal),
-              v_minval4 = vdupq_n_s32(minVal);
-    u64 mask = 0ul;
-
-    minLocCapacity <<= 1;
-    maxLocCapacity <<= 1;
-
-    for (size_t i = 0; i < size.height; ++i)
-    {
-        const s32 * src = internal::getRowPtr(srcBase, srcStride, i);
-        size_t j = 0;
-
-        for ( ; j < roiw8; j += 8)
-        {
-            internal::prefetch(src + j);
-            int32x4_t v_src0 = vld1q_s32(src + j), v_src1 = vld1q_s32(src + j + 4);
-
-            uint32x4_t v_mask0 = vorrq_u32(vceqq_s32(v_src0, v_maxval4), vceqq_s32(v_src0, v_minval4));
-            uint32x4_t v_mask1 = vorrq_u32(vceqq_s32(v_src1, v_maxval4), vceqq_s32(v_src1, v_minval4));
-
-            vst1_u8((u8 *)&mask, vmovn_u16(vcombine_u16(vmovn_u32(v_mask0), vmovn_u32(v_mask1))));
-
-            if (mask)
-                process(src, j, j + 8, i,
-                        minVal, minLocPtr, minLocCount, minLocCapacity,
-                        maxVal, maxLocPtr, maxLocCount, maxLocCapacity);
-        }
-
-        process(src, j, size.width, i,
-                minVal, minLocPtr, minLocCount, minLocCapacity,
-                maxVal, maxLocPtr, maxLocCount, maxLocCapacity);
-    }
-
-    minLocCount >>= 1;
-    maxLocCount >>= 1;
-#else
-    (void)size;
-    (void)srcBase;
-    (void)srcStride;
-    (void)minVal;
-    (void)minLocPtr;
-    (void)minLocCount;
-    (void)minLocCapacity;
-    (void)maxVal;
-    (void)maxLocPtr;
-    (void)maxLocCount;
-    (void)maxLocCapacity;
-#endif
-}
-
-void fillMinMaxLocs(const Size2D & size,
-                    const u32 * srcBase, ptrdiff_t srcStride,
-                    u32 minVal, size_t * minLocPtr, s32 & minLocCount, s32 minLocCapacity,
-                    u32 maxVal, size_t * maxLocPtr, s32 & maxLocCount, s32 maxLocCapacity)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
-
-    uint32x4_t v_maxval4 = vdupq_n_u32(maxVal),
-               v_minval4 = vdupq_n_u32(minVal);
-    u64 mask = 0ul;
-
-    minLocCapacity <<= 1;
-    maxLocCapacity <<= 1;
-
-    for (size_t i = 0; i < size.height; ++i)
-    {
-        const u32 * src = internal::getRowPtr(srcBase, srcStride, i);
-        size_t j = 0;
-
-        for ( ; j < roiw8; j += 8)
-        {
-            internal::prefetch(src + j);
-            uint32x4_t v_src0 = vld1q_u32(src + j), v_src1 = vld1q_u32(src + j + 4);
-
-            uint32x4_t v_mask0 = vorrq_u32(vceqq_u32(v_src0, v_maxval4), vceqq_u32(v_src0, v_minval4));
-            uint32x4_t v_mask1 = vorrq_u32(vceqq_u32(v_src1, v_maxval4), vceqq_u32(v_src1, v_minval4));
-
-            vst1_u8((u8 *)&mask, vmovn_u16(vcombine_u16(vmovn_u32(v_mask0), vmovn_u32(v_mask1))));
-
-            if (mask)
-                process(src, j, j + 8, i,
-                        minVal, minLocPtr, minLocCount, minLocCapacity,
-                        maxVal, maxLocPtr, maxLocCount, maxLocCapacity);
-        }
-
-        process(src, j, size.width, i,
-                minVal, minLocPtr, minLocCount, minLocCapacity,
-                maxVal, maxLocPtr, maxLocCount, maxLocCapacity);
-    }
-
-    minLocCount >>= 1;
-    maxLocCount >>= 1;
-#else
-    (void)size;
-    (void)srcBase;
-    (void)srcStride;
-    (void)minVal;
-    (void)minLocPtr;
-    (void)minLocCount;
-    (void)minLocCapacity;
-    (void)maxVal;
-    (void)maxLocPtr;
-    (void)maxLocCount;
-    (void)maxLocCapacity;
-#endif
-}
-
-} // namespace CAROTENE_NS
--- a/3rdparty/carotene/src/flip.cpp
+++ b/3rdparty/carotene/src/flip.cpp
@ -1,222 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2014, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-#include "common.hpp"
-#include "vtransform.hpp"
-
-#include <cstring>
-
-namespace CAROTENE_NS {
-
-// Reports whether flip() can handle the given mode / element size pair.
-// elemSize is the size of one element in bytes.
-bool isFlipSupported(FLIP_MODE flipMode, u32 elemSize)
-{
-    // Nothing is supported when the library cannot run on this configuration.
-    if (!isSupportedConfiguration())
-        return false;
-
-    // A purely vertical flip is accepted for any element size.
-    if (flipMode == FLIP_VERTICAL_MODE)
-        return true;
-
-    // Horizontal and combined flips are limited to 1-, 2-, 3- and 4-byte elements.
-    const bool mirrorsColumns = (flipMode == FLIP_HORIZONTAL_MODE) || (flipMode == FLIP_BOTH_MODE);
-    const bool elemSizeOk = (elemSize >= 1) && (elemSize <= 4);
-    return mirrorsColumns && elemSizeOk;
-}
-
-#ifdef CAROTENE_NEON
-
-namespace {
-
-// Mirrors every row of an image with elements of type T left-to-right.
-// When flipMode shares bits with FLIP_VERTICAL_MODE, the destination rows are
-// additionally written in reverse order (row i goes to row height - i - 1).
-// srcBase/dstBase are reinterpreted as arrays of T; rows are located with getRowPtr.
-template <typename T>
-void flip(const Size2D & size,
-          const void * srcBase, ptrdiff_t srcStride,
-          void * dstBase, ptrdiff_t dstStride,
-          FLIP_MODE flipMode)
-{
-    using namespace internal;
-
-    typedef typename VecTraits<T>::vec128 vec128;
-    typedef typename VecTraits<T>::vec64 vec64;
-
-    // Number of T elements held by a 16-byte and by an 8-byte vector.
-    u32 step_base = 16 / sizeof(T), step_tail = 8 / sizeof(T);
-    // Exclusive upper bounds for js such that a whole vector still fits in the row;
-    // the ternary avoids unsigned wrap-around for very narrow rows.
-    size_t roiw_base = size.width >= (step_base - 1) ? size.width - step_base + 1 : 0;
-    size_t roiw_tail = size.width >= (step_tail - 1) ? size.width - step_tail + 1 : 0;
-
-    for (size_t i = 0; i < size.height; ++i)
-    {
-        const T * src = getRowPtr((const T *)srcBase, srcStride, i);
-        T * dst = getRowPtr((T *)dstBase, dstStride, (flipMode & FLIP_VERTICAL_MODE) != 0 ? size.height - i - 1 : i);
-        // js walks the source row forwards, jd is the (exclusive) end of the
-        // not-yet-written part of the destination row and walks backwards.
-        size_t js = 0, jd = size.width;
-
-        for (; js < roiw_base; js += step_base, jd -= step_base)
-        {
-            prefetch(src + js);
-
-            vec128 v_src = vld1q(src + js);
-            // vrev64q reverses the elements inside each 64-bit half (NEON VREV64);
-            // swapping the two halves afterwards reverses the whole 128-bit vector.
-            vec128 v_dst = vrev64q(v_src);
-            v_dst = vcombine(vget_high(v_dst), vget_low(v_dst));
-            vst1q(dst + jd - step_base, v_dst);
-        }
-        // One more pass with 64-bit vectors for what the 128-bit loop left over.
-        for (; js < roiw_tail; js += step_tail, jd -= step_tail)
-        {
-            vec64 v_src = vld1(src + js);
-            vst1(dst + jd - step_tail, vrev64(v_src));
-        }
-
-        // Scalar remainder. --jd turns the exclusive end into the index of the
-        // last unwritten element; if nothing is left it wraps but is never used.
-        for (--jd; js < size.width; ++js, --jd)
-            dst[jd] = src[js];
-    }
-}
-
-// Mirrors every row of an image whose pixels are three consecutive T values,
-// keeping the order of the three values inside each pixel.
-// When flipMode shares bits with FLIP_VERTICAL_MODE, the destination rows are
-// additionally written in reverse order (row i goes to row height - i - 1).
-// The 128-bit path is compiled out when ANDROID is defined (reason not visible here).
-template <typename T>
-void flip3(const Size2D & size,
-           const void * srcBase, ptrdiff_t srcStride,
-           void * dstBase, ptrdiff_t dstStride,
-           FLIP_MODE flipMode)
-{
-    using namespace internal;
-
-#ifndef ANDROID
-    typedef typename VecTraits<T, 3>::vec128 vec128;
-#endif
-    typedef typename VecTraits<T, 3>::vec64 vec64;
-
-#ifndef ANDROID
-    // Pixels per 128-bit step, and the matching number of T values (3 per pixel).
-    u32 step_base = 16 / sizeof(T), step_base3 = step_base * 3;
-    size_t roiw_base = size.width >= (step_base - 1) ? size.width - step_base + 1 : 0;
-#endif
-    // Pixels per 64-bit step, and the matching number of T values (3 per pixel).
-    u32 step_tail = 8 / sizeof(T), step_tail3 = step_tail * 3;
-    size_t roiw_tail = size.width >= (step_tail - 1) ? size.width - step_tail + 1 : 0;
-
-    for (size_t i = 0; i < size.height; ++i)
-    {
-        const T * src = getRowPtr((const T *)srcBase, srcStride, i);
-        T * dst = getRowPtr((T *)dstBase, dstStride, (flipMode & FLIP_VERTICAL_MODE) != 0 ? size.height - i - 1 : i);
-        // j counts pixels; js / jd are offsets in T values into the source row
-        // (forwards) and the destination row (backwards, exclusive end).
-        size_t j = 0, js = 0, jd = size.width * 3;
-
-#ifndef ANDROID
-        for (; j < roiw_base; j += step_base, js += step_base3, jd -= step_base3)
-        {
-            prefetch(src + js);
-
-            // vld3q de-interleaves the three channels into val[0..2]; each channel
-            // is then reversed on its own (vrev64q + swap of the 64-bit halves).
-            vec128 v_src = vld3q(src + js), v_dst;
-            v_src.val[0] = vrev64q(v_src.val[0]);
-            v_src.val[1] = vrev64q(v_src.val[1]);
-            v_src.val[2] = vrev64q(v_src.val[2]);
-
-            v_dst.val[0] = vcombine(vget_high(v_src.val[0]), vget_low(v_src.val[0]));
-            v_dst.val[1] = vcombine(vget_high(v_src.val[1]), vget_low(v_src.val[1]));
-            v_dst.val[2] = vcombine(vget_high(v_src.val[2]), vget_low(v_src.val[2]));
-
-            // vst3q re-interleaves the channels while storing.
-            vst3q(dst + jd - step_base3, v_dst);
-        }
-#endif // ANDROID
-
-        // Same scheme with 64-bit vectors for the remaining full vector steps.
-        for (; j < roiw_tail; j += step_tail, js += step_tail3, jd -= step_tail3)
-        {
-            vec64 v_src = vld3(src + js), v_dst;
-            v_dst.val[0] = vrev64(v_src.val[0]);
-            v_dst.val[1] = vrev64(v_src.val[1]);
-            v_dst.val[2] = vrev64(v_src.val[2]);
-
-            vst3(dst + jd - step_tail3, v_dst);
-        }
-
-        // Scalar remainder: copy one 3-value pixel at a time. jd -= 3 moves from the
-        // exclusive end to the start of the last unwritten pixel; a wrap-around
-        // after the final pixel is harmless because the value is not used.
-        for (jd -= 3; j < size.width; ++j, js += 3, jd -= 3)
-        {
-            dst[jd] = src[js];
-            dst[jd + 1] = src[js + 1];
-            dst[jd + 2] = src[js + 2];
-        }
-    }
-}
-
-// Common signature of the flip<T> / flip3<T> instantiations, used by the public
-// flip() entry point to pick an implementation at run time.
-typedef void (* flipFunc)(const Size2D &size,
-                  const void * srcBase, ptrdiff_t srcStride,
-                  void * dstBase, ptrdiff_t dstStride,
-                  FLIP_MODE flipMode);
-
-} // namespace
-
-#endif
-
-// Flips an image of elemSize-byte elements according to flipMode.
-// Asserts that the mode / element size combination is supported.
-// Without CAROTENE_NEON the function body is a no-op.
-void flip(const Size2D &size,
-          const u8 * srcBase, ptrdiff_t srcStride,
-          u8 * dstBase, ptrdiff_t dstStride,
-          FLIP_MODE flipMode, u32 elemSize)
-{
-    internal::assertSupportedConfiguration(isFlipSupported(flipMode, elemSize));
-#ifdef CAROTENE_NEON
-
-    if (flipMode != FLIP_VERTICAL_MODE)
-    {
-        // Horizontal / combined flip: choose the worker matching the element size.
-        flipFunc worker = NULL;
-        switch (elemSize)
-        {
-        case sizeof(u8):
-            worker = &flip<u8>;
-            break;
-        case sizeof(u16):
-            worker = &flip<u16>;
-            break;
-        case sizeof(u32):
-            worker = &flip<u32>;
-            break;
-        case sizeof(u8) * 3:
-            worker = &flip3<u8>;
-            break;
-        default:
-            break;
-        }
-
-        // Unknown element sizes are silently ignored.
-        if (worker != NULL)
-            worker(size,
-                   srcBase, srcStride,
-                   dstBase, dstStride,
-                   flipMode);
-        return;
-    }
-
-    // Pure vertical flip: rows are copied unchanged, only their order is reversed.
-    const size_t rowBytes = elemSize * size.width;
-    for (size_t srcRow = 0, dstRow = size.height; srcRow < size.height; ++srcRow)
-    {
-        --dstRow;
-        std::memcpy(internal::getRowPtr(dstBase, dstStride, dstRow),
-                    internal::getRowPtr(srcBase, srcStride, srcRow),
-                    rowBytes);
-    }
-
-#else
-    (void)size;
-    (void)srcBase;
-    (void)srcStride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)flipMode;
-    (void)elemSize;
-#endif
-}
-
-} // namespace CAROTENE_NS
--- a/3rdparty/carotene/src/gaussian_blur.cpp
+++ b/3rdparty/carotene/src/gaussian_blur.cpp
--- a/3rdparty/carotene/src/in_range.cpp
+++ b/3rdparty/carotene/src/in_range.cpp
@ -1,195 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2012-2015, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-#include "common.hpp"
-
-#include "vtransform.hpp"
-
-namespace CAROTENE_NS {
-
-#ifdef CAROTENE_NEON
-
-namespace {
-
-// Store helpers for comparison masks: whatever the lane width of the two input
-// masks, the result written to dst has one byte per lane.
-
-// 8-bit lanes: 2 x 16 bytes are stored as they are.
-inline void vnst(u8* dst, uint8x16_t v1, uint8x16_t v2)
-{
-    vst1q_u8(dst, v1);
-    vst1q_u8(dst + 16, v2);
-}
-
-// 16-bit lanes: each mask is narrowed to 8 bytes, 16 bytes are stored.
-inline void vnst(u8* dst, uint16x8_t v1, uint16x8_t v2)
-{
-    const uint8x8_t lowHalf = vmovn_u16(v1);
-    const uint8x8_t highHalf = vmovn_u16(v2);
-    vst1q_u8(dst, vcombine_u8(lowHalf, highHalf));
-}
-
-// 32-bit lanes: narrowed twice (32 -> 16 -> 8 bits), 8 bytes are stored.
-inline void vnst(u8* dst, uint32x4_t v1, uint32x4_t v2)
-{
-    const uint16x8_t packed16 = vcombine_u16(vmovn_u32(v1), vmovn_u32(v2));
-    vst1_u8(dst, vmovn_u16(packed16));
-}
-
-// Vectorised tail handler used by inRangeCheck after its main loop.
-// elsize is sizeof(T). This primary template is the fallback for element sizes
-// without a specialisation: it leaves x untouched, so the caller's scalar loop
-// processes the whole tail.
-template <typename T, int elsize> struct vtail
-{
-    static inline void inRange(const T *, const T *, const T *,
-                               u8 *, size_t &, size_t)
-    {
-        //do nothing since there couldn't be enough data
-    }
-};
-// Tail handler for 2-byte element types.
-// Writes 0xFF / 0x00 bytes to dst for elements x..x+7 when a full 8-element
-// vector still fits into the row, and advances x past them.
-//   src, rng1, rng2 - row pointers of the source and of the lower / upper bounds
-//   dst             - row pointer of the byte mask
-//   x               - in/out: index of the first unprocessed element
-//   width           - number of elements in the row
-template <typename T> struct vtail<T, 2>
-{
-    static inline void inRange(const T * src, const T * rng1, const T * rng2,
-                               u8 * dst, size_t &x, size_t width)
-    {
-        typedef typename internal::VecTraits<T>::vec128 vec128;
-        typedef typename internal::VecTraits<T>::unsign::vec128 uvec128;
-        // There are no more than 15 elements in the tail, so an 8-element vector
-        // needs to be handled at most once. "<=" lets a tail of exactly 8 elements
-        // take the vector path too: elements x..x+7 are all inside the row.
-        if( x + 8 <= width)
-        {
-             vec128  vs = internal::vld1q( src + x);
-             vec128 vr1 = internal::vld1q(rng1 + x);
-             vec128 vr2 = internal::vld1q(rng2 + x);
-            // mask = (src >= rng1) & (rng2 >= src), narrowed to one byte per element
-            uvec128  vd = internal::vandq(internal::vcgeq(vs, vr1), internal::vcgeq(vr2, vs));
-            internal::vst1(dst + x, internal::vmovn(vd));
-            x+=8;
-        }
-    }
-};
-// Tail handler for 1-byte element types.
-// Processes up to one 16-element and one 8-element vector of the row tail,
-// writing 0xFF / 0x00 bytes to dst and advancing x past the handled elements.
-//   src, rng1, rng2 - row pointers of the source and of the lower / upper bounds
-//   dst             - row pointer of the byte mask
-//   x               - in/out: index of the first unprocessed element
-//   width           - number of elements in the row
-template <typename T> struct vtail<T, 1>
-{
-    static inline void inRange(const T * src, const T * rng1, const T * rng2,
-                               u8 * dst, size_t &x, size_t width)
-    {
-        typedef typename internal::VecTraits<T>::vec128 vec128;
-        typedef typename internal::VecTraits<T>::unsign::vec128 uvec128;
-        typedef typename internal::VecTraits<T>::vec64 vec64;
-        typedef typename internal::VecTraits<T>::unsign::vec64 uvec64;
-        // There are no more than 31 elements in the tail, so it can be covered by
-        // one 16-element step, one 8-element step, or both. "<=" lets tails that
-        // end exactly on a vector boundary take the vector path as well.
-        if( x + 16 <= width)
-        {
-             vec128  vs = internal::vld1q( src + x);
-             vec128 vr1 = internal::vld1q(rng1 + x);
-             vec128 vr2 = internal::vld1q(rng2 + x);
-            // mask = (src >= rng1) & (rng2 >= src)
-            uvec128  vd = internal::vandq(internal::vcgeq(vs, vr1), internal::vcgeq(vr2, vs));
-            internal::vst1q(dst + x, vd);
-            x+=16;
-        }
-        if( x + 8 <= width)
-        {
-             vec64  vs = internal::vld1( src + x);
-             vec64 vr1 = internal::vld1(rng1 + x);
-             vec64 vr2 = internal::vld1(rng2 + x);
-            uvec64  vd = internal::vand(internal::vcge(vs, vr1), internal::vcge(vr2, vs));
-            internal::vst1(dst + x, vd);
-            x+=8;
-        }
-    }
-};
-
-// Element-wise range test: dst = 0xFF where rng1 <= src <= rng2, otherwise 0x00.
-//   _size                - image size in elements
-//   srcBase / srcStride  - source image
-//   rng1Base, rng2Base   - per-element lower and upper bounds (inclusive)
-//   dstBase / dstStride  - resulting byte mask
-// Rows are located with internal::getRowPtr.
-// NOTE(review): the contiguity test below assumes strides are expressed in
-// bytes - confirm against getRowPtr.
-template <typename T>
-inline void inRangeCheck(const Size2D &_size,
-                         const T * srcBase, ptrdiff_t srcStride,
-                         const T * rng1Base, ptrdiff_t rng1Stride,
-                         const T * rng2Base, ptrdiff_t rng2Stride,
-                         u8 * dstBase, ptrdiff_t dstStride)
-{
-    typedef typename internal::VecTraits<T>::vec128 vec128;
-    typedef typename internal::VecTraits<T>::unsign::vec128 uvec128;
-
-    Size2D size(_size);
-    // When every buffer is densely packed the image is processed as one long row.
-    // The T-typed inputs are compared with width * sizeof(T) and the u8 mask with
-    // width, so the shortcut also applies to element types wider than one byte
-    // and cannot trigger on a layout whose rows are not really contiguous.
-    if (srcStride == (ptrdiff_t)(size.width * sizeof(T)) &&
-        rng1Stride == srcStride &&
-        rng2Stride == srcStride &&
-        dstStride == (ptrdiff_t)(size.width))
-    {
-        size.width *= size.height;
-        size.height = 1;
-    }
-    // Largest multiple of the per-iteration element count (two 128-bit vectors).
-    const size_t width = size.width & ~( 32/sizeof(T) - 1 );
-
-    for(size_t j = 0; j < size.height; ++j)
-    {
-        const T *  src = internal::getRowPtr( srcBase,  srcStride, j);
-        const T * rng1 = internal::getRowPtr(rng1Base, rng1Stride, j);
-        const T * rng2 = internal::getRowPtr(rng2Base, rng2Stride, j);
-             u8 *  dst = internal::getRowPtr( dstBase,  dstStride, j);
-        size_t i = 0;
-        for( ; i < width; i += 32/sizeof(T) )
-        {
-            internal::prefetch(src + i);
-            internal::prefetch(rng1 + i);
-            internal::prefetch(rng2 + i);
-
-            // First 128-bit vector: mask = (src >= rng1) & (rng2 >= src)
-             vec128  vs = internal::vld1q( src + i);
-             vec128 vr1 = internal::vld1q(rng1 + i);
-             vec128 vr2 = internal::vld1q(rng2 + i);
-            uvec128 vd1 = internal::vandq(internal::vcgeq(vs, vr1), internal::vcgeq(vr2, vs));
-            // Second 128-bit vector
-                     vs = internal::vld1q( src + i + 16/sizeof(T));
-                    vr1 = internal::vld1q(rng1 + i + 16/sizeof(T));
-                    vr2 = internal::vld1q(rng2 + i + 16/sizeof(T));
-            uvec128 vd2 = internal::vandq(internal::vcgeq(vs, vr1), internal::vcgeq(vr2, vs));
-            // Narrow both masks to one byte per element and store them.
-            vnst(dst + i, vd1, vd2);
-        }
-        // Shorter vector steps for the tail (where a specialisation exists) ...
-        vtail<T, sizeof(T)>::inRange(src, rng1, rng2, dst, i, size.width);
-        // ... and a scalar loop for whatever is left: -(true) converts to 0xFF.
-        for( ; i < size.width; i++ )
-            dst[i] = (u8)(-(rng1[i] <= src[i] && src[i] <= rng2[i]));
-    }
-}
-
-}
-
-// NEON build: INRANGEFUNC(T) defines the public inRange() overload for element
-// type T, which asserts a supported configuration and forwards to inRangeCheck.
-#define INRANGEFUNC(T)                                       \
-void inRange(const Size2D &_size,                            \
-             const T * srcBase, ptrdiff_t srcStride,         \
-             const T * rng1Base, ptrdiff_t rng1Stride,       \
-             const T * rng2Base, ptrdiff_t rng2Stride,       \
-             u8 * dstBase, ptrdiff_t dstStride)              \
-{                                                            \
-    internal::assertSupportedConfiguration();                \
-    inRangeCheck(_size, srcBase, srcStride,                  \
-                 rng1Base, rng1Stride, rng2Base, rng2Stride, \
-                 dstBase, dstStride);                        \
-}
-#else
-// Build without NEON: the overload only performs the configuration assertion
-// and ignores all of its arguments.
-#define INRANGEFUNC(T)                                       \
-void inRange(const Size2D &,                                 \
-             const T *, ptrdiff_t,                           \
-             const T *, ptrdiff_t,                           \
-             const T *, ptrdiff_t,                           \
-             u8 *, ptrdiff_t)                                \
-{                                                            \
-    internal::assertSupportedConfiguration();                \
-}
-#endif
-
-// One inRange() overload per supported element type.
-INRANGEFUNC(u8)
-INRANGEFUNC(s8)
-INRANGEFUNC(u16)
-INRANGEFUNC(s16)
-INRANGEFUNC(s32)
-INRANGEFUNC(f32)
-
-} // namespace CAROTENE_NS
--- a/3rdparty/carotene/src/integral.cpp
+++ b/3rdparty/carotene/src/integral.cpp
@ -1,238 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2012-2014, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-#include "common.hpp"
-
-namespace CAROTENE_NS {
-
-// Computes the integral image of an 8-bit image:
-//   sum(y, x) = sum of src(r, c) over all r <= y and c <= x.
-// The output has the same number of rows and columns as the input (no extra
-// zero row / column is produced).
-//   size                 - image size
-//   srcBase / srcStride  - source image
-//   sumBase / sumStride  - destination, one u32 per source element
-// Without CAROTENE_NEON the function body is a no-op.
-void integral(const Size2D &size,
-              const u8 * srcBase, ptrdiff_t srcStride,
-              u32 * sumBase, ptrdiff_t sumStride)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    // Row 0 is processed unconditionally below, so an empty image has to be
-    // rejected first; otherwise a zero-height image with non-zero width would
-    // write to a row that does not exist.
-    if (size.width == 0 || size.height == 0)
-        return;
-
-    uint32x4_t v_zero = vmovq_n_u32(0u);
-
-    // the first iteration
-    const u8 * src = internal::getRowPtr(srcBase, srcStride, 0);
-    u32 * sum = internal::getRowPtr(sumBase, sumStride, 0);
-
-    // prev carries the running row sum of all previous 8-element groups
-    // (the same value in every lane).
-    uint32x4_t prev = v_zero;
-    size_t j = 0u;
-
-    for ( ; j + 7 < size.width; j += 8)
-    {
-        internal::prefetch(sum + j);
-        internal::prefetch(src + j);
-
-        // el8shrK: the 8 source bytes shifted by K byte positions inside the
-        // 64-bit lane, zero-filled (assumes the usual little-endian lane layout).
-        uint8x8_t el8shr0 = vld1_u8(src + j);
-        uint8x8_t el8shr1 = vreinterpret_u8_u64(vshl_n_u64(vreinterpret_u64_u8(el8shr0), 8));
-        uint8x8_t el8shr2 = vreinterpret_u8_u64(vshl_n_u64(vreinterpret_u64_u8(el8shr0), 16));
-        uint8x8_t el8shr3 = vreinterpret_u8_u64(vshl_n_u64(vreinterpret_u64_u8(el8shr0), 24));
-
-        uint16x8_t el8shr12 =  vaddl_u8(el8shr1, el8shr2);
-        uint16x8_t el8shr03 =  vaddl_u8(el8shr0, el8shr3);
-
-        // el8[k] = s[k] + s[k-1] + s[k-2] + s[k-3]: the low half already holds
-        // prefix sums of elements 0..3, low + high gives those of elements 4..7.
-        uint16x8_t el8 = vaddq_u16(el8shr12, el8shr03);
-        uint16x4_t el4h = vadd_u16(vget_low_u16(el8), vget_high_u16(el8));
-
-        uint32x4_t vsuml = vaddw_u16(prev, vget_low_u16(el8));
-        uint32x4_t vsumh = vaddw_u16(prev, el4h);
-
-        vst1q_u32(sum + j, vsuml);
-        vst1q_u32(sum + j + 4, vsumh);
-
-        // Lane 3 of el4h is the sum of all 8 elements of this group.
-        prev = vaddw_u16(prev, vdup_lane_u16(el4h, 3));
-    }
-
-    // Scalar tail of the first row.
-    for (u32 v = vgetq_lane_u32(prev, 3); j < size.width; ++j)
-        sum[j] = (v += src[j]);
-
-    // the others
-    for (size_t i = 1; i < size.height ; ++i)
-    {
-        src = internal::getRowPtr(srcBase, srcStride, i);
-        u32 * prevSum = internal::getRowPtr(sumBase, sumStride, i - 1);
-        sum = internal::getRowPtr(sumBase, sumStride, i);
-
-        prev = v_zero;
-        j = 0u;
-
-        for ( ; j + 7 < size.width; j += 8)
-        {
-            internal::prefetch(sum + j);
-            internal::prefetch(src + j);
-
-            // Start from the already finished sums of the row above.
-            uint32x4_t vsuml = vld1q_u32(prevSum + j);
-            uint32x4_t vsumh = vld1q_u32(prevSum + j + 4);
-
-            uint8x8_t el8shr0 = vld1_u8(src + j);
-            uint8x8_t el8shr1 = vreinterpret_u8_u64(vshl_n_u64(vreinterpret_u64_u8(el8shr0), 8));
-            uint8x8_t el8shr2 = vreinterpret_u8_u64(vshl_n_u64(vreinterpret_u64_u8(el8shr0), 16));
-            uint8x8_t el8shr3 = vreinterpret_u8_u64(vshl_n_u64(vreinterpret_u64_u8(el8shr0), 24));
-
-            vsuml = vaddq_u32(vsuml, prev);
-            vsumh = vaddq_u32(vsumh, prev);
-
-            uint16x8_t el8shr12 =  vaddl_u8(el8shr1, el8shr2);
-            uint16x8_t el8shr03 =  vaddl_u8(el8shr0, el8shr3);
-
-            uint16x8_t el8 = vaddq_u16(el8shr12, el8shr03);
-            uint16x4_t el4h = vadd_u16(vget_low_u16(el8), vget_high_u16(el8));
-
-            vsuml = vaddw_u16(vsuml, vget_low_u16(el8));
-            vsumh = vaddw_u16(vsumh, el4h);
-
-            vst1q_u32(sum + j, vsuml);
-            vst1q_u32(sum + j + 4, vsumh);
-
-            prev = vaddw_u16(prev, vdup_lane_u16(el4h, 3));
-        }
-
-        // Scalar tail: running sum of this row plus the value from the row above.
-        for (u32 v = vgetq_lane_u32(prev, 3); j < size.width; ++j)
-            sum[j] = (v += src[j]) + prevSum[j];
-    }
-#else
-    (void)size;
-    (void)srcBase;
-    (void)srcStride;
-    (void)sumBase;
-    (void)sumStride;
-#endif
-}
-
-// Computes the integral image of squared values of an 8-bit image:
-//   sqsum(y, x) = sum of src(r, c)^2 over all r <= y and c <= x.
-// The output has the same number of rows and columns as the input.
-//   size                     - image size
-//   srcBase / srcStride      - source image
-//   sqsumBase / sqsumStride  - destination, one f64 per source element
-// Without CAROTENE_NEON the function body is a no-op.
-void sqrIntegral(const Size2D &size,
-                 const u8 * srcBase, ptrdiff_t srcStride,
-                 f64 * sqsumBase, ptrdiff_t sqsumStride)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    // Row 0 is processed unconditionally below, so an empty image has to be
-    // rejected first; otherwise a zero-height image with non-zero width would
-    // write to a row that does not exist.
-    if (size.width == 0 || size.height == 0)
-        return;
-
-    uint16x8_t v_zero8 = vmovq_n_u16(0u);
-
-    // the first iteration
-    const u8 * src = internal::getRowPtr(srcBase, srcStride, 0);
-    f64 * sqsum = internal::getRowPtr(sqsumBase, sqsumStride, 0);
-
-    // prev is the running sum of squares of all previous elements of the row.
-    double prev = 0.;
-    size_t j = 0u;
-
-    for ( ; j + 7 < size.width; j += 8)
-    {
-        internal::prefetch(sqsum + j);
-        internal::prefetch(src + j);
-
-        uint8x8_t vsrc = vld1_u8(src + j);
-
-        // el8shr0 = squares q0..q7, el8shr1 = the same shifted by one lane (0, q0..q6)
-        uint16x8_t el8shr0 = vmull_u8(vsrc, vsrc);
-        uint16x8_t el8shr1 = vextq_u16(v_zero8, el8shr0, 7);
-
-        // Pairwise sums q[k] + q[k-1], widened to 32 bits.
-        uint32x4_t el8shr01l =  vaddl_u16(vget_low_u16(el8shr0), vget_low_u16(el8shr1));
-        uint32x4_t el8shr01h =  vaddl_u16(vget_high_u16(el8shr0), vget_high_u16(el8shr1));
-
-        uint32x4_t el4h = vaddq_u32(el8shr01l, el8shr01h);
-
-        // Combine the pairwise sums into the prefix sums of elements 2..3, 4..5, 6..7.
-        uint32x2_t el2l = vadd_u32(vget_low_u32(el8shr01l), vget_high_u32(el8shr01l));
-        uint32x2_t el2hl = vadd_u32(vget_low_u32(el4h), vget_high_u32(el8shr01l));
-        uint32x2_t el2hh = vadd_u32(vget_low_u32(el4h), vget_high_u32(el4h));
-
-        // buf[k] = q0 + ... + qk; the conversion to f64 is done in scalar code.
-        u32 buf[8];
-        vst1_u32(buf, vget_low_u32(el8shr01l));
-        vst1_u32(buf+2, el2l);
-        vst1_u32(buf+4, el2hl);
-        vst1_u32(buf+6, el2hh);
-        for(u32 k=0; k < 8; k++)
-            sqsum[j+k] = prev + buf[k];
-        prev += buf[7];
-    }
-
-    // Scalar tail of the first row.
-    for (; j < size.width; ++j)
-        sqsum[j] = (prev += src[j]*src[j]);
-
-    // the others
-    for (size_t i = 1; i < size.height ; ++i)
-    {
-        src = internal::getRowPtr(srcBase, srcStride, i);
-        f64 * prevSqSum = internal::getRowPtr(sqsumBase, sqsumStride, i - 1);
-        sqsum = internal::getRowPtr(sqsumBase, sqsumStride, i);
-
-        prev = 0.;
-        j = 0u;
-
-        for ( ; j + 7 < size.width; j += 8)
-        {
-            internal::prefetch(sqsum + j);
-            internal::prefetch(src + j);
-
-            uint8x8_t vsrc = vld1_u8(src + j);
-
-            uint16x8_t el8shr0 = vmull_u8(vsrc, vsrc);
-            uint16x8_t el8shr1 = vextq_u16(v_zero8, el8shr0, 7);
-
-            uint32x4_t el8shr01l =  vaddl_u16(vget_low_u16(el8shr0), vget_low_u16(el8shr1));
-            uint32x4_t el8shr01h =  vaddl_u16(vget_high_u16(el8shr0), vget_high_u16(el8shr1));
-
-            uint32x4_t el4h = vaddq_u32(el8shr01l, el8shr01h);
-
-            uint32x2_t el2l = vadd_u32(vget_low_u32(el8shr01l), vget_high_u32(el8shr01l));
-            uint32x2_t el2hl = vadd_u32(vget_low_u32(el4h), vget_high_u32(el8shr01l));
-            uint32x2_t el2hh = vadd_u32(vget_low_u32(el4h), vget_high_u32(el4h));
-
-            u32 buf[8];
-            vst1_u32(buf, vget_low_u32(el8shr01l));
-            vst1_u32(buf+2, el2l);
-            vst1_u32(buf+4, el2hl);
-            vst1_u32(buf+6, el2hh);
-            // Row prefix sum plus the finished value from the row above.
-            for(u32 k=0; k < 8; k++)
-                sqsum[j+k] = prev + prevSqSum[j+k] + buf[k];
-            prev += buf[7];
-        }
-
-        // Scalar tail of the row.
-        for (; j < size.width; ++j)
-            sqsum[j] = (prev += src[j]*src[j]) + prevSqSum[j];
-    }
-#else
-    (void)size;
-    (void)srcBase;
-    (void)srcStride;
-    (void)sqsumBase;
-    (void)sqsumStride;
-#endif
-}
-
-} // namespace CAROTENE_NS
--- a/3rdparty/carotene/src/intrinsics.hpp
+++ b/3rdparty/carotene/src/intrinsics.hpp
@ -1,112 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2014, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-#ifndef CAROTENE_INTRINSICS_HPP
-#define CAROTENE_INTRINSICS_HPP
-
-#include <carotene/definitions.hpp>
-
-#include <arm_neon.h>
-
-namespace CAROTENE_NS { namespace internal {
-
-/////////////// Custom NEON intrinsics ///////////////////
-
-// calculate reciprocal value
-
-// Approximates 1 / val for four floats: hardware estimate followed by two
-// refinement steps (vrecpsq_f32 supplies the correction factor for each step).
-inline float32x4_t vrecpq_f32(float32x4_t val)
-{
-    float32x4_t estimate = vrecpeq_f32(val);
-    for (int step = 0; step < 2; ++step)
-        estimate = vmulq_f32(vrecpsq_f32(val, estimate), estimate);
-    return estimate;
-}
-
-// Approximates 1 / val for two floats: hardware estimate followed by two
-// refinement steps (vrecps_f32 supplies the correction factor for each step).
-inline float32x2_t vrecp_f32(float32x2_t val)
-{
-    float32x2_t estimate = vrecpe_f32(val);
-    for (int step = 0; step < 2; ++step)
-        estimate = vmul_f32(vrecps_f32(val, estimate), estimate);
-    return estimate;
-}
-
-// calculate reciprocal square root and square root values
-
-// Approximates 1 / sqrt(val) for four floats: hardware estimate followed by
-// two refinement steps based on vrsqrtsq_f32.
-inline float32x4_t vrsqrtq_f32(float32x4_t val)
-{
-    float32x4_t estimate = vrsqrteq_f32(val);
-    for (int step = 0; step < 2; ++step)
-    {
-        const float32x4_t squared = vmulq_f32(estimate, estimate);
-        estimate = vmulq_f32(vrsqrtsq_f32(squared, val), estimate);
-    }
-    return estimate;
-}
-
-// Approximates 1 / sqrt(val) for two floats: hardware estimate followed by
-// two refinement steps based on vrsqrts_f32.
-inline float32x2_t vrsqrt_f32(float32x2_t val)
-{
-    float32x2_t estimate = vrsqrte_f32(val);
-    for (int step = 0; step < 2; ++step)
-    {
-        const float32x2_t squared = vmul_f32(estimate, estimate);
-        estimate = vmul_f32(vrsqrts_f32(squared, val), estimate);
-    }
-    return estimate;
-}
-
-// Approximates sqrt(val) for four floats as the reciprocal of the
-// reciprocal square root.
-inline float32x4_t vsqrtq_f32(float32x4_t val)
-{
-    const float32x4_t inverseRoot = vrsqrtq_f32(val);
-    return vrecpq_f32(inverseRoot);
-}
-
-// Approximates sqrt(val) for two floats as the reciprocal of the
-// reciprocal square root.
-inline float32x2_t vsqrt_f32(float32x2_t val)
-{
-    const float32x2_t inverseRoot = vrsqrt_f32(val);
-    return vrecp_f32(inverseRoot);
-}
-
-// table lookup with the table in a 128-bit register
-
-// Byte table lookup: a is the 16-byte table, b holds the 8 indices.
-inline uint8x8_t vqtbl1_u8 (uint8x16_t a, uint8x8_t b)
-{
-#ifndef __aarch64__
-    // No native instruction here: view the 128-bit table as a pair of 64-bit
-    // registers and use the two-register table lookup instead.
-    union { uint8x16_t v; uint8x8x2_t w; } table = { a };
-    return vtbl2_u8(table.w, b);
-#else
-    // AArch64 provides the intrinsic itself; forward to the global one.
-    return ::vqtbl1_u8(a, b);
-#endif
-}
-
-} }
-
-#endif
--- a/3rdparty/carotene/src/laplacian.cpp
+++ b/3rdparty/carotene/src/laplacian.cpp
@ -1,713 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2015, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-#include "common.hpp"
-#include "saturate_cast.hpp"
-
-#include <vector>
-
-namespace CAROTENE_NS {
-
-// Laplacian3x3 can run when the configuration is supported, the image is at
-// least 8 pixels wide and the border mode is CONSTANT or REPLICATE.
-bool isLaplacian3x3Supported(const Size2D &size, BORDER_MODE border)
-{
-    if (!isSupportedConfiguration())
-        return false;
-
-    if (size.width < 8)
-        return false;
-
-    const bool borderHandled = border == BORDER_MODE_CONSTANT ||
-                               border == BORDER_MODE_REPLICATE;
-    return borderHandled;
-}
-
-/*
- * 3x3 filter on a u8 image: every output pixel is the u8-saturated value of
- * (sum of the 3x3 neighbourhood, centre included) - 9 * (centre pixel).
- *
- * size        - image dimensions (width must be >= 8, see isLaplacian3x3Supported)
- * srcBase     - first source row, rows are srcStride bytes apart
- * dstBase     - first destination row, rows are dstStride bytes apart
- * border      - BORDER_MODE_CONSTANT or BORDER_MODE_REPLICATE
- * borderValue - value of out-of-image pixels for BORDER_MODE_CONSTANT
- *
- * Each row is processed in two phases: a NEON loop that handles 8 pixels per
- * iteration (one block behind the data being loaded), then a scalar loop for
- * the remaining pixels. Without CAROTENE_NEON the function does nothing.
- */
-void Laplacian3x3(const Size2D &size,
-                  const u8 * srcBase, ptrdiff_t srcStride,
-                  u8 * dstBase, ptrdiff_t dstStride,
-                  BORDER_MODE border, u8 borderValue)
-{
-    internal::assertSupportedConfiguration(isLaplacian3x3Supported(size, border));
-#ifdef CAROTENE_NEON
-    const uint16x8_t v_border_x3 = vdupq_n_u16(borderValue * 3);
-    const uint16x8_t v_zero = vdupq_n_u16(0);
-    const uint8x8_t v_border = vdup_n_u8(borderValue);
-
-    uint8x8_t vsub;
-    uint16x8_t tprev = v_zero, tcurr = v_zero, tnext = v_zero;
-    uint16x8_t t0 = v_zero, t1 = v_zero, t2 = v_zero;
-
-    ptrdiff_t width = (ptrdiff_t)size.width, height = (ptrdiff_t)size.height;
-
-    for (ptrdiff_t y = 0; y < height; ++y)
-    {
-        // A NULL row pointer means "this row lies outside the image and the border is constant"
-        const u8 * srow0 = y == 0 && border == BORDER_MODE_CONSTANT ? NULL : internal::getRowPtr(srcBase, srcStride, std::max<ptrdiff_t>(y - 1, 0));
-        const u8 * srow1 = internal::getRowPtr(srcBase, srcStride, y);
-        const u8 * srow2 = y + 1 == height && border == BORDER_MODE_CONSTANT ? NULL : internal::getRowPtr(srcBase, srcStride, std::min(y + 1, height - 1));
-        u8 * drow = internal::getRowPtr(dstBase, dstStride, y);
-
-        // Column sums (three rows each) of the previous / current / next column for the scalar tail
-        s16 prevx = 0, currx = 0, nextx = 0;
-        ptrdiff_t x = 0;
-        // On the last two rows the vector loop stops one block earlier
-        const ptrdiff_t bwidth = y + 2 < height ? width : (width - 8);
-
-        // perform vertical convolution
-        for ( ; x <= bwidth; x += 8)
-        {
-            internal::prefetch(srow0 + x);
-            internal::prefetch(srow1 + x);
-            internal::prefetch(srow2 + x);
-
-            uint8x8_t x0 = !srow0 ? v_border : vld1_u8(srow0 + x);
-            uint8x8_t x1 = vld1_u8(srow1 + x);
-            uint8x8_t x2 = !srow2 ? v_border : vld1_u8(srow2 + x);
-
-            // calculate values for plain CPU part below if needed
-            if (x + 8 >= bwidth)
-            {
-                ptrdiff_t x3 = x == width ? width - 1 : x;
-                ptrdiff_t x4 = border == BORDER_MODE_CONSTANT ? x3 - 1 : std::max<ptrdiff_t>(x3 - 1, 0);
-
-                if (border == BORDER_MODE_CONSTANT && x4 < 0)
-                    // The column left of the image consists of three border pixels,
-                    // so its sum is 3 * borderValue (same as v_border_x3 in the
-                    // vector path and as the right-hand border below).
-                    prevx = borderValue * 3;
-                else
-                    prevx = (srow2 ? srow2[x4] : borderValue) + srow1[x4] + (srow0 ? srow0[x4] : borderValue);
-
-                currx = (srow2 ? srow2[x3] : borderValue) + srow1[x3] + (srow0 ? srow0[x3] : borderValue);
-            }
-
-            // make shift
-            if (x)
-            {
-                tprev = tcurr;
-                tcurr = tnext;
-            }
-
-            // and calculate next value
-            tnext = vaddw_u8(vaddl_u8(x0, x1), x2);
-
-            // make extrapolation for the first elements
-            if (!x)
-            {
-                // make border
-                if (border == BORDER_MODE_CONSTANT)
-                    tcurr = v_border_x3;
-                else if (border == BORDER_MODE_REPLICATE)
-                    tcurr = vdupq_n_u16(vgetq_lane_u16(tnext, 0));
-
-                vsub = x1;
-
-                continue;
-            }
-
-            // combine 3 "shifted" vectors
-            t0 = vextq_u16(tprev, tcurr, 7);
-            t1 = tcurr;
-            t2 = vextq_u16(tcurr, tnext, 1);
-
-            // and add them
-            t0 = vqaddq_u16(t0, vqaddq_u16(t1, t2));
-
-            // subtract 9 * centre pixel (8 * vsub + vsub) and saturate to u8
-            int16x8_t tt0 = vsubq_s16(vreinterpretq_s16_u16(t0),
-                                      vreinterpretq_s16_u16(vaddw_u8(vshll_n_u8(vsub, 3), vsub)));
-            uint8x8_t it0 = vqmovun_s16(tt0);
-            vst1_u8(drow + x - 8, it0);
-
-            vsub = x1;
-        }
-
-        // step back to the first pixel the vector loop did not store
-        x -= 8;
-        if (x == width)
-            --x;
-
-        for ( ; x < width; ++x)
-        {
-            // make extrapolation for the last elements
-            if (x + 1 >= width)
-            {
-                if (border == BORDER_MODE_CONSTANT)
-                    nextx = borderValue * 3;
-                else if (border == BORDER_MODE_REPLICATE)
-                    nextx = srow2[x] + srow1[x] + srow0[x];
-            }
-            else
-            {
-                nextx = (srow2 ? srow2[x + 1] : borderValue) +
-                                 srow1[x + 1] +
-                        (srow0 ? srow0[x + 1] : borderValue);
-            }
-
-            s32 val = (prevx + currx + nextx) - 9 * srow1[x];
-            drow[x] = internal::saturate_cast<u8>((s32)val);
-
-            // make shift
-            prevx = currx;
-            currx = nextx;
-        }
-    }
-#else
-    (void)size;
-    (void)srcBase;
-    (void)srcStride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)border;
-    (void)borderValue;
-#endif
-}
-
-// The Laplacian{1,3,5}OpenCV kernels can run when the configuration is
-// supported, the image is at least 8 pixels wide and 1 pixel high, and the
-// border mode is CONSTANT, REFLECT, REFLECT101 or REPLICATE.
-bool isLaplacianOpenCVSupported(const Size2D &size, BORDER_MODE border)
-{
-    if (!isSupportedConfiguration())
-        return false;
-
-    if (!(size.width >= 8 && size.height >= 1))
-        return false;
-
-    const bool borderHandled = border == BORDER_MODE_CONSTANT   ||
-                               border == BORDER_MODE_REFLECT    ||
-                               border == BORDER_MODE_REFLECT101 ||
-                               border == BORDER_MODE_REPLICATE;
-    return borderHandled;
-}
-
-/*
- * Cross-shaped Laplacian on a u8 image with s16 output:
- *   dst(x, y) = src(x-1, y) + src(x+1, y) + src(x, y-1) + src(x, y+1) - 4 * src(x, y)
- *
- * size        - image dimensions (see isLaplacianOpenCVSupported)
- * srcBase     - first source row, rows are srcStride bytes apart
- * dstBase     - first destination row, rows are dstStride bytes apart
- * border      - CONSTANT, REFLECT, REFLECT101 or REPLICATE
- * borderValue - value of out-of-image pixels for BORDER_MODE_CONSTANT
- *
- * Each row is processed by a NEON loop (8 pixels per iteration, one block
- * behind the data being loaded) followed by a scalar loop for the remaining
- * pixels. Without CAROTENE_NEON the function does nothing.
- */
-void Laplacian1OpenCV(const Size2D &size,
-                      const u8 * srcBase, ptrdiff_t srcStride,
-                      s16 * dstBase, ptrdiff_t dstStride,
-                      BORDER_MODE border, u8 borderValue)
-{
-    internal::assertSupportedConfiguration(isLaplacianOpenCVSupported(size, border));
-#ifdef CAROTENE_NEON
-    ptrdiff_t rows = size.height, cols = size.width;
-
-    // Synthetic row filled with borderValue, used as the row above/below the image
-    std::vector<u8> _tmp;
-    u8 *tmp = 0;
-    if (border == BORDER_MODE_CONSTANT)
-    {
-        // The vector loop below loads 8 bytes starting at offsets up to `cols`,
-        // so the row needs 8 bytes of slack past `cols` (in addition to the
-        // 2-byte margins) to keep those loads inside the buffer.
-        _tmp.assign(cols + 4 + 8,borderValue);
-        tmp = &_tmp[2];
-    }
-
-    for( ptrdiff_t y = 0; y < rows; y++ )
-    {
-        const u8* v0 = 0;
-        const u8* v1 = internal::getRowPtr(srcBase, srcStride, y);
-        const u8* v2 = 0;
-        // make border
-        if (border == BORDER_MODE_REFLECT101) {
-            v0 = internal::getRowPtr(srcBase, srcStride, y > 0 ? y-1 : y+1);
-            v2 = internal::getRowPtr(srcBase, srcStride, y < rows-1 ? y+1 : rows > 1 ? rows-2 : 0);
-        } else  if (border == BORDER_MODE_CONSTANT) {
-            v0 = y > 0 ? internal::getRowPtr(srcBase, srcStride, y-1) : tmp;
-            v2 =  y < rows-1 ? internal::getRowPtr(srcBase, srcStride, y+1) : tmp;
-        } else {
-            v0 = internal::getRowPtr(srcBase, srcStride, y > 0 ? y-1 : 0);
-            v2 = internal::getRowPtr(srcBase, srcStride, y < rows-1 ? y+1 : rows > 0 ? rows-1 : 0);
-        }
-        s16* drow = internal::getRowPtr(dstBase, dstStride, y);
-
-        int16x8_t tcurr = vmovq_n_s16(0x0);
-        int16x8_t tnext = vmovq_n_s16(0x0);
-        int16x8_t t0, t2;
-        // xx0 / xx1 / xx2: previous, current and next 8-pixel blocks of the centre row
-        uint8x8_t xx0 = vmov_n_u8(0x0);
-        uint8x8_t xx1 = vmov_n_u8(0x0);
-        uint8x8_t xx2 = vmov_n_u8(0x0);
-        ptrdiff_t x = 0;
-        // On the last two rows the vector loop stops one block earlier
-        const ptrdiff_t bcols = y + 2 < rows ? cols : (cols - 8);
-        for( ; x <= bcols; x += 8 )
-        {
-            internal::prefetch(v0 + x);
-            internal::prefetch(v1 + x);
-            internal::prefetch(v2 + x);
-
-            uint8x8_t x0 = vld1_u8(v0 + x);
-            uint8x8_t x1 = vld1_u8(v1 + x);
-            uint8x8_t x2 = vld1_u8(v2 + x);
-
-            if(x) {
-                xx0 = xx1;
-                xx1 = xx2;
-            } else {
-                xx1 = x1;
-                // make border
-                    // lane 7 becomes the left neighbour of pixel 0 on the next iteration
-                    if (border == BORDER_MODE_REPLICATE || border == BORDER_MODE_REFLECT)
-                    {
-                        xx1 = vset_lane_u8(vget_lane_u8(x1, 0),x1, 7);
-                    }
-                    else if (border == BORDER_MODE_CONSTANT)
-                    {
-                        xx1 = vset_lane_u8(borderValue, x1, 7);
-                    }
-                    else if (border == BORDER_MODE_REFLECT101)
-                    {
-                        xx1 = vset_lane_u8(vget_lane_u8(x1, 1),x1, 7);
-                    }
-            }
-            xx2 = x1;
-
-            if(x) {
-                tcurr = tnext;
-            }
-            // vertical part: above + below - 4 * centre
-            tnext = vsubq_s16(vreinterpretq_s16_u16(vaddl_u8(x0, x2)),
-                              vreinterpretq_s16_u16(vshll_n_u8(x1, 2)));
-
-            if(!x) {
-                tcurr = tnext;
-                continue;
-            }
-            // horizontal part: left and right neighbours of the previous block
-            t0 = vreinterpretq_s16_u16(vmovl_u8(vext_u8(xx0, xx1, 7)));
-            t2 = vreinterpretq_s16_u16(vmovl_u8(vext_u8(xx1, xx2, 1)));
-            t0 = vaddq_s16(vqaddq_s16(t0, t2), tcurr);
-
-            vst1q_s16(drow + x - 8, t0);
-        }
-
-        // step back to the first pixel the vector loop did not finish
-        x -= 8;
-        if(x == cols){
-            x--;
-        }
-
-        for( ; x < cols; x++ )
-        {
-            s16 nextx;
-            s16 prevx;
-            // make border
-            if (border == BORDER_MODE_REPLICATE || border == BORDER_MODE_REFLECT)
-            {
-                prevx = x == 0 ? v1[0] : v1[x-1];
-                nextx = x == cols-1 ? v1[x] : v1[x+1];
-            }
-            else if (border == BORDER_MODE_REFLECT101)
-            {
-                prevx = x == 0 ? v1[1] : v1[x-1];
-                nextx = x == cols-1 ? v1[x-1] : v1[x+1];
-            }
-            else //if (border == BORDER_MODE_CONSTANT)
-            {
-                prevx = x == 0 ? borderValue : v1[x-1];
-                nextx = x == cols-1 ? borderValue : v1[x+1];
-            }
-            *(drow+x) = prevx + nextx - 4*v1[x] + v0[x] + v2[x];
-        }
-    }
-#else
-    (void)size;
-    (void)srcBase;
-    (void)srcStride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)border;
-    (void)borderValue;
-#endif
-}
-
-/*
- * Diagonal 3x3 Laplacian on a u8 image with s16 output:
- *   dst(x, y) = 2 * (src(x-1, y-1) + src(x+1, y-1) +
- *                    src(x-1, y+1) + src(x+1, y+1) - 4 * src(x, y))
- *
- * size        - image dimensions (see isLaplacianOpenCVSupported)
- * srcBase     - first source row, rows are srcStride bytes apart
- * dstBase     - first destination row, rows are dstStride bytes apart
- * border      - CONSTANT, REFLECT, REFLECT101 or REPLICATE
- * borderValue - value of out-of-image pixels for BORDER_MODE_CONSTANT
- *
- * Each row is processed by a NEON loop (8 pixels per iteration, one block
- * behind the data being loaded) followed by a scalar loop for the remaining
- * pixels. Without CAROTENE_NEON the function does nothing.
- */
-void Laplacian3OpenCV(const Size2D &size,
-                      const u8 * srcBase, ptrdiff_t srcStride,
-                      s16 * dstBase, ptrdiff_t dstStride,
-                      BORDER_MODE border, u8 borderValue)
-{
-    internal::assertSupportedConfiguration(isLaplacianOpenCVSupported(size, border));
-#ifdef CAROTENE_NEON
-    ptrdiff_t rows = size.height, cols = size.width;
-
-    // Synthetic row filled with borderValue, used as the row above/below the image
-    std::vector<u8> _tmp;
-    u8 *tmp = 0;
-    if (border == BORDER_MODE_CONSTANT)
-    {
-        // The vector loop below loads 8 bytes starting at offsets up to `cols`,
-        // so the row needs 8 bytes of slack past `cols` (in addition to the
-        // 2-byte margins) to keep those loads inside the buffer.
-        _tmp.assign(cols + 4 + 8,borderValue);
-        tmp = &_tmp[2];
-    }
-
-    for( ptrdiff_t y = 0; y < rows; y++ )
-    {
-        const u8* v0 = 0;
-        const u8* v1 = internal::getRowPtr(srcBase, srcStride, y);
-        const u8* v2 = 0;
-        // make border
-        if (border == BORDER_MODE_REFLECT101) {
-            v0 = internal::getRowPtr(srcBase, srcStride, y > 0 ? y-1 : y+1);
-            v2 = internal::getRowPtr(srcBase, srcStride, y < rows-1 ? y+1 : rows > 1 ? rows-2 : 0);
-        } else  if (border == BORDER_MODE_CONSTANT) {
-            v0 = y > 0 ? internal::getRowPtr(srcBase, srcStride, y-1) : tmp;
-            v2 = y < rows-1 ? internal::getRowPtr(srcBase, srcStride, y+1) : tmp;
-        } else {
-            v0 = internal::getRowPtr(srcBase, srcStride, y > 0 ? y-1 : 0);
-            v2 = internal::getRowPtr(srcBase, srcStride, y < rows-1 ? y+1 : rows > 0 ? rows-1 : 0);
-        }
-        s16* drow = internal::getRowPtr(dstBase, dstStride, y);
-
-        // tprev / tcurr / tnext: (row above + row below) sums of three consecutive blocks
-        int16x8_t tprev = vmovq_n_s16(0x0);
-        int16x8_t tcurr = vmovq_n_s16(0x0);
-        int16x8_t tnext = vmovq_n_s16(0x0);
-        // tc: 4 * centre row for the block being written
-        int16x8_t tc = vmovq_n_s16(0x0);
-        int16x8_t t0, t2, tcnext;
-        ptrdiff_t x = 0;
-        // On the last two rows the vector loop stops one block earlier
-        const ptrdiff_t bcols = y + 2 < rows ? cols : (cols - 8);
-        for( ; x <= bcols; x += 8 )
-        {
-            internal::prefetch(v0 + x);
-            internal::prefetch(v1 + x);
-            internal::prefetch(v2 + x);
-
-            uint8x8_t x0 = vld1_u8(v0 + x);
-            uint8x8_t x1 = vld1_u8(v1 + x);
-            uint8x8_t x2 = vld1_u8(v2 + x);
-            tcnext = vreinterpretq_s16_u16(vshll_n_u8(x1, 2));
-
-            if(x) {
-                tprev = tcurr;
-                tcurr = tnext;
-            }
-            tnext = vreinterpretq_s16_u16(vaddl_u8(x0, x2));
-
-            if(!x) {
-                tcurr = tnext;
-                tc = tcnext;
-
-                // make border
-                    // lane 7 becomes the left neighbour of pixel 0 on the next iteration
-                    if (border == BORDER_MODE_REPLICATE || border == BORDER_MODE_REFLECT)
-                    {
-                        tcurr = vsetq_lane_s16(vgetq_lane_s16(tcurr, 0),tcurr, 7);
-                    }
-                    else if (border == BORDER_MODE_CONSTANT)
-                    {
-                        // tcurr holds (row above + row below), so the out-of-image
-                        // column contributes 2 * borderValue, matching the scalar
-                        // path below (prevx + prevx2).
-                        tcurr = vsetq_lane_s16((s16)(2 * borderValue), tcurr, 7);
-                    }
-                    else if (border == BORDER_MODE_REFLECT101)
-                    {
-                        tcurr = vsetq_lane_s16(vgetq_lane_s16(tcurr, 1),tcurr, 7);
-                    }
-                continue;
-            }
-
-            // left and right neighbour sums of the previous block
-            t0 = vextq_s16(tprev, tcurr, 7);
-            t2 = vextq_s16(tcurr, tnext, 1);
-
-            t0 = vsubq_s16(vqaddq_s16(t0, t2), tc);
-            tc = tcnext;
-
-            t0 = vshlq_n_s16(t0, 1);
-            vst1q_s16(drow + x - 8, t0);
-        }
-        // step back to the first pixel the vector loop did not finish
-        x -= 8;
-        if(x == cols){
-            x--;
-        }
-
-        for( ; x < cols; x++ )
-        {
-            s16 nextx, nextx2;
-            s16 prevx, prevx2;
-            // make border
-            if (border == BORDER_MODE_REPLICATE || border == BORDER_MODE_REFLECT)
-            {
-                prevx = x == 0 ? v0[0] : v0[x-1];
-                prevx2 = x == 0 ? v2[0] : v2[x-1];
-                nextx = x == cols-1 ? v0[x] : v0[x+1];
-                nextx2 = x == cols-1 ? v2[x] : v2[x+1];
-            }
-            else if (border == BORDER_MODE_REFLECT101)
-            {
-                prevx = x == 0 ? v0[1] : v0[x-1];
-                prevx2 = x == 0 ? v2[1] : v2[x-1];
-                nextx = x == cols-1 ? v0[x-1] : v0[x+1];
-                nextx2 = x == cols-1 ? v2[x-1] : v2[x+1];
-            }
-            else //if (border == BORDER_MODE_CONSTANT)
-            {
-                prevx = x == 0 ? borderValue : v0[x-1];
-                prevx2 = x == 0 ? borderValue : v2[x-1];
-                nextx = x == cols-1 ? borderValue : v0[x+1];
-                nextx2 = x == cols-1 ? borderValue : v2[x+1];
-            }
-            s16 res = prevx + nextx - 4*v1[x] + prevx2 + nextx2;
-            *(drow+x) = 2*res;
-        }
-    }
-#else
-    (void)size;
-    (void)srcBase;
-    (void)srcStride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)border;
-    (void)borderValue;
-#endif
-}
-
-/*
- * 5x5 Laplacian on a u8 image with s16 output. With v0..v4 the five source
- * rows centred on y, the result is
- *   dst(x, y) = 2 * (A(x-2) + B(x-1) + C(x) + B(x+1) + A(x+2))
- * where the per-column vertical sums are
- *   A =   v0 + 2*v1 +  2*v2 + 2*v3 +   v4
- *   B = 2*v0        -  4*v2        + 2*v4
- *   C = 2*v0 - 4*v1 - 12*v2 - 4*v3 + 2*v4
- *
- * size        - image dimensions (see isLaplacianOpenCVSupported)
- * srcBase     - first source row, rows are srcStride bytes apart
- * dstBase     - first destination row, rows are dstStride bytes apart
- * border      - CONSTANT, REFLECT, REFLECT101 or REPLICATE
- * borderValue - value of out-of-image pixels for BORDER_MODE_CONSTANT
- *
- * Each row is processed by a NEON loop (8 pixels per iteration, one block
- * behind the data being loaded) followed by a scalar loop for the remaining
- * pixels. Without CAROTENE_NEON the function does nothing.
- */
-void Laplacian5OpenCV(const Size2D &size,
-                      const u8 * srcBase, ptrdiff_t srcStride,
-                      s16 * dstBase, ptrdiff_t dstStride,
-                      BORDER_MODE border, u8 borderValue)
-{
-    internal::assertSupportedConfiguration(isLaplacianOpenCVSupported(size, border));
-#ifdef CAROTENE_NEON
-    ptrdiff_t rows = size.height, cols = size.width;
-
-    // Synthetic row filled with borderValue, used for rows outside the image
-    std::vector<u8> _tmp;
-    u8 *tmp = 0;
-    if (border == BORDER_MODE_CONSTANT)
-    {
-        // The vector loop below loads 8 bytes starting at offsets up to `cols`,
-        // so the row needs 8 bytes of slack past `cols` (in addition to the
-        // 2-byte margins) to keep those loads inside the buffer.
-        _tmp.assign(cols + 4 + 8,borderValue);
-        tmp = &_tmp[2];
-    }
-
-    for( ptrdiff_t y = 0; y < rows; y++ )
-    {
-        const u8* v0 = 0;
-        const u8* v1 = 0;
-        const u8* v2 = internal::getRowPtr(srcBase, srcStride, y);
-        const u8* v3 = 0;
-        const u8* v4 = 0;
-        // make border
-        if (border == BORDER_MODE_REPLICATE) {
-            v0 = internal::getRowPtr(srcBase, srcStride, y > 1 ? y-2 : 0);
-            v1 = internal::getRowPtr(srcBase, srcStride, y > 0 ? y-1 : 0);
-            v3 = internal::getRowPtr(srcBase, srcStride, y < rows-1 ? y+1 : rows > 0 ? rows-1 : 0);
-            v4 = internal::getRowPtr(srcBase, srcStride, y < rows-2 ? y+2 : rows > 0 ? rows-1 : 0);
-        } else if (border == BORDER_MODE_REFLECT) {
-            v0 = internal::getRowPtr(srcBase, srcStride, y > 1 ? y-2 : rows > 1 ? 1-y : 0);
-            v1 = internal::getRowPtr(srcBase, srcStride, y > 0 ? y-1 : 0);
-            v3 = internal::getRowPtr(srcBase, srcStride, y < rows-1 ? y+1 : rows > 0 ? rows-1 : 0);
-            v4 = internal::getRowPtr(srcBase, srcStride, y < rows-2 ? y+2 : rows > 1 ? 2*rows-(y+3) : 0);
-        } else if (border == BORDER_MODE_REFLECT101) {
-            v0 = internal::getRowPtr(srcBase, srcStride, y > 1 ? y-2 : rows > 2-y ? 2-y : 0); ///check
-            v1 = internal::getRowPtr(srcBase, srcStride, y > 0 ? y-1 : rows > 1 ? 1 : 0);
-            v3 = internal::getRowPtr(srcBase, srcStride, y < rows-1 ? y+1 : rows > 1 ? rows-2 : 0);
-            v4 = internal::getRowPtr(srcBase, srcStride, y < rows-2 ? y+2 : rows > 2 ? 2*rows-(y+4) : 0);///bad if rows=2 y=1   rows - 4 + (2,1)
-        } else if (border == BORDER_MODE_CONSTANT) {
-            v0 = y > 1 ? internal::getRowPtr(srcBase, srcStride, y-2) : tmp;
-            v1 = y > 0 ? internal::getRowPtr(srcBase, srcStride, y-1) : tmp;
-            v3 = y < rows-1 ? internal::getRowPtr(srcBase, srcStride, y+1) : tmp;
-            v4 = y < rows-2 ? internal::getRowPtr(srcBase, srcStride, y+2) : tmp;
-        }
-        s16* drow = internal::getRowPtr(dstBase, dstStride, y);
-
-        int16x8_t tnext, tc, t0;
-        int16x8_t tnext2, tnext3;
-        int16x8_t tnext1Old, tnext2Old, tnext3Old;
-        int16x8_t tnext4OldOldOld, tnext5OldOldOld;
-
-        int16x8_t tcurr1 = vmovq_n_s16(0x0);
-        int16x8_t tnext1 = vmovq_n_s16(0x0);
-        int16x8_t tprev1 = vmovq_n_s16(0x0);
-        int16x8_t tpprev1 = vmovq_n_s16(0x0);
-        int16x8_t tppprev1 = vmovq_n_s16(0x0);
-
-        int16x8_t tnext4Old = vmovq_n_s16(0x0);
-        int16x8_t tnext5Old = vmovq_n_s16(0x0);
-        int16x8_t tnext1OldOld = vmovq_n_s16(0x0);
-        int16x8_t tnext2OldOld = vmovq_n_s16(0x0);
-        int16x8_t tnext3OldOld = vmovq_n_s16(0x0);
-        int16x8_t tnext4OldOld = vmovq_n_s16(0x0);
-        int16x8_t tnext5OldOld = vmovq_n_s16(0x0);
-
-        // do vertical convolution
-        ptrdiff_t x = 0;
-        // On the last three rows the vector loop stops one block earlier
-        const ptrdiff_t bcols = y + 3 < rows ? cols : (cols - 8);
-        for( ; x <= bcols; x += 8 )
-        {
-            internal::prefetch(v0 + x);
-            internal::prefetch(v1 + x);
-            internal::prefetch(v2 + x);
-            internal::prefetch(v3 + x);
-            internal::prefetch(v4 + x);
-
-            uint8x8_t x0 = vld1_u8(v0 + x);
-            uint8x8_t x1 = vld1_u8(v1 + x);
-            uint8x8_t x2 = vld1_u8(v2 + x);
-            uint8x8_t x3 = vld1_u8(v3 + x);
-            uint8x8_t x4 = vld1_u8(v4 + x);
-            if(x) {
-                tcurr1 = tnext1;
-            }
-
-            // age the per-block vertical sums by one iteration
-            tnext4OldOldOld = tnext4Old;
-            tnext5OldOldOld = tnext5Old;
-            tnext1Old = tnext1OldOld;
-            tnext2Old = tnext2OldOld;
-            tnext3Old = tnext3OldOld;
-            tnext4Old = tnext4OldOld;
-            tnext5Old = tnext5OldOld;
-
-            tnext3 = vreinterpretq_s16_u16(vaddq_u16(vaddl_u8(x3, x2),vaddl_u8(x2, x1)));
-            tnext3 = vshlq_n_s16(tnext3, 1);
-
-            tc = vreinterpretq_s16_u16(vsubl_u8(x4, x2));
-            tnext = vreinterpretq_s16_u16(vsubl_u8(x2, x0));
-            tnext2 = vsubq_s16(tc, tnext);
-
-            tnext1 = vaddq_s16(tnext3, tnext2);
-            // tnext1 = x0 + 2*x1 + 2*x2 + 2*x3 + x4
-
-            tnext2 = vshlq_n_s16(tnext2, 1);
-            // tnext2 = 2*x4 - 4*x2 + 2*x0
-
-            tnext3 = vsubq_s16(tnext2, vshlq_n_s16(tnext3, 1));
-            // tnext3 = 2*x0 - 4*x1 - 12*x2 - 4*x3  + 2*x4
-
-            tnext1OldOld = tnext1;
-            tnext2OldOld = tnext2;
-            tnext3OldOld = tnext3;
-            tnext4OldOld = tnext2;
-            tnext5OldOld = tnext1;
-
-            if(x) {
-                // align the column sums with the block being written (x - 8)
-                tnext1 = vextq_s16(tnext1Old, tnext1, 2);
-                tcurr1 = vextq_s16(tnext2Old, tnext2, 1);
-                tprev1 = tnext3Old;
-
-                if(x!=8) {
-                    tpprev1 = vextq_s16(tnext4OldOldOld, tnext4Old, 7);
-                    tppprev1 = vextq_s16(tnext5OldOldOld, tnext5Old, 6);
-                }
-            }
-
-            if(!x) {
-                // make border
-                // tpprev1 lane 0 is column -1; tprev1 lanes 0 and 1 are columns -2 and -1
-                if (border == BORDER_MODE_REPLICATE) {
-                    tpprev1 = vextq_s16(tnext2, tnext2, 7);
-                    tpprev1 = vsetq_lane_s16(vgetq_lane_s16(tpprev1, 1),tpprev1, 0);
-
-                    tprev1 = vextq_s16(tnext1, tnext1, 6);
-                    tprev1 = vsetq_lane_s16(vgetq_lane_s16(tprev1, 2),tprev1, 0);
-                    tprev1 = vsetq_lane_s16(vgetq_lane_s16(tprev1, 2),tprev1, 1);
-                } else if (border == BORDER_MODE_REFLECT) {
-                    tpprev1 = vextq_s16(tnext2, tnext2, 7);
-                    tpprev1 = vsetq_lane_s16(vgetq_lane_s16(tpprev1, 1),tpprev1, 0);
-
-                    tprev1 = vextq_s16(tnext1, tnext1, 6);
-                    tprev1 = vsetq_lane_s16(vgetq_lane_s16(tprev1, 3),tprev1, 0);
-                    tprev1 = vsetq_lane_s16(vgetq_lane_s16(tprev1, 2),tprev1, 1);
-                } else if (border == BORDER_MODE_REFLECT101) {
-                    tpprev1 = vextq_s16(tnext2, tnext2, 7);
-                    tpprev1 = vsetq_lane_s16(vgetq_lane_s16(tpprev1, 2),tpprev1, 0);
-
-                    tprev1 = vextq_s16(tnext1, tnext1, 6);
-                    tprev1 = vsetq_lane_s16(vgetq_lane_s16(tprev1, 3),tprev1, 1);
-                    tprev1 = vsetq_lane_s16(vgetq_lane_s16(tprev1, 4),tprev1, 0);
-                } else if (border == BORDER_MODE_CONSTANT) {
-                    // An out-of-image column holds borderValue in all five rows, so
-                    // its (2,0,-4,0,2) sum is 0 and its (1,2,2,2,1) sum is
-                    // 8 * borderValue, the same values the scalar path below uses.
-                    tpprev1 = vextq_s16(tnext2, tnext2, 7);
-                    tpprev1 = vsetq_lane_s16(0, tpprev1, 0);
-
-                    tprev1 = vextq_s16(tnext1, tnext1, 6);
-                    tprev1 = vsetq_lane_s16((s16)(8 * borderValue), tprev1, 0);
-                    tprev1 = vsetq_lane_s16((s16)(8 * borderValue), tprev1, 1);
-                }
-                tppprev1 = tprev1;
-                continue;
-            }
-
-            t0 = vaddq_s16(vaddq_s16(vqaddq_s16(tcurr1, tprev1), vqaddq_s16(tpprev1, tppprev1)), tnext1);
-            t0 = vaddq_s16(t0, t0);
-            vst1q_s16(drow + x - 8, t0);
-        }
-        // step back so that at least the last two pixels are done by the scalar loop
-        x -= 8;
-        if(x >= cols - 1)
-            x = cols-2;
-
-        s16 pprevx = 0;
-        s16 prevx = 0;
-        s16 nextx = 0;
-        s16 nnextx = 0;
-
-        for( ; x < cols; x++ )
-        {
-            if (x == 0) {
-                // make border
-                if (border == BORDER_MODE_REPLICATE) {
-                    pprevx = v0[0] + 2*v1[0] + 2*v2[0] + 2*v3[0] + v4[0];
-                    prevx = 2*v0[0] - 4*v2[0] + 2*v4[0];
-                } else if (border == BORDER_MODE_REFLECT) {
-                    pprevx = v0[1] + 2*v1[1] + 2*v2[1] + 2*v3[1] + v4[1];
-                    prevx = 2*v0[0] - 4*v2[0] + 2*v4[0];
-                } else if (border == BORDER_MODE_REFLECT101) {
-                    pprevx = v0[2] + 2*v1[2] + 2*v2[2] + 2*v3[2] + v4[2];
-                    prevx = 2*v0[1] - 4*v2[1] + 2*v4[1];
-                } else if (border == BORDER_MODE_CONSTANT) {
-                    pprevx = 8 * borderValue;
-                    prevx = 0;
-                }
-            } else if (x == 1) {
-                // make border
-                if (border == BORDER_MODE_REPLICATE || border == BORDER_MODE_REFLECT) {
-                    pprevx = v0[0] + 2*v1[0] + 2*v2[0] + 2*v3[0] + v4[0];
-                } else if (border == BORDER_MODE_REFLECT101) {
-                    pprevx = v0[1] + 2*v1[1] + 2*v2[1] + 2*v3[1] + v4[1];
-                } else if (border == BORDER_MODE_CONSTANT) {
-                    pprevx = 8 * borderValue;
-                }
-                prevx = 2*v0[0] - 4*v2[0] + 2*v4[0];
-            } else {
-                pprevx = v0[x-2] + 2*v1[x-2] + 2*v2[x-2] + 2*v3[x-2] + v4[x-2];
-                prevx = 2*v0[x-1] - 4*v2[x-1] + 2*v4[x-1];
-            }
-            s16 currx = 2*v0[x] - 4*v1[x] - 12*v2[x] - 4*v3[x] + 2*v4[x];
-            if (x == cols-1) {
-                // make border
-                if (border == BORDER_MODE_REPLICATE) {
-                    nextx = 2*v0[x] - 4*v2[x] + 2*v4[x];
-                    nnextx = v0[x] + 2*v1[x] + 2*v2[x] + 2*v3[x] + v4[x];
-                } else if (border == BORDER_MODE_REFLECT) {
-                    nextx = 2*v0[x] - 4*v2[x] + 2*v4[x];
-                    nnextx = v0[x-1] + 2*v1[x-1] + 2*v2[x-1] + 2*v3[x-1] + v4[x-1];
-                } else if (border == BORDER_MODE_REFLECT101) {
-                    nextx = 2*v0[x-1] - 4*v2[x-1] + 2*v4[x-1];
-                    nnextx = v0[x-2] + 2*v1[x-2] + 2*v2[x-2] + 2*v3[x-2] + v4[x-2];
-                } else if (border == BORDER_MODE_CONSTANT) {
-                    nextx = 0;
-                    nnextx = 8 * borderValue;
-                }
-            } else if (x == cols-2) {
-                // make border
-                if (border == BORDER_MODE_REPLICATE || border == BORDER_MODE_REFLECT) {
-                    nnextx = v0[x+1] + 2*v1[x+1] + 2*v2[x+1] + 2*v3[x+1] + v4[x+1];
-                } else if (border == BORDER_MODE_REFLECT101) {
-                    nnextx = v0[x] + 2*v1[x] + 2*v2[x] + 2*v3[x] + v4[x];
-                } else if (border == BORDER_MODE_CONSTANT) {
-                    nnextx = 8 * borderValue;
-                }
-                nextx = 2*v0[x+1] - 4*v2[x+1] + 2*v4[x+1];
-            } else {
-                nextx = 2*v0[x+1] - 4*v2[x+1] + 2*v4[x+1];
-                nnextx = v0[x+2] + 2*v1[x+2] + 2*v2[x+2] + 2*v3[x+2] + v4[x+2];
-            }
-            s16 res = pprevx + prevx + currx + nextx + nnextx;
-            *(drow+x) = 2*res;
-        }
-    }
-#else
-    (void)size;
-    (void)srcBase;
-    (void)srcStride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)border;
-    (void)borderValue;
-#endif
-}
-
-} // namespace CAROTENE_NS
--- a/3rdparty/carotene/src/magnitude.cpp
+++ b/3rdparty/carotene/src/magnitude.cpp
@ -1,160 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2014, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-#include "common.hpp"
-#include "vtransform.hpp"
-
-#include <cmath>
-
-namespace CAROTENE_NS {
-
-#ifdef CAROTENE_NEON
-
-namespace {
-
-// Functor computing sqrt(a*a + b*b) for s16 inputs, with the result
-// converted to s32 and saturated to s16. Provides 8-lane, 4-lane and
-// scalar overloads.
-struct Magnitude
-{
-    typedef s16 type;
-
-    // 8 lanes: the low and high halves are processed separately in f32
-    void operator() (const int16x8_t & v_src0, const int16x8_t & v_src1,
-              int16x8_t & v_dst) const
-    {
-        const int16x4_t v_lo0 = vget_low_s16(v_src0);
-        const int16x4_t v_lo1 = vget_low_s16(v_src1);
-        const float32x4_t v_sumLo = vaddq_f32(vcvtq_f32_s32(vmull_s16(v_lo0, v_lo0)),
-                                              vcvtq_f32_s32(vmull_s16(v_lo1, v_lo1)));
-
-        const int16x4_t v_hi0 = vget_high_s16(v_src0);
-        const int16x4_t v_hi1 = vget_high_s16(v_src1);
-        const float32x4_t v_sumHi = vaddq_f32(vcvtq_f32_s32(vmull_s16(v_hi0, v_hi0)),
-                                              vcvtq_f32_s32(vmull_s16(v_hi1, v_hi1)));
-
-        const int32x4_t v_rootLo = vcvtq_s32_f32(internal::vsqrtq_f32(v_sumLo));
-        const int32x4_t v_rootHi = vcvtq_s32_f32(internal::vsqrtq_f32(v_sumHi));
-
-        v_dst = vcombine_s16(vqmovn_s32(v_rootLo), vqmovn_s32(v_rootHi));
-    }
-
-    // 4 lanes
-    void operator() (const int16x4_t & v_src0, const int16x4_t & v_src1,
-              int16x4_t & v_dst) const
-    {
-        const float32x4_t v_sq0 = vcvtq_f32_s32(vmull_s16(v_src0, v_src0));
-        const float32x4_t v_sq1 = vcvtq_f32_s32(vmull_s16(v_src1, v_src1));
-        const int32x4_t v_root = vcvtq_s32_f32(internal::vsqrtq_f32(vaddq_f32(v_sq0, v_sq1)));
-        v_dst = vqmovn_s32(v_root);
-    }
-
-    // single element
-    void operator() (const short * src0, const short * src1, short * dst) const
-    {
-        const f32 a = (f32)src0[0];
-        const f32 b = (f32)src1[0];
-        dst[0] = internal::saturate_cast<s16>((s32)sqrtf(a * a + b * b));
-    }
-};
-
-// Functor computing sqrt(a*a + b*b) for f32 inputs. Provides 4-lane,
-// 2-lane and scalar overloads.
-struct MagnitudeF32
-{
-    typedef f32 type;
-
-    // 4 lanes
-    void operator() (const float32x4_t & v_src0, const float32x4_t & v_src1,
-              float32x4_t & v_dst) const
-    {
-        const float32x4_t v_sq0 = vmulq_f32(v_src0, v_src0);
-        const float32x4_t v_sq1 = vmulq_f32(v_src1, v_src1);
-        v_dst = internal::vsqrtq_f32(vaddq_f32(v_sq0, v_sq1));
-    }
-
-    // 2 lanes
-    void operator() (const float32x2_t & v_src0, const float32x2_t & v_src1,
-              float32x2_t & v_dst) const
-    {
-        const float32x2_t v_sq0 = vmul_f32(v_src0, v_src0);
-        const float32x2_t v_sq1 = vmul_f32(v_src1, v_src1);
-        v_dst = internal::vsqrt_f32(vadd_f32(v_sq0, v_sq1));
-    }
-
-    // single element
-    void operator() (const f32 * src0, const f32 * src1, f32 * dst) const
-    {
-        *dst = sqrtf(src0[0] * src0[0] + src1[0] * src1[0]);
-    }
-};
-
-} // namespace
-
-#endif
-
-// Element-wise magnitude of two s16 images: applies the Magnitude functor
-// through internal::vtransform. Without CAROTENE_NEON the function does nothing.
-void magnitude(const Size2D &size,
-               const s16 * src0Base, ptrdiff_t src0Stride,
-               const s16 * src1Base, ptrdiff_t src1Stride,
-               s16 * dstBase, ptrdiff_t dstStride)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    const Magnitude op = Magnitude();
-    internal::vtransform(size,
-                         src0Base, src0Stride,
-                         src1Base, src1Stride,
-                         dstBase, dstStride,
-                         op);
-#else
-    // NEON is unavailable: mark every parameter as intentionally unused
-    (void)size;
-    (void)src0Base; (void)src0Stride;
-    (void)src1Base; (void)src1Stride;
-    (void)dstBase;  (void)dstStride;
-#endif
-}
-
-// Element-wise magnitude of two f32 images: applies the MagnitudeF32 functor
-// through internal::vtransform. Without CAROTENE_NEON the function does nothing.
-void magnitude(const Size2D &size,
-               const f32 * src0Base, ptrdiff_t src0Stride,
-               const f32 * src1Base, ptrdiff_t src1Stride,
-               f32 * dstBase, ptrdiff_t dstStride)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    const MagnitudeF32 op = MagnitudeF32();
-    internal::vtransform(size,
-                         src0Base, src0Stride,
-                         src1Base, src1Stride,
-                         dstBase, dstStride,
-                         op);
-#else
-    // NEON is unavailable: mark every parameter as intentionally unused
-    (void)size;
-    (void)src0Base; (void)src0Stride;
-    (void)src1Base; (void)src1Stride;
-    (void)dstBase;  (void)dstStride;
-#endif
-}
-
-} // namespace CAROTENE_NS
--- a/3rdparty/carotene/src/meanstddev.cpp
+++ b/3rdparty/carotene/src/meanstddev.cpp
@ -1,163 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2014, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-#include "common.hpp"
-
-#include <cmath>
-
-namespace CAROTENE_NS {
-
-void meanStdDev(const Size2D &size,
-                const u8 * srcBase, ptrdiff_t srcStride,
-                f32 * pMean, f32 * pStdDev)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    f64 fsum = 0.0f, fsqsum = 0.0f;
-    sqsum(size, srcBase, srcStride, &fsum, &fsqsum, 1);
-
-    // calc mean and stddev
-    f64 itotal = 1.0 / size.total();
-    f64 mean = fsum * itotal;
-    f64 stddev = sqrt(std::max(fsqsum * itotal - mean * mean, 0.0));
-
-    if (pMean)
-        *pMean = mean;
-    if (pStdDev)
-        *pStdDev = stddev;
-#else
-    (void)size;
-    (void)srcBase;
-    (void)srcStride;
-    (void)pMean;
-    (void)pStdDev;
-#endif
-}
-
-void meanStdDev(const Size2D &size,
-                const u16 * srcBase, ptrdiff_t srcStride,
-                f32 * pMean, f32 * pStdDev)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    size_t blockSize0 = 1 << 10, roiw4 = size.width & ~3;
-    f64 fsum = 0.0f, fsqsum = 0.0f;
-
-    f32 arsum[8];
-    uint32x4_t v_zero = vdupq_n_u32(0u), v_sum;
-    float32x4_t v_zero_f = vdupq_n_f32(0.0f), v_sqsum;
-
-    for (size_t i = 0; i < size.height; ++i)
-    {
-        const u16 * src = internal::getRowPtr(srcBase, srcStride, i);
-        size_t j = 0u;
-
-        while (j < roiw4)
-        {
-            size_t blockSize = std::min(roiw4 - j, blockSize0) + j;
-            v_sum = v_zero;
-            v_sqsum = v_zero_f;
-
-            for ( ; j + 16 < blockSize ; j += 16)
-            {
-                internal::prefetch(src + j);
-                uint16x8_t v_src0 = vld1q_u16(src + j), v_src1 = vld1q_u16(src + j + 8);
-
-                // 0
-                uint32x4_t v_srclo = vmovl_u16(vget_low_u16(v_src0));
-                uint32x4_t v_srchi = vmovl_u16(vget_high_u16(v_src0));
-                v_sum = vaddq_u32(v_sum, vaddq_u32(v_srclo, v_srchi));
-                float32x4_t v_srclo_f = vcvtq_f32_u32(v_srclo);
-                float32x4_t v_srchi_f = vcvtq_f32_u32(v_srchi);
-                v_sqsum = vmlaq_f32(v_sqsum, v_srclo_f, v_srclo_f);
-                v_sqsum = vmlaq_f32(v_sqsum, v_srchi_f, v_srchi_f);
-
-                // 1
-                v_srclo = vmovl_u16(vget_low_u16(v_src1));
-                v_srchi = vmovl_u16(vget_high_u16(v_src1));
-                v_sum = vaddq_u32(v_sum, vaddq_u32(v_srclo, v_srchi));
-                v_srclo_f = vcvtq_f32_u32(v_srclo);
-                v_srchi_f = vcvtq_f32_u32(v_srchi);
-                v_sqsum = vmlaq_f32(v_sqsum, v_srclo_f, v_srclo_f);
-                v_sqsum = vmlaq_f32(v_sqsum, v_srchi_f, v_srchi_f);
-            }
-
-            for ( ; j < blockSize; j += 4)
-            {
-                uint32x4_t v_src = vmovl_u16(vld1_u16(src + j));
-                float32x4_t v_src_f = vcvtq_f32_u32(v_src);
-                v_sum = vaddq_u32(v_sum, v_src);
-                v_sqsum = vmlaq_f32(v_sqsum, v_src_f, v_src_f);
-            }
-
-            vst1q_f32(arsum, vcvtq_f32_u32(v_sum));
-            vst1q_f32(arsum + 4, v_sqsum);
-
-            fsum += (f64)arsum[0] + arsum[1] + arsum[2] + arsum[3];
-            fsqsum += (f64)arsum[4] + arsum[5] + arsum[6] + arsum[7];
-        }
-
-        // collect a few last elements in the current row
-        for ( ; j < size.width; ++j)
-        {
-            f32 srcval = src[j];
-            fsum += srcval;
-            fsqsum += srcval * srcval;
-        }
-    }
-
-    // calc mean and stddev
-    f64 itotal = 1.0 / size.total();
-    f64 mean = fsum * itotal;
-    f64 stddev = sqrt(std::max(fsqsum * itotal - mean * mean, 0.0));
-
-    if (pMean)
-        *pMean = mean;
-    if (pStdDev)
-        *pStdDev = stddev;
-#else
-    (void)size;
-    (void)srcBase;
-    (void)srcStride;
-    (void)pMean;
-    (void)pStdDev;
-#endif
-}
-
-} // namespace CAROTENE_NS
--- a/3rdparty/carotene/src/median_filter.cpp
+++ b/3rdparty/carotene/src/median_filter.cpp
@ -1,227 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2012-2014, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-#include "common.hpp"
-
-/*
- * The code here is based on the code in
- * <http://ndevilla.free.fr/median/median/src/optmed.c>, which is in public domain.
- * See also <http://ndevilla.free.fr/median/median/index.html>.
- */
-
-namespace CAROTENE_NS {
-
-#ifdef CAROTENE_NEON
-namespace {
-
-    uint8x16_t getLeftReplicate(uint8x16_t r, u32 cn)
-    {
-        u8 buf[16+8];
-        vst1q_u8(buf+cn, r);
-        for (u32 i = 0; i < cn; ++i) buf[i] = buf[cn+i];
-        return vld1q_u8(buf);
-    }
-
-    uint8x8_t getRightReplicate(uint8x8_t r, u32 cn)
-    {
-        u8 buf[8+8];
-        vst1_u8(buf, r);
-        for (u32 i = 0; i < cn; ++i) buf[8+i] = buf[8-cn+i];
-        return vld1_u8(buf+cn);
-    }
-
-} // namespace
-
-//o------^-------^-----------------------------o 0
-//       |       |
-//o--^---v---^---|-------^---------------------o 1
-//   |       |   |       |
-//o--v-------v---|-------|-^-------^-------^---o 2
-//               |       | |       |       |
-//o------^-------v-----^-|-|-------|-------|---o 3
-//       |             | | |       |       |
-//o--^---v---^-----^---|-v-|---^---v---^---v---o 4
-//   |       |     |   |   |   |       |
-//o--v-------v---^-|---|---v---|-------|-------o 5
-//               | |   |       |       |
-//o------^-------|-|---v-------|-------v-------o 6
-//       |       | |           |
-//o--^---v---^---|-v-----------v---------------o 7
-//   |       |   |
-//o--v-------v---v-----------------------------o 8
-
-#define ELT(num, level) v ## num ## _lv ## level
-#define PIX_SORT(a, alvl, b, blvl, newlvl) \
-    PIX_MIN(a, alvl, b, blvl, newlvl); \
-    PIX_MAX(a, alvl, b, blvl, newlvl);
-
-#define SORT9 \
-    PIX_SORT(1, 00, 2, 00, 01); \
-    PIX_SORT(4, 00, 5, 00, 02); \
-    PIX_SORT(7, 00, 8, 00, 03); \
-    PIX_SORT(0, 00, 1, 01, 04); \
-    PIX_SORT(3, 00, 4, 02, 05); \
-    PIX_SORT(6, 00, 7, 03, 06); \
-    PIX_SORT(1, 04, 2, 01, 07); \
-    PIX_SORT(4, 05, 5, 02, 08); \
-    PIX_SORT(7, 06, 8, 03, 09); \
-    PIX_MAX (0, 04, 3, 05, 10); \
-    PIX_MIN (5, 08, 8, 09, 11); \
-    PIX_SORT(4, 08, 7, 09, 12); \
-    PIX_MAX (3, 10, 6, 06, 13); \
-    PIX_MAX (1, 07, 4, 12, 14); \
-    PIX_MIN (2, 07, 5, 11, 15); \
-    PIX_MIN (4, 14, 7, 12, 16); \
-    PIX_SORT(4, 16, 2, 15, 17); \
-    PIX_MAX (6, 13, 4, 17, 18); \
-    PIX_MIN (4, 18, 2, 17, 19);
-
-#endif
-
-bool isMedianFilter3x3Supported(const Size2D &size, u32 numChannels)
-{
-    return isSupportedConfiguration() && size.width >= 16 + numChannels && numChannels <= 8;
-}
-
-void medianFilter3x3(const Size2D &size, u32 numChannels,
-                     const u8 *srcBase, ptrdiff_t srcStride,
-                     const Margin &srcMargin,
-                     u8 *dstBase, ptrdiff_t dstStride)
-{
-    internal::assertSupportedConfiguration(isMedianFilter3x3Supported(size, numChannels));
-#ifdef CAROTENE_NEON
-    u32 cn = numChannels;
-    size_t colsn = size.width * cn;
-
-    for (size_t i = 0; i < size.height; ++i) {
-        const u8* psrc1 = internal::getRowPtr(srcBase, srcStride, i);
-        const u8* psrc0 = i == 0 && srcMargin.top == 0 ? psrc1 : psrc1 - srcStride;
-        const u8* psrc2 = i + 1 == size.height && srcMargin.bottom == 0 ? psrc1 : psrc1 + srcStride;
-        u8* pdst = internal::getRowPtr(dstBase, dstStride, i);
-        size_t j = 0;
-
-        {
-            uint8x16_t v3_lv00 = vld1q_u8(psrc0);
-            uint8x16_t v4_lv00 = vld1q_u8(psrc1);
-            uint8x16_t v5_lv00 = vld1q_u8(psrc2);
-            uint8x16_t v6_lv00 = vld1q_u8(psrc0 + cn);
-            uint8x16_t v7_lv00 = vld1q_u8(psrc1 + cn);
-            uint8x16_t v8_lv00 = vld1q_u8(psrc2 + cn);
-            uint8x16_t v0_lv00 = srcMargin.left > 0 ? vld1q_u8(psrc0 - cn) : getLeftReplicate(v3_lv00, cn);
-            uint8x16_t v1_lv00 = srcMargin.left > 0 ? vld1q_u8(psrc1 - cn) : getLeftReplicate(v4_lv00, cn);
-            uint8x16_t v2_lv00 = srcMargin.left > 0 ? vld1q_u8(psrc2 - cn) : getLeftReplicate(v5_lv00, cn);
-
-            goto medianBlur3x3_mainBody;
-
-            for (; j < colsn - 16; j += 16) {
-                internal::prefetch(psrc0 + j);
-                internal::prefetch(psrc1 + j);
-                internal::prefetch(psrc2 + j);
-
-                v0_lv00 = vld1q_u8(psrc0 + j - cn);
-                v1_lv00 = vld1q_u8(psrc1 + j - cn);
-                v2_lv00 = vld1q_u8(psrc2 + j - cn);
-                v3_lv00 = vld1q_u8(psrc0 + j);
-                v4_lv00 = vld1q_u8(psrc1 + j);
-                v5_lv00 = vld1q_u8(psrc2 + j);
-                v6_lv00 = vld1q_u8(psrc0 + j + cn);
-                v7_lv00 = vld1q_u8(psrc1 + j + cn);
-                v8_lv00 = vld1q_u8(psrc2 + j + cn);
-
-medianBlur3x3_mainBody:
-
-#define PIX_MIN(a, alvl, b, blvl, newlvl) uint8x16_t ELT(a, newlvl) = vminq_u8(ELT(a, alvl), ELT(b, blvl))
-#define PIX_MAX(a, alvl, b, blvl, newlvl) uint8x16_t ELT(b, newlvl) = vmaxq_u8(ELT(a, alvl), ELT(b, blvl))
-                SORT9;
-#undef PIX_MAX
-#undef PIX_MIN
-
-                vst1q_u8(pdst + j, v4_lv19);
-            }
-        }
-
-        {
-            size_t k = colsn - 8;
-            uint8x8_t v0_lv00 = vld1_u8(psrc0 + k - cn);
-            uint8x8_t v1_lv00 = vld1_u8(psrc1 + k - cn);
-            uint8x8_t v2_lv00 = vld1_u8(psrc2 + k - cn);
-            uint8x8_t v3_lv00 = vld1_u8(psrc0 + k);
-            uint8x8_t v4_lv00 = vld1_u8(psrc1 + k);
-            uint8x8_t v5_lv00 = vld1_u8(psrc2 + k);
-            uint8x8_t v6_lv00 = srcMargin.right > 0 ? vld1_u8(psrc0 + k + cn) : getRightReplicate(v3_lv00, cn);
-            uint8x8_t v7_lv00 = srcMargin.right > 0 ? vld1_u8(psrc1 + k + cn) : getRightReplicate(v4_lv00, cn);
-            uint8x8_t v8_lv00 = srcMargin.right > 0 ? vld1_u8(psrc2 + k + cn) : getRightReplicate(v5_lv00, cn);
-
-            goto medianBlur3x3_tailBody;
-
-            for (; k >= j - 8; k -= 8) {
-                v0_lv00 = vld1_u8(psrc0 + k - cn);
-                v1_lv00 = vld1_u8(psrc1 + k - cn);
-                v2_lv00 = vld1_u8(psrc2 + k - cn);
-                v3_lv00 = vld1_u8(psrc0 + k);
-                v4_lv00 = vld1_u8(psrc1 + k);
-                v5_lv00 = vld1_u8(psrc2 + k);
-                v6_lv00 = vld1_u8(psrc0 + k + cn);
-                v7_lv00 = vld1_u8(psrc1 + k + cn);
-                v8_lv00 = vld1_u8(psrc2 + k + cn);
-
-medianBlur3x3_tailBody:
-
-#define PIX_MIN(a, alvl, b, blvl, newlvl) uint8x8_t ELT(a, newlvl) = vmin_u8(ELT(a, alvl), ELT(b, blvl))
-#define PIX_MAX(a, alvl, b, blvl, newlvl) uint8x8_t ELT(b, newlvl) = vmax_u8(ELT(a, alvl), ELT(b, blvl))
-                SORT9;
-#undef PIX_MAX
-#undef PIX_MIN
-
-                vst1_u8(pdst + k, v4_lv19);
-            }
-        }
-    }
-#else
-    (void)size;
-    (void)numChannels;
-    (void)srcBase;
-    (void)srcStride;
-    (void)srcMargin;
-    (void)dstBase;
-    (void)dstStride;
-#endif
-}
-
-} // namespace CAROTENE_NS
--- a/3rdparty/carotene/src/min_max.cpp
+++ b/3rdparty/carotene/src/min_max.cpp
@ -1,139 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2014, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-#include <algorithm>
-
-#include "common.hpp"
-#include "vtransform.hpp"
-
-namespace CAROTENE_NS {
-
-#ifdef CAROTENE_NEON
-
-namespace {
-
-template <typename T>
-struct Min
-{
-    typedef T type;
-
-    void operator() (const typename internal::VecTraits<T>::vec128 & v_src0,
-                     const typename internal::VecTraits<T>::vec128 & v_src1,
-                     typename internal::VecTraits<T>::vec128 & v_dst) const
-    {
-        v_dst = internal::vminq(v_src0, v_src1);
-    }
-
-    void operator() (const typename internal::VecTraits<T>::vec64 & v_src0,
-                     const typename internal::VecTraits<T>::vec64 & v_src1,
-                     typename internal::VecTraits<T>::vec64 & v_dst) const
-    {
-        v_dst = internal::vmin(v_src0, v_src1);
-    }
-
-    void operator() (const T * src0, const T * src1, T * dst) const
-    {
-        dst[0] = std::min(src0[0], src1[0]);
-    }
-};
-
-template <typename T>
-struct Max
-{
-    typedef T type;
-
-    void operator() (const typename internal::VecTraits<T>::vec128 & v_src0,
-                     const typename internal::VecTraits<T>::vec128 & v_src1,
-                     typename internal::VecTraits<T>::vec128 & v_dst) const
-    {
-        v_dst = internal::vmaxq(v_src0, v_src1);
-    }
-
-    void operator() (const typename internal::VecTraits<T>::vec64 & v_src0,
-                     const typename internal::VecTraits<T>::vec64 & v_src1,
-                     typename internal::VecTraits<T>::vec64 & v_dst) const
-    {
-        v_dst = internal::vmax(v_src0, v_src1);
-    }
-
-    void operator() (const T * src0, const T * src1, T * dst) const
-    {
-        dst[0] = std::max(src0[0], src1[0]);
-    }
-};
-
-} // namespace
-
-#define IMPL_OP(fun, op, type)                                         \
-void fun(const Size2D &size,                                           \
-         const type * src0Base, ptrdiff_t src0Stride,                  \
-         const type * src1Base, ptrdiff_t src1Stride,                  \
-         type * dstBase, ptrdiff_t dstStride)                          \
-{                                                                      \
-    internal::assertSupportedConfiguration();                          \
-    internal::vtransform(size,                                         \
-                         src0Base, src0Stride,                         \
-                         src1Base, src1Stride,                         \
-                         dstBase, dstStride, op<type>());              \
-}
-
-#else
-
-#define IMPL_OP(fun, op, type)                    \
-void fun(const Size2D &,                          \
-         const type *, ptrdiff_t,                 \
-         const type *, ptrdiff_t,                 \
-         type *, ptrdiff_t)                       \
-{                                                 \
-    internal::assertSupportedConfiguration();     \
-}
-
-#endif
-
-#define IMPL_MINMAX(type) IMPL_OP(min, Min, type) IMPL_OP(max, Max, type)
-
-IMPL_MINMAX(u8)
-IMPL_MINMAX(s8)
-IMPL_MINMAX(u16)
-IMPL_MINMAX(s16)
-IMPL_MINMAX(u32)
-IMPL_MINMAX(s32)
-IMPL_MINMAX(f32)
-
-} // namespace CAROTENE_NS
--- a/3rdparty/carotene/src/minmaxloc.cpp
+++ b/3rdparty/carotene/src/minmaxloc.cpp
--- a/3rdparty/carotene/src/morph.cpp
+++ b/3rdparty/carotene/src/morph.cpp
@ -1,728 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2014, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-#include "common.hpp"
-
-#include <algorithm>
-#include <limits>
-#include <vector>
-#include <cstring>
-
-namespace CAROTENE_NS {
-
-bool isMorph3x3Supported(const Size2D &size, BORDER_MODE border)
-{
-    return isSupportedConfiguration() && size.width >= 16 &&
-        (border == BORDER_MODE_CONSTANT ||
-            border == BORDER_MODE_REPLICATE);
-}
-
-#ifdef CAROTENE_NEON
-
-namespace {
-
-struct ErodeVecOp
-{
-    ErodeVecOp():borderValue(0){}
-
-    ErodeVecOp(BORDER_MODE border, u8 borderValue_) :
-        borderValue(borderValue_)
-    {
-        if (border == BORDER_MODE_REPLICATE)
-            borderValue = std::numeric_limits<u8>::max();
-    }
-
-    inline uint8x16_t operator()(uint8x16_t a, uint8x16_t b) const
-    {
-        return vminq_u8(a, b);
-    }
-
-    inline uint8x8_t operator()(uint8x8_t a, uint8x8_t b) const
-    {
-        return vmin_u8(a, b);
-    }
-
-    inline u8 operator()(u8 a, u8 b) const
-    {
-        return std::min(a, b);
-    }
-
-    u8 borderValue;
-};
-
-struct DilateVecOp
-{
-    DilateVecOp():borderValue(0){}
-
-    DilateVecOp(BORDER_MODE border, u8 borderValue_) :
-        borderValue(borderValue_)
-    {
-        if (border == BORDER_MODE_REPLICATE)
-            borderValue = std::numeric_limits<u8>::min();
-    }
-
-    inline uint8x16_t operator()(uint8x16_t a, uint8x16_t b) const
-    {
-        return vmaxq_u8(a, b);
-    }
-
-    inline uint8x8_t operator()(uint8x8_t a, uint8x8_t b) const
-    {
-        return vmax_u8(a, b);
-    }
-
-    inline u8 operator()(u8 a, u8 b) const
-    {
-        return std::max(a, b);
-    }
-
-    u8 borderValue;
-};
-
-template <typename VecOp>
-void morph3x3(const Size2D &size,
-              const u8 * srcBase, ptrdiff_t srcStride,
-              u8 * dstBase, ptrdiff_t dstStride,
-              BORDER_MODE border, const VecOp & vop)
-{
-    u8 borderValue = vop.borderValue;
-    ptrdiff_t width = (ptrdiff_t)size.width, height = (ptrdiff_t)size.height;
-
-    const uint8x16_t v_zero = vdupq_n_u8(0);
-    const uint8x16_t v_border = vdupq_n_u8(borderValue);
-
-    uint8x16_t tprev = v_zero, tcurr = v_zero, tnext = v_zero;
-    uint8x16_t t0 = v_zero, t1 = v_zero, t2 = v_zero;
-
-    for (ptrdiff_t y = 0; y < height; ++y)
-    {
-        const u8 * srow0 = y == 0 && border == BORDER_MODE_CONSTANT ? NULL : internal::getRowPtr(srcBase, srcStride, std::max<ptrdiff_t>(y - 1, 0));
-        const u8 * srow1 = internal::getRowPtr(srcBase, srcStride, y);
-        const u8 * srow2 = y + 1 == height && border == BORDER_MODE_CONSTANT ? NULL : internal::getRowPtr(srcBase, srcStride, std::min(y + 1, height - 1));
-        u8 * drow = internal::getRowPtr(dstBase, dstStride, y);
-
-        u8 prevx = 0, currx = 0, nextx = 0;
-        ptrdiff_t x = 0;
-        const ptrdiff_t bwidth = y + 2 < height ? width : (width - 16);
-
-        // perform vertical convolution
-        for ( ; x <= bwidth; x += 16)
-        {
-            internal::prefetch(srow0 + x);
-            internal::prefetch(srow1 + x);
-            internal::prefetch(srow2 + x);
-
-            uint8x16_t x0 = !srow0 ? v_border : vld1q_u8(srow0 + x);
-            uint8x16_t x1 = vld1q_u8(srow1 + x);
-            uint8x16_t x2 = !srow2 ? v_border : vld1q_u8(srow2 + x);
-
-            // calculate values for plain CPU part below if needed
-            if (x + 16 >= bwidth)
-            {
-                ptrdiff_t x3 = x == width ? width - 1 : x;
-                ptrdiff_t x4 = border == BORDER_MODE_CONSTANT ? x3 - 1 : std::max<ptrdiff_t>(x3 - 1, 0);
-
-                if (border == BORDER_MODE_CONSTANT && x4 < 0)
-                    prevx = borderValue;
-                else
-                    prevx = vop(srow1[x4],
-                                vop(srow2 ? srow2[x4] : borderValue,
-                                    srow0 ? srow0[x4] : borderValue));
-
-                currx = vop(srow2 ? srow2[x3] : borderValue, vop(srow1[x3], srow0 ? srow0[x3] : borderValue));
-            }
-
-            // make shift
-            if (x)
-            {
-                tprev = tcurr;
-                tcurr = tnext;
-            }
-
-            // and calculate next value
-            tnext = vop(vop(x0, x1), x2);
-
-            // make extrapolation for the first elements
-            if (!x)
-            {
-                // make border
-                if (border == BORDER_MODE_CONSTANT)
-                    tcurr = v_border;
-                else if (border == BORDER_MODE_REPLICATE)
-                    tcurr = vdupq_n_u8(vgetq_lane_u8(tnext, 0));
-
-                continue;
-            }
-
-            // combine 3 "shifted" vectors
-            t0 = vextq_u8(tprev, tcurr, 15);
-            t1 = tcurr;
-            t2 = vextq_u8(tcurr, tnext, 1);
-
-            // and add them
-            t0 = vop(t0, vop(t1, t2));
-
-            vst1q_u8(drow + x - 16, t0);
-        }
-
-        x -= 16;
-        if (x == width)
-            --x;
-
-        for ( ; x < width; ++x)
-        {
-            // make extrapolation for the last elements
-            if (x + 1 >= width)
-            {
-                if (border == BORDER_MODE_CONSTANT)
-                    nextx = borderValue;
-                else if (border == BORDER_MODE_REPLICATE)
-                    nextx = vop(srow2[x], vop(srow1[x], srow0[x]));
-            }
-            else
-                nextx = vop(vop(srow2 ? srow2[x + 1] : borderValue,
-                                srow0 ? srow0[x + 1] : borderValue),
-                            srow1[x + 1]);
-
-            drow[x] = vop(prevx, vop(currx, nextx));
-
-            // make shift
-            prevx = currx;
-            currx = nextx;
-        }
-    }
-}
-
-} // namespace
-
-#endif
-
-void erode3x3(const Size2D &size,
-              const u8 * srcBase, ptrdiff_t srcStride,
-              u8 * dstBase, ptrdiff_t dstStride,
-              BORDER_MODE border, u8 borderValue)
-{
-    internal::assertSupportedConfiguration(isMorph3x3Supported(size, border));
-#ifdef CAROTENE_NEON
-    morph3x3(size,
-             srcBase, srcStride,
-             dstBase, dstStride,
-             border, ErodeVecOp(border, borderValue));
-#else
-    (void)size;
-    (void)srcBase;
-    (void)srcStride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)border;
-    (void)borderValue;
-#endif
-}
-
-void dilate3x3(const Size2D &size,
-               const u8 * srcBase, ptrdiff_t srcStride,
-               u8 * dstBase, ptrdiff_t dstStride,
-               BORDER_MODE border, u8 borderValue)
-{
-    internal::assertSupportedConfiguration(isMorph3x3Supported(size, border));
-#ifdef CAROTENE_NEON
-    morph3x3(size,
-             srcBase, srcStride,
-             dstBase, dstStride,
-             border, DilateVecOp(border, borderValue));
-#else
-    (void)size;
-    (void)srcBase;
-    (void)srcStride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)border;
-    (void)borderValue;
-#endif
-}
-
-#ifdef CAROTENE_NEON
-namespace {
-
-template<class VecUpdate>
-void MorphRow(const u8* src, u8* dst, size_t width, s32 cn, size_t ksize)
-{
-    size_t i, j, k;
-    size_t width16 = (width & -16) * cn;
-    size_t width8 = (width & -8) * cn;
-    width *= cn;
-
-    if (ksize == 1)
-    {
-        for (i = 0; i < width; i++)
-            dst[i] = src[i];
-        return;
-    }
-
-    ksize = ksize*cn;
-    VecUpdate updateOp;
-    switch(cn)
-    {
-    case 1:
-        for (i = 0; i < width16; i += 16)
-        {
-            const u8* sptr = src + i;
-            uint8x16_t s = vld1q_u8(sptr);
-            internal::prefetch(sptr);
-
-            for( k = 1; k < ksize; ++k)
-                s = updateOp(s, vld1q_u8(sptr + k));
-
-            vst1q_u8(dst + i, s);
-        }
-
-        for (; i < width8; i += 8)
-        {
-            const u8* sptr = src + i;
-            uint8x8_t s = vld1_u8(sptr);
-            internal::prefetch(sptr);
-
-            for( k = 1; k < ksize; ++k)
-                s = updateOp(s, vld1_u8(sptr + k));
-
-            vst1_u8(dst + i, s);
-        }
-        break;
-    default:
-        for (i = 0; i < width16; i += 16)
-        {
-            uint8x16_t s = vld1q_u8(src + i);
-            internal::prefetch(src + i);
-
-            for (k = cn; k < ksize; k += cn)
-                s = updateOp(s, vld1q_u8(src + i + k));
-
-            vst1q_u8(dst + i, s);
-        }
-
-        for (; i < width8; i += 8)
-        {
-            uint8x8_t s = vld1_u8(src + i);
-            internal::prefetch(src + i);
-
-            for (k = cn; k < ksize; k += cn)
-                s = updateOp(s, vld1_u8(src + i + k));
-
-            vst1_u8(dst + i, s);
-        }
-        break;
-    }
-
-    ptrdiff_t i0 = i;
-    for( k = 0; k < (size_t)cn; k++, src++, dst++ )
-    {
-        for( i = i0; i <= width - cn*2; i += cn*2 )
-        {
-            const u8* s = src + i;
-            u8 m = s[cn];
-            for( j = cn*2; j < ksize; j += cn )
-                m = updateOp(m, s[j]);
-            dst[i] = updateOp(m, s[0]);
-            dst[i+cn] = updateOp(m, s[j]);
-        }
-
-        for( ; i < width; i += cn )
-        {
-            const u8* s = src + i;
-            u8 m = s[0];
-            for( j = cn; j < ksize; j += cn )
-                m = updateOp(m, s[j]);
-            dst[i] = m;
-        }
-    }
-}
-
-template<class VecUpdate> // VecUpdate: functor type, default-constructed below and applied to uint8x16_t pairs as well as u8 pairs
-void MorphColumn(const u8** src, u8* dst, ptrdiff_t dststep, size_t count, size_t width, size_t ksize)
-{ // Vertical pass: output row r (r in [0, count)) is updateOp folded over src[r] .. src[r + ksize - 1], written at dst + r*dststep; width is in bytes.
-    size_t i, k;
-    size_t width32 = width & -32; // width rounded down to a multiple of 32; the vector loops cover [0, width32)
-    VecUpdate updateOp;
-    // The two fast paths below emit two output rows per iteration: rows r and r+1 share src[r+1] .. src[r+ksize-1], so that partial result is computed once.
-    uint8x16_t x0,x1,s0,s1;
-    if (ksize == 3) // unrolled variant: the shared rows are exactly src[1] and src[2]
-    {
-        for (; count > 1; count -= 2, dst += dststep * 2, src += 2)
-        {
-            for (i = 0; i < width32; i += 32)
-            {
-                const u8* sptr = src[1] + i;
-                s0 = vld1q_u8(sptr);
-                s1 = vld1q_u8(sptr + 16);
-                internal::prefetch(sptr);
-
-                sptr = src[2] + i;
-                x0 = vld1q_u8(sptr);
-                x1 = vld1q_u8(sptr + 16);
-                internal::prefetch(sptr);
-
-                s0 = updateOp(s0, x0);
-                s1 = updateOp(s1, x1);
-                // first output row: shared part combined with src[0]
-                sptr = src[0] + i;
-                x0 = vld1q_u8(sptr);
-                x1 = vld1q_u8(sptr + 16);
-                internal::prefetch(sptr);
-
-                vst1q_u8(dst+i, updateOp(s0, x0));
-                vst1q_u8(dst+i+16, updateOp(s1, x1));
-                // second output row: shared part combined with src[3]
-                sptr = src[3] + i;
-                x0 = vld1q_u8(sptr);
-                x1 = vld1q_u8(sptr + 16);
-                internal::prefetch(sptr);
-                vst1q_u8(dst + dststep + i, updateOp(s0, x0));
-                vst1q_u8(dst + dststep + i + 16, updateOp(s1, x1));
-
-            }
-            for(; i < width; i++ ) // scalar tail for the columns in [width32, width)
-            {
-                u8 s = src[1][i];
-
-                for( k = 2; k < ksize; k++ )
-                    s = updateOp(s, src[k][i]);
-                // k == ksize after the loop, so src[k] is the extra row needed by the second output row
-                dst[i] = updateOp(s, src[0][i]);
-                dst[i+dststep] = updateOp(s, src[k][i]);
-            }
-        }
-    }
-    else if (ksize > 1) // generic variant of the same two-rows-at-a-time scheme
-        for (; count > 1; count -= 2, dst += dststep*2, src += 2)
-        {
-            for (i = 0; i < width32; i += 32)
-            {
-                const u8* sptr = src[1] + i;
-                s0 = vld1q_u8(sptr);
-                s1 = vld1q_u8(sptr + 16);
-                internal::prefetch(sptr);
-                for (k = 2; k < ksize; k++) // fold the rows shared by both outputs: src[2] .. src[ksize-1]
-                {
-                    sptr = src[k] + i;
-                    x0 = vld1q_u8(sptr);
-                    x1 = vld1q_u8(sptr + 16);
-                    internal::prefetch(sptr);
-
-                    s0 = updateOp(s0, x0);
-                    s1 = updateOp(s1, x1);
-                }
-                // first output row: shared part combined with src[0]
-                sptr = src[0] + i;
-                x0 = vld1q_u8(sptr);
-                x1 = vld1q_u8(sptr + 16);
-                internal::prefetch(sptr);
-
-                vst1q_u8(dst+i, updateOp(s0, x0));
-                vst1q_u8(dst+i+16, updateOp(s1, x1));
-                // k == ksize after the fold loop: src[k] completes the second output row
-                sptr = src[k] + i;
-                x0 = vld1q_u8(sptr);
-                x1 = vld1q_u8(sptr + 16);
-                internal::prefetch(sptr);
-                vst1q_u8(dst + dststep + i, updateOp(s0, x0));
-                vst1q_u8(dst + dststep + i + 16, updateOp(s1, x1));
-            }
-            for(; i < width; i++ ) // scalar tail for the columns in [width32, width)
-            {
-                u8 s = src[1][i];
-
-                for( k = 2; k < ksize; k++ )
-                    s = updateOp(s, src[k][i]);
-                // k == ksize here as well
-                dst[i] = updateOp(s, src[0][i]);
-                dst[i+dststep] = updateOp(s, src[k][i]);
-            }
-        }
-    // Remaining rows (every row when ksize <= 1, otherwise at most one): plain fold over src[0] .. src[ksize-1].
-    for (; count > 0; count--, dst += dststep, src++)
-    {
-        for (i = 0; i < width32; i += 32)
-        {
-            const u8* sptr = src[0] + i;
-            s0 = vld1q_u8(sptr);
-            s1 = vld1q_u8(sptr + 16);
-            internal::prefetch(sptr);
-
-            for (k = 1; k < ksize; k++)
-            {
-                sptr = src[k] + i;
-                x0 = vld1q_u8(sptr);
-                x1 = vld1q_u8(sptr + 16);
-                internal::prefetch(sptr);
-                s0 = updateOp(s0, x0);
-                s1 = updateOp(s1, x1);
-            }
-
-            vst1q_u8(dst + i, s0);
-            vst1q_u8(dst + i + 16, s1);
-        }
-        for(; i < width; i++ ) // scalar tail for the columns in [width32, width)
-        {
-            u8 s = src[0][i];
-            for( k = 1; k < ksize; k++ )
-                s = updateOp(s, src[k][i]);
-            dst[i] = s;
-        }
-    }
-}
-
-template <class Op> // Op is forwarded unchanged as the functor type of MorphRow<Op> and MorphColumn<Op>
-inline void morphology(const Size2D &ssize, u32 cn,
-                       const u8 * srcBase, ptrdiff_t srcStride,
-                       u8 * dstBase, ptrdiff_t dstStride,
-                       const Size2D &ksize,
-                       size_t anchorX, size_t anchorY,
-                       BORDER_MODE rowBorderType, BORDER_MODE columnBorderType,
-                       const u8 * borderValues, Margin borderMargin)
-{ // Two-pass filter: each source row is border-extended, passed through MorphRow into a ring buffer, then MorphColumn combines ksize.height buffered rows into output rows.
-    //Temporary buffers common for all iterations
-    std::vector<u8> _srcRow(cn*(ssize.width + ksize.width - 1)); // one source row plus ksize.width - 1 extra pixels for the left/right border
-    u8* srcRow = &_srcRow[0];
-
-    size_t bufRows = std::max<size_t>(ksize.height + 3, std::max<size_t>(anchorY, ksize.height-anchorY-1)*2+1); // number of row slots in the ring buffer
-    std::vector<u8*> _rows(bufRows);
-    u8** rows = &_rows[0];
-
-    // adjust swidthcn so that the used part of buffers stays compact in memory
-    ptrdiff_t swidthcn = cn*((ssize.width + 15) & -16);// cn * (aligned ssize.width size)
-    std::vector<u8> _ringBuf(swidthcn*bufRows+16); // 16 spare bytes for the internal::alignPtr(..., 16) call below
-    u8 * ringBuf = internal::alignPtr(&_ringBuf[0], 16);
-
-    size_t borderLength = std::max<size_t>(ksize.width - 1, 1) * cn; // border table length in elements (at least one pixel of cn channels)
-    std::vector<ptrdiff_t> _borderTab(borderLength);
-    ptrdiff_t * borderTab = &_borderTab[0];
-
-    std::vector<u8> _constBorderValue;
-    std::vector<u8> _constBorderRow;
-    u8 * constBorderValue = NULL;
-    u8 * constBorderRow = NULL;
-    if( rowBorderType == BORDER_MODE_CONSTANT || columnBorderType == BORDER_MODE_CONSTANT )
-    {
-        _constBorderValue.resize(borderLength);
-        constBorderValue = &_constBorderValue[0];
-        size_t i;
-        for(i = 0; i < cn; i++) // first pixel comes straight from borderValues
-            constBorderValue[i] = borderValues[i];
-        for(; i < borderLength; i++) // the rest repeats that pixel with period cn
-            constBorderValue[i] = constBorderValue[i-cn];
-        // For a constant column border: build a row made only of border pixels and run it through MorphRow once; it stands in for rows outside the image below.
-        if( columnBorderType == BORDER_MODE_CONSTANT )
-        {
-            _constBorderRow.resize(cn*(ssize.width + ksize.width - 1 + 16));
-            constBorderRow = internal::alignPtr(&_constBorderRow[0], 16);
-            size_t N = (ssize.width + ksize.width - 1)*cn;
-            for( i = 0; i < N; i += borderLength )
-            {
-                size_t n = std::min( borderLength, N - i );
-                for(size_t j = 0; j < n; j++)
-                    srcRow[i+j] = constBorderValue[j];
-            }
-            MorphRow<Op>(srcRow, constBorderRow, ssize.width, cn, ksize.width);
-        }
-    }
-
-    Size2D wholeSize(ssize.width + borderMargin.left + borderMargin.right,
-                     ssize.height + borderMargin.top + borderMargin.bottom);
-    // dx1 / dx2: number of pixels on the left / right of each row that borderMargin does not cover and that therefore have to be synthesised
-    ptrdiff_t dx1 = std::max<ptrdiff_t>(anchorX - (ptrdiff_t)borderMargin.left, 0);
-    ptrdiff_t dx2 = std::max<ptrdiff_t>((ptrdiff_t)ksize.width - anchorX - 1 - (ptrdiff_t)borderMargin.right, 0);
-    // recompute border tables
-    if( dx1 > 0 || dx2 > 0 )
-    {
-        if( rowBorderType == BORDER_MODE_CONSTANT ) // both ends of srcRow are filled once here; the per-row memcpy below only overwrites the middle part
-        {
-            memcpy( srcRow, &constBorderValue[0], dx1*cn );
-            memcpy( srcRow + (ssize.width + ksize.width - 1 - dx2)*cn, &constBorderValue[0], dx2*cn );
-        }
-        else // otherwise fill borderTab with element offsets (relative to the per-row src pointer) from which border pixels are copied
-        {
-            ptrdiff_t xofs1 = std::min<ptrdiff_t>(borderMargin.left, anchorX) - borderMargin.left;
-
-            ptrdiff_t wholeWidth = wholeSize.width;
-
-            ptrdiff_t i, j;
-            for( i = 0; i < dx1; i++ ) // left border entries
-            {
-                ptrdiff_t p0 = (internal::borderInterpolate(i-dx1, wholeWidth, rowBorderType) + xofs1)*cn;
-                for( j = 0; j < (ptrdiff_t)cn; j++ )
-                    borderTab[i*cn + j] = p0 + j;
-            }
-
-            for( i = 0; i < dx2; i++ ) // right border entries, stored after the dx1*cn left ones
-            {
-                ptrdiff_t p0 = (internal::borderInterpolate(wholeWidth + i, wholeWidth, rowBorderType) + xofs1)*cn;
-                for( j = 0; j < (ptrdiff_t)cn; j++ )
-                    borderTab[(i + dx1)*cn + j] = p0 + j;
-            }
-        }
-    }
-    // [startY, endY): rows that have to be read, in coordinates of the whole image (margins included)
-    ptrdiff_t startY, startY0, endY, rowCount;
-    startY = startY0 = std::max<ptrdiff_t>(borderMargin.top - anchorY, 0);
-    endY = std::min<ptrdiff_t>(borderMargin.top + ssize.height + ksize.height - anchorY - 1, wholeSize.height);
-
-    const u8* src = srcBase + (startY - borderMargin.top)*srcStride;
-    u8* dst = dstBase;
-
-    ptrdiff_t width = ssize.width, kwidth = ksize.width;
-    ptrdiff_t kheight = ksize.height, ay = anchorY;
-    ptrdiff_t width1 = ssize.width + kwidth - 1;
-    ptrdiff_t xofs1 = std::min<ptrdiff_t>(borderMargin.left, anchorX); // real pixels available to the left of the ROI; src is moved back by this many pixels
-    bool makeBorder = (dx1 > 0 || dx2 > 0) && rowBorderType != BORDER_MODE_CONSTANT;
-    ptrdiff_t dy = 0, i = 0;
-
-    src -= xofs1*cn;
-    ptrdiff_t count = endY - startY;
-
-    rowCount = 0;
-    for(;; dst += dstStride*i, dy += i) // each pass: filter up to dcount more source rows into the ring, then emit as many output rows as the buffered rows allow
-    {
-        ptrdiff_t dcount = bufRows - ay - startY - rowCount + borderMargin.top;
-        dcount = dcount > 0 ? dcount : bufRows - kheight + 1;
-        dcount = std::min(dcount, count);
-        count -= dcount;
-        for( ; dcount-- > 0; src += srcStride )
-        {
-            ptrdiff_t bi = (startY - startY0 + rowCount) % bufRows; // ring slot for this source row
-            u8* brow = ringBuf + bi*swidthcn;
-
-            if( (size_t)(++rowCount) > bufRows ) // ring is full: the oldest row is dropped by advancing startY
-            {
-                --rowCount;
-                ++startY;
-            }
-
-            memcpy( srcRow + dx1*cn, src, (width1 - dx2 - dx1)*cn );
-
-            if( makeBorder )
-            {
-                    for( i = 0; i < (ptrdiff_t)(dx1*cn); i++ )
-                        srcRow[i] = src[borderTab[i]];
-                    for( i = 0; i < (ptrdiff_t)(dx2*cn); i++ )
-                        srcRow[i + (width1 - dx2)*cn] = src[borderTab[i+dx1*cn]];
-            }
-
-            MorphRow<Op>(srcRow, brow, width, cn, ksize.width);
-        }
-        // Collect pointers to the horizontally filtered rows needed for the next output rows
-        ptrdiff_t max_i = std::min<ptrdiff_t>(bufRows, ssize.height - dy + (kheight - 1));
-        for( i = 0; i < max_i; i++ )
-        {
-            ptrdiff_t srcY = internal::borderInterpolate(dy + i + borderMargin.top - ay,
-                                               wholeSize.height, columnBorderType);
-            if( srcY < 0 ) // can happen only with constant border type
-                rows[i] = constBorderRow;
-            else
-            {
-                if( srcY >= startY + rowCount )
-                    break; // this row has not been put into the ring yet
-                ptrdiff_t bi = (srcY - startY0) % bufRows;
-                rows[i] = ringBuf + bi*swidthcn;
-            }
-        }
-        if( i < kheight ) // fewer than kheight rows available: no further output row can be produced (the only exit of the outer loop)
-            break;
-        i -= kheight - 1; // i row pointers yield i - (kheight - 1) output rows
-        MorphColumn<Op>((const u8**)rows, dst, dstStride, i, ssize.width*cn, ksize.height);
-    }
-}
-
-} // namespace
-#endif // CAROTENE_NEON
-
-void erode(const Size2D &ssize, u32 cn,
-           const u8 * srcBase, ptrdiff_t srcStride,
-           u8 * dstBase, ptrdiff_t dstStride,
-           const Size2D &ksize,
-           size_t anchorX, size_t anchorY,
-           BORDER_MODE rowBorderType, BORDER_MODE columnBorderType,
-           const u8 * borderValues, Margin borderMargin)
-{ // Public entry point: forwards every argument to morphology<ErodeVecOp> when CAROTENE_NEON is defined.
-    internal::assertSupportedConfiguration(ssize.width > 0 && ssize.height > 0 &&
-                                           anchorX < ksize.width && anchorY < ksize.height); // condition passed: non-empty image and anchor inside the kernel
-#ifdef CAROTENE_NEON
-    morphology<ErodeVecOp>(ssize, cn, srcBase, srcStride, dstBase, dstStride,
-                           ksize, anchorX, anchorY, rowBorderType, columnBorderType,
-                           borderValues, borderMargin);
-#else // no NEON: nothing is computed; the casts below only mark the remaining parameters as used
-    (void)cn;
-    (void)srcBase;
-    (void)srcStride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)rowBorderType;
-    (void)columnBorderType;
-    (void)borderValues;
-    (void)borderMargin;
-#endif
-}
-
-void dilate(const Size2D &ssize, u32 cn,
-            const u8 * srcBase, ptrdiff_t srcStride,
-            u8 * dstBase, ptrdiff_t dstStride,
-            const Size2D &ksize,
-            size_t anchorX, size_t anchorY,
-            BORDER_MODE rowBorderType, BORDER_MODE columnBorderType,
-            const u8 * borderValues, Margin borderMargin)
-{ // Public entry point: forwards every argument to morphology<DilateVecOp> when CAROTENE_NEON is defined.
-    internal::assertSupportedConfiguration(ssize.width > 0 && ssize.height > 0 &&
-                                           anchorX < ksize.width && anchorY < ksize.height); // condition passed: non-empty image and anchor inside the kernel
-#ifdef CAROTENE_NEON
-    morphology<DilateVecOp>(ssize, cn, srcBase, srcStride, dstBase, dstStride,
-                            ksize, anchorX, anchorY, rowBorderType, columnBorderType,
-                            borderValues, borderMargin);
-#else // no NEON: nothing is computed; the casts below only mark the remaining parameters as used
-    (void)cn;
-    (void)srcBase;
-    (void)srcStride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)rowBorderType;
-    (void)columnBorderType;
-    (void)borderValues;
-    (void)borderMargin;
-#endif
-}
-
-} // namespace CAROTENE_NS
--- a/3rdparty/carotene/src/mul.cpp
+++ b/3rdparty/carotene/src/mul.cpp
--- a/3rdparty/carotene/src/norm.cpp
+++ b/3rdparty/carotene/src/norm.cpp
--- a/3rdparty/carotene/src/opticalflow.cpp
+++ b/3rdparty/carotene/src/opticalflow.cpp
@ -1,539 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2012-2015, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-#include "common.hpp"
-#include "saturate_cast.hpp"
-#include <vector>
-#include <float.h> // For FLT_EPSILON
-
-namespace CAROTENE_NS {
-
-#define CV_DESCALE(x,n)     (((x) + (1 << ((n)-1))) >> (n)) // adds half of 2^n before shifting right by n, i.e. a rounding right shift
-
-/*
- *        Pyramidal Lucas-Kanade Optical Flow level processing
- */
-void pyrLKOptFlowLevel(const Size2D &size, s32 cn,
-                       const u8 *prevData, ptrdiff_t prevStride,
-                       const s16 *prevDerivData, ptrdiff_t prevDerivStride,
-                       const u8 *nextData, ptrdiff_t nextStride,
-                       u32 ptCount,
-                       const f32 *prevPts, f32 *nextPts,
-                       u8 *status, f32 *err,
-                       const Size2D &winSize,
-                       u32 terminationCount, f64 terminationEpsilon,
-                       u32 level, u32 maxLevel, bool useInitialFlow, bool getMinEigenVals,
-                       f32 minEigThreshold)
-{ // For each of the ptCount points: samples a winSize patch of prevData/prevDerivData around prevPts, then iteratively updates nextPts against nextData (at most terminationCount iterations).
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    f32 halfWinX = (winSize.width-1)*0.5f, halfWinY = (winSize.height-1)*0.5f;
-    s32 cn2 = cn*2;
-    // One s16 scratch buffer: the first winSize.total()*cn values hold the interpolated patch, the remaining winSize.total()*cn2 its interleaved x/y derivatives
-    std::vector<s16> _buf(winSize.total()*(cn + cn2));
-    s16* IWinBuf = &_buf[0];
-    s32  IWinBufStride = winSize.width*cn;
-    s16* derivIWinBuf = &_buf[winSize.total()*cn];
-    s32  derivIWinBufStride = winSize.width*cn2;
-
-    for( u32 ptidx = 0; ptidx < ptCount; ptidx++ )
-    {
-        f32 levscale = (1./(1 << level)); // 1 / 2^level
-        u32 ptref = ptidx << 1; // points are stored as interleaved (x, y) pairs
-        f32 prevPtX = prevPts[ptref+0]*levscale;
-        f32 prevPtY = prevPts[ptref+1]*levscale;
-        f32 nextPtX;
-        f32 nextPtY;
-        if( level == maxLevel ) // first processed level: start from the caller's guess or from the previous position
-        {
-            if( useInitialFlow )
-            {
-                nextPtX = nextPts[ptref+0]*levscale;
-                nextPtY = nextPts[ptref+1]*levscale;
-            }
-            else
-            {
-                nextPtX = prevPtX;
-                nextPtY = prevPtY;
-            }
-        }
-        else // presumably nextPts holds the result of the next coarser level here (verify against caller); it is doubled
-        {
-            nextPtX = nextPts[ptref+0]*2.f;
-            nextPtY = nextPts[ptref+1]*2.f;
-        }
-        nextPts[ptref+0] = nextPtX;
-        nextPts[ptref+1] = nextPtY;
-        // Top-left corner of the window around the previous point, split into integer and fractional parts
-        s32 iprevPtX, iprevPtY;
-        s32 inextPtX, inextPtY;
-        prevPtX -= halfWinX;
-        prevPtY -= halfWinY;
-        iprevPtX = floor(prevPtX);
-        iprevPtY = floor(prevPtY);
-        // Skip the point when the window origin lies outside [-winSize, size); status/err are only written at level 0
-        if( iprevPtX < -(s32)winSize.width || iprevPtX >= (s32)size.width ||
-            iprevPtY < -(s32)winSize.height || iprevPtY >= (s32)size.height )
-        {
-            if( level == 0 )
-            {
-                if( status )
-                    status[ptidx] = false;
-                if( err )
-                    err[ptidx] = 0;
-            }
-            continue;
-        }
-        // Bilinear weights in fixed point (scaled by 1 << W_BITS); iw11 is derived so that the four weights sum to exactly 1 << W_BITS
-        f32 a = prevPtX - iprevPtX;
-        f32 b = prevPtY - iprevPtY;
-        const s32 W_BITS = 14, W_BITS1 = 14;
-        const f32 FLT_SCALE = 1.f/(1 << 20);
-        s32 iw00 = round((1.f - a)*(1.f - b)*(1 << W_BITS));
-        s32 iw01 = round(a*(1.f - b)*(1 << W_BITS));
-        s32 iw10 = round((1.f - a)*b*(1 << W_BITS));
-        s32 iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
-
-        s32 dstep = prevDerivStride/sizeof(s16); // derivative row stride converted to s16 elements
-        f32 A11 = 0, A12 = 0, A22 = 0;
-
-        int16x4_t viw00 = vmov_n_s16((s16)iw00);
-        int16x4_t viw01 = vmov_n_s16((s16)iw01);
-        int16x4_t viw10 = vmov_n_s16((s16)iw10);
-        int16x4_t viw11 = vmov_n_s16((s16)iw11);
-
-        float32x4_t vA11 = vmovq_n_f32(0);
-        float32x4_t vA12 = vmovq_n_f32(0);
-        float32x4_t vA22 = vmovq_n_f32(0);
-
-        s32 wwcn = winSize.width*cn;
-
-        // extract the patch from the first image, compute covariation matrix of derivatives
-        s32 x = 0;
-        for(s32 y = 0; y < (s32)winSize.height; y++ )
-        {
-            const u8* src = prevData + prevStride*(y + iprevPtY) + iprevPtX*cn;
-            const s16* dsrc = prevDerivData + dstep*(y + iprevPtY) + iprevPtX*cn2;
-
-            s16* Iptr = IWinBuf + y*IWinBufStride;
-            s16* dIptr = derivIWinBuf + y*derivIWinBufStride;
-            // NOTE(review): x below is 0 only for the first row; afterwards it is whatever the previous row's loops left behind
-            internal::prefetch(src + x + prevStride * 2, 0);
-            for(x = 0; x <= wwcn - 8; x += 8) // 8 patch values per iteration: low halves accumulate in vsuml, high halves in vsumh
-            {
-                uint8x8_t vsrc00 = vld1_u8(src + x);
-                uint8x8_t vsrc10 = vld1_u8(src + x + prevStride);
-                uint8x8_t vsrc01 = vld1_u8(src + x + cn);
-                uint8x8_t vsrc11 = vld1_u8(src + x + prevStride + cn);
-
-                int16x8_t vs00 = vreinterpretq_s16_u16(vmovl_u8(vsrc00));
-                int16x8_t vs10 = vreinterpretq_s16_u16(vmovl_u8(vsrc10));
-                int16x8_t vs01 = vreinterpretq_s16_u16(vmovl_u8(vsrc01));
-                int16x8_t vs11 = vreinterpretq_s16_u16(vmovl_u8(vsrc11));
-
-                int32x4_t vsuml = vmull_s16(vget_low_s16(vs00), viw00);
-                int32x4_t vsumh = vmull_s16(vget_high_s16(vs10), viw10);
-
-                vsuml = vmlal_s16(vsuml, vget_low_s16(vs01), viw01);
-                vsumh = vmlal_s16(vsumh, vget_high_s16(vs11), viw11);
-
-                vsuml = vmlal_s16(vsuml, vget_low_s16(vs10), viw10);
-                vsumh = vmlal_s16(vsumh, vget_high_s16(vs00), viw00);
-
-                vsuml = vmlal_s16(vsuml, vget_low_s16(vs11), viw11);
-                vsumh = vmlal_s16(vsumh, vget_high_s16(vs01), viw01);
-                // rounding shift by W_BITS1-5 keeps 5 fractional bits in the stored patch values
-                int16x4_t vsumnl = vrshrn_n_s32(vsuml, W_BITS1-5);
-                int16x4_t vsumnh = vrshrn_n_s32(vsumh, W_BITS1-5);
-
-                vst1q_s16(Iptr + x, vcombine_s16(vsumnl, vsumnh));
-            }
-            for(; x <= wwcn - 4; x += 4) // 4 patch values per iteration; NOTE(review): each vld1_u8 still loads 8 bytes although only the low 4 are used
-            {
-                uint8x8_t vsrc00 = vld1_u8(src + x);
-                uint8x8_t vsrc10 = vld1_u8(src + x + prevStride);
-                uint8x8_t vsrc01 = vld1_u8(src + x + cn);
-                uint8x8_t vsrc11 = vld1_u8(src + x + prevStride + cn);
-
-                int16x4_t vs00 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(vsrc00)));
-                int16x4_t vs10 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(vsrc10)));
-                int16x4_t vs01 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(vsrc01)));
-                int16x4_t vs11 = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(vsrc11)));
-
-                int32x4_t vsuml1 = vmull_s16(vs00, viw00);
-                int32x4_t vsuml2 = vmull_s16(vs01, viw01);
-                vsuml1 = vmlal_s16(vsuml1, vs10, viw10);
-                vsuml2 = vmlal_s16(vsuml2, vs11, viw11);
-                int32x4_t vsuml = vaddq_s32(vsuml1, vsuml2);
-
-                int16x4_t vsumnl = vrshrn_n_s32(vsuml, W_BITS1-5);
-
-                vst1_s16(Iptr + x, vsumnl);
-            }
-            // Derivatives: interpolate the x and y gradients and accumulate the sums of x*x, x*y and y*y products
-            internal::prefetch(dsrc + dstep * 2, 0);
-            for(x = 0; x <= wwcn - 4; x += 4, dsrc += 4*2, dIptr += 4*2 )
-            {
-#if __GNUC_MINOR__ < 0 // NOTE(review): never true (an undefined macro evaluates to 0 inside #if), so the intrinsics in the #else branch are what gets compiled
-                __asm__ (
-                    "vld2.16 {d0-d1}, [%[dsrc00]]                         \n\t"
-                    "vld2.16 {d2-d3}, [%[dsrc10]]                         \n\t"
-                    "vld2.16 {d4-d5}, [%[dsrc01]]                         \n\t"
-                    "vld2.16 {d6-d7}, [%[dsrc11]]                         \n\t"
-                    "vmull.s16 q4, d3, %P[viw10]                           \n\t"
-                    "vmull.s16 q5, d0, %P[viw00]                           \n\t"
-                    "vmlal.s16 q4, d7, %P[viw11]                           \n\t"
-                    "vmlal.s16 q5, d4, %P[viw01]                           \n\t"
-                    "vmlal.s16 q4, d1, %P[viw00]                           \n\t"
-                    "vmlal.s16 q5, d2, %P[viw10]                           \n\t"
-                    "vmlal.s16 q4, d5, %P[viw01]                           \n\t"
-                    "vmlal.s16 q5, d6, %P[viw11]                            \n\t"
-                    "vrshrn.s32 d13, q4, %[W_BITS1]                       \n\t"
-                    "vrshrn.s32 d12, q5, %[W_BITS1]                       \n\t"
-                    "vmull.s16 q3, d13, d13                               \n\t"
-                    "vmull.s16 q4, d12, d12                               \n\t"
-                    "vmull.s16 q5, d13, d12                               \n\t"
-                    "vcvt.f32.s32 q3, q3                                  \n\t"
-                    "vcvt.f32.s32 q4, q4                                  \n\t"
-                    "vcvt.f32.s32 q5, q5                                  \n\t"
-                    "vadd.f32 %q[vA22], q3                                \n\t"
-                    "vadd.f32 %q[vA11], q4                                \n\t"
-                    "vadd.f32 %q[vA12], q5                                \n\t"
-                    "vst2.16 {d12-d13}, [%[out]]                          \n\t"
-                    : [vA22] "=w" (vA22),
-                      [vA11] "=w" (vA11),
-                      [vA12] "=w" (vA12)
-                    : "0" (vA22),
-                      "1" (vA11),
-                      "2" (vA12),
-                      [out] "r" (dIptr),
-                      [dsrc00] "r" (dsrc),
-                      [dsrc10] "r" (dsrc + dstep),
-                      [dsrc01] "r" (dsrc + cn2),
-                      [dsrc11] "r" (dsrc + dstep + cn2),
-                      [viw00] "w" (viw00),
-                      [viw10] "w" (viw10),
-                      [viw01] "w" (viw01),
-                      [viw11] "w" (viw11),
-                      [W_BITS1] "I" (W_BITS1)
-                    : "d0","d1","d2","d3","d4","d5","d6","d7","d8","d9","d10","d11","d12","d13"
-                );
-#else
-                int16x4x2_t vdsrc00 = vld2_s16(dsrc); // vld2 de-interleaves: val[0] holds the x gradients, val[1] the y gradients
-                int16x4x2_t vdsrc10 = vld2_s16(dsrc + dstep);
-                int16x4x2_t vdsrc01 = vld2_s16(dsrc + cn2);
-                int16x4x2_t vdsrc11 = vld2_s16(dsrc + dstep + cn2);
-
-                int32x4_t vsumy = vmull_s16(vdsrc10.val[1], viw10);
-                int32x4_t vsumx = vmull_s16(vdsrc00.val[0], viw00);
-
-                vsumy = vmlal_s16(vsumy, vdsrc11.val[1], viw11);
-                vsumx = vmlal_s16(vsumx, vdsrc01.val[0], viw01);
-
-                vsumy = vmlal_s16(vsumy, vdsrc00.val[1], viw00);
-                vsumx = vmlal_s16(vsumx, vdsrc10.val[0], viw10);
-
-                vsumy = vmlal_s16(vsumy, vdsrc01.val[1], viw01);
-                vsumx = vmlal_s16(vsumx, vdsrc11.val[0], viw11);
-
-                int16x4_t vsumny = vrshrn_n_s32(vsumy, W_BITS1);
-                int16x4_t vsumnx = vrshrn_n_s32(vsumx, W_BITS1);
-
-                int32x4_t va22i = vmull_s16(vsumny, vsumny);
-                int32x4_t va11i = vmull_s16(vsumnx, vsumnx);
-                int32x4_t va12i = vmull_s16(vsumnx, vsumny);
-
-                float32x4_t va22f = vcvtq_f32_s32(va22i);
-                float32x4_t va11f = vcvtq_f32_s32(va11i);
-                float32x4_t va12f = vcvtq_f32_s32(va12i);
-
-                vA22 = vaddq_f32(vA22, va22f);
-                vA11 = vaddq_f32(vA11, va11f);
-                vA12 = vaddq_f32(vA12, va12f);
-                // store the interpolated gradients interleaved as (x, y) pairs
-                int16x4x2_t vsum;
-                vsum.val[0] = vsumnx;
-                vsum.val[1] = vsumny;
-                vst2_s16(dIptr, vsum);
-#endif
-            }
-            // Scalar tail for the remaining wwcn % 4 columns: computes the patch value and both gradients
-            for( ; x < wwcn; x++, dsrc += 2, dIptr += 2 )
-            {
-                s32 ival = CV_DESCALE(src[x]*iw00 + src[x+cn]*iw01 +
-                                      src[x+prevStride]*iw10 + src[x+prevStride+cn]*iw11, W_BITS1-5);
-                s32 ixval = CV_DESCALE(dsrc[0]*iw00 + dsrc[cn2]*iw01 +
-                                       dsrc[dstep]*iw10 + dsrc[dstep+cn2]*iw11, W_BITS1);
-                s32 iyval = CV_DESCALE(dsrc[1]*iw00 + dsrc[cn2+1]*iw01 + dsrc[dstep+1]*iw10 +
-                                       dsrc[dstep+cn2+1]*iw11, W_BITS1);
-                Iptr[x] = (s16)ival;
-                dIptr[0] = (s16)ixval;
-                dIptr[1] = (s16)iyval;
-
-                A11 += (f32)(ixval*ixval);
-                A12 += (f32)(ixval*iyval);
-                A22 += (f32)(iyval*iyval);
-            }
-        }
-        // Reduce the vector accumulators and add them to the scalar sums
-        f32 A11buf[2], A12buf[2], A22buf[2];
-        vst1_f32(A11buf, vadd_f32(vget_low_f32(vA11), vget_high_f32(vA11)));
-        vst1_f32(A12buf, vadd_f32(vget_low_f32(vA12), vget_high_f32(vA12)));
-        vst1_f32(A22buf, vadd_f32(vget_low_f32(vA22), vget_high_f32(vA22)));
-        A11 += A11buf[0] + A11buf[1];
-        A12 += A12buf[0] + A12buf[1];
-        A22 += A22buf[0] + A22buf[1];
-
-        A11 *= FLT_SCALE;
-        A12 *= FLT_SCALE;
-        A22 *= FLT_SCALE;
-        // D: determinant of [[A11, A12], [A12, A22]]; minEig: its smaller eigenvalue divided by the window area
-        f32 D = A11*A22 - A12*A12;
-        f32 minEig = (A22 + A11 - std::sqrt((A11-A22)*(A11-A22) +
-                        4.f*A12*A12))/(2*winSize.width*winSize.height);
-
-        if( err && getMinEigenVals )
-            err[ptidx] = (f32)minEig;
-
-        if( minEig < minEigThreshold || D < FLT_EPSILON ) // matrix too close to singular: give up on this point
-        {
-            if( level == 0 && status )
-                status[ptidx] = false;
-            continue;
-        }
-
-        D = 1.f/D; // from here on D holds the reciprocal of the determinant
-
-        nextPtX -= halfWinX;
-        nextPtY -= halfWinY;
-        f32 prevDeltaX = 0;
-        f32 prevDeltaY = 0;
-        // Iterative refinement of the window position in the next image
-        for(u32 j = 0; j < terminationCount; j++ )
-        {
-            inextPtX = floor(nextPtX);
-            inextPtY = floor(nextPtY);
-
-            if( inextPtX < -(s32)winSize.width || inextPtX >= (s32)size.width ||
-               inextPtY < -(s32)winSize.height || inextPtY >= (s32)size.height )
-            {
-                if( level == 0 && status )
-                    status[ptidx] = false;
-                break;
-            }
-            // bilinear weights for the current position, computed the same way as for the previous image
-            a = nextPtX - inextPtX;
-            b = nextPtY - inextPtY;
-            iw00 = round((1.f - a)*(1.f - b)*(1 << W_BITS));
-            iw01 = round(a*(1.f - b)*(1 << W_BITS));
-            iw10 = round((1.f - a)*b*(1 << W_BITS));
-            iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
-            f32 b1 = 0, b2 = 0;
-
-            viw00 = vmov_n_s16((s16)iw00);
-            viw01 = vmov_n_s16((s16)iw01);
-            viw10 = vmov_n_s16((s16)iw10);
-            viw11 = vmov_n_s16((s16)iw11);
-
-            float32x4_t vb1 = vmovq_n_f32(0);
-            float32x4_t vb2 = vmovq_n_f32(0);
-
-            for(s32 y = 0; y < (s32)winSize.height; y++ )
-            {
-                const u8* Jptr = nextData + nextStride*(y + inextPtY) + inextPtX*cn;
-                const s16* Iptr = IWinBuf + y*IWinBufStride;
-                const s16* dIptr = derivIWinBuf + y*derivIWinBufStride;
-
-                x = 0;
-
-                internal::prefetch(Jptr, nextStride * 2);
-                internal::prefetch(Iptr, IWinBufStride/2);
-                internal::prefetch(dIptr, derivIWinBufStride/2);
-
-                for( ; x <= wwcn - 8; x += 8, dIptr += 8*2 )
-                {
-                    uint8x8_t vj00 = vld1_u8(Jptr + x);
-                    uint8x8_t vj10 = vld1_u8(Jptr + x + nextStride);
-                    uint8x8_t vj01 = vld1_u8(Jptr + x + cn);
-                    uint8x8_t vj11 = vld1_u8(Jptr + x + nextStride + cn);
-                    int16x8_t vI = vld1q_s16(Iptr + x);
-                    int16x8x2_t vDerivI = vld2q_s16(dIptr);
-
-                    int16x8_t vs00 = vreinterpretq_s16_u16(vmovl_u8(vj00));
-                    int16x8_t vs10 = vreinterpretq_s16_u16(vmovl_u8(vj10));
-                    int16x8_t vs01 = vreinterpretq_s16_u16(vmovl_u8(vj01));
-                    int16x8_t vs11 = vreinterpretq_s16_u16(vmovl_u8(vj11));
-
-                    int32x4_t vsuml = vmull_s16(vget_low_s16(vs00), viw00);
-                    int32x4_t vsumh = vmull_s16(vget_high_s16(vs10), viw10);
-
-                    vsuml = vmlal_s16(vsuml, vget_low_s16(vs01), viw01);
-                    vsumh = vmlal_s16(vsumh, vget_high_s16(vs11), viw11);
-
-                    vsuml = vmlal_s16(vsuml, vget_low_s16(vs10), viw10);
-                    vsumh = vmlal_s16(vsumh, vget_high_s16(vs00), viw00);
-
-                    vsuml = vmlal_s16(vsuml, vget_low_s16(vs11), viw11);
-                    vsumh = vmlal_s16(vsumh, vget_high_s16(vs01), viw01);
-
-                    int16x4_t vsumnl = vrshrn_n_s32(vsuml, W_BITS1-5);
-                    int16x4_t vsumnh = vrshrn_n_s32(vsumh, W_BITS1-5);
-
-                    int16x8_t diff = vqsubq_s16(vcombine_s16(vsumnl, vsumnh), vI); // saturating difference: interpolated next-image value minus stored patch value
-
-                    int32x4_t vb1l = vmull_s16(vget_low_s16(diff), vget_low_s16(vDerivI.val[0]));
-                    int32x4_t vb2h = vmull_s16(vget_high_s16(diff), vget_high_s16(vDerivI.val[1]));
-                    int32x4_t vb1i = vmlal_s16(vb1l, vget_high_s16(diff), vget_high_s16(vDerivI.val[0]));
-                    int32x4_t vb2i = vmlal_s16(vb2h, vget_low_s16(diff), vget_low_s16(vDerivI.val[1]));
-
-                    float32x4_t vb1f = vcvtq_f32_s32(vb1i);
-                    float32x4_t vb2f = vcvtq_f32_s32(vb2i);
-
-                    vb1 = vaddq_f32(vb1, vb1f);
-                    vb2 = vaddq_f32(vb2, vb2f);
-                }
-
-                for( ; x < wwcn; x++, dIptr += 2 ) // scalar tail for the remaining wwcn % 8 columns
-                {
-                    s32 diff = CV_DESCALE(Jptr[x]*iw00 + Jptr[x+cn]*iw01 +
-                                          Jptr[x+nextStride]*iw10 + Jptr[x+nextStride+cn]*iw11,
-                                          W_BITS1-5) - Iptr[x];
-                    b1 += (f32)(diff*dIptr[0]);
-                    b2 += (f32)(diff*dIptr[1]);
-                }
-            }
-            // Reduce the vector accumulators: bbuf[0] receives the sum of vb1's lanes, bbuf[1] the sum of vb2's lanes
-            f32 bbuf[2];
-            float32x2_t vb = vpadd_f32(vadd_f32(vget_low_f32(vb1), vget_high_f32(vb1)), vadd_f32(vget_low_f32(vb2), vget_high_f32(vb2)));
-            vst1_f32(bbuf, vb);
-            b1 += bbuf[0];
-            b2 += bbuf[1];
-
-            b1 *= FLT_SCALE;
-            b2 *= FLT_SCALE;
-            // Displacement for this iteration from A11, A12, A22 and (b1, b2); D already holds 1/determinant
-            f32 deltaX = (f32)((A12*b2 - A22*b1) * D);
-            f32 deltaY = (f32)((A12*b1 - A11*b2) * D);
-
-            nextPtX += deltaX;
-            nextPtY += deltaY;
-            nextPts[ptref+0] = nextPtX + halfWinX;
-            nextPts[ptref+1] = nextPtY + halfWinY;
-
-            if( ((double)deltaX*deltaX + (double)deltaY*deltaY) <= terminationEpsilon ) // squared step length is small enough
-                break;
-            // Two consecutive steps that nearly cancel each other: back off by half of the last step and stop
-            if( j > 0 && std::abs(deltaX + prevDeltaX) < 0.01 &&
-               std::abs(deltaY + prevDeltaY) < 0.01 )
-            {
-                nextPts[ptref+0] -= deltaX*0.5f;
-                nextPts[ptref+1] -= deltaY*0.5f;
-                break;
-            }
-            prevDeltaX = deltaX;
-            prevDeltaY = deltaY;
-        }
-        // Residual error at level 0 (only when status is provided and still non-zero): mean absolute difference between the stored patch and the window at the final position
-        if( status && status[ptidx] && err && level == 0 && !getMinEigenVals )
-        {
-            f32 nextPointX = nextPts[ptref+0] - halfWinX;
-            f32 nextPointY = nextPts[ptref+1] - halfWinY;
-
-            s32 inextPointX = floor(nextPointX);
-            s32 inextPointY = floor(nextPointY);
-
-            if( inextPointX < -(s32)winSize.width || inextPointX >= (s32)size.width ||
-                inextPointY < -(s32)winSize.height || inextPointY >= (s32)size.height )
-            {
-                if( status ) // always true here: status was already checked by the enclosing condition
-                    status[ptidx] = false;
-                continue;
-            }
-
-            f32 aa = nextPointX - inextPointX;
-            f32 bb = nextPointY - inextPointY;
-            iw00 = round((1.f - aa)*(1.f - bb)*(1 << W_BITS));
-            iw01 = round(aa*(1.f - bb)*(1 << W_BITS));
-            iw10 = round((1.f - aa)*bb*(1 << W_BITS));
-            iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
-            f32 errval = 0.f;
-
-            for(s32 y = 0; y < (s32)winSize.height; y++ )
-            {
-                const u8* Jptr = nextData + nextStride*(y + inextPointY) + inextPointX*cn;
-                const s16* Iptr = IWinBuf + y*IWinBufStride;
-
-                for( x = 0; x < wwcn; x++ )
-                {
-                    s32 diff = CV_DESCALE(Jptr[x]*iw00 + Jptr[x+cn]*iw01 +
-                                          Jptr[x+nextStride]*iw10 + Jptr[x+nextStride+cn]*iw11,
-                                          W_BITS1-5) - Iptr[x];
-                    errval += std::abs((f32)diff);
-                }
-            }
-            err[ptidx] = errval / (32*wwcn*winSize.height); // 32 = 1 << 5 removes the 5 fractional bits kept in the patch values
-        }
-    }
-#else // no NEON: nothing is computed; the casts below only mark the parameters as used
-    (void)size;
-    (void)cn;
-    (void)prevData;
-    (void)prevStride;
-    (void)prevDerivData;
-    (void)prevDerivStride;
-    (void)nextData;
-    (void)nextStride;
-    (void)prevPts;
-    (void)nextPts;
-    (void)status;
-    (void)err;
-    (void)winSize;
-    (void)terminationCount;
-    (void)terminationEpsilon;
-    (void)level;
-    (void)maxLevel;
-    (void)useInitialFlow;
-    (void)getMinEigenVals;
-    (void)minEigThreshold;
-    (void)ptCount;
-#endif
-}
-
-}//CAROTENE_NS
-
--- a/3rdparty/carotene/src/phase.cpp
+++ b/3rdparty/carotene/src/phase.cpp
@ -1,274 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2012-2015, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-#include <cfloat>
-#include <cmath>
-
-#include "common.hpp"
-
-namespace CAROTENE_NS {
-
-#ifdef CAROTENE_NEON
-
-namespace {
-
-#define FASTATAN2CONST(scale) \
-        f32 P1((f32)( 0.9997878412794807  * (180.0 / M_PI) * scale)), \
-        P3((f32)(-0.3258083974640975  * (180.0 / M_PI) * scale)), \
-        P5((f32)( 0.1555786518463281  * (180.0 / M_PI) * scale)), \
-        P7((f32)(-0.04432655554792128 * (180.0 / M_PI) * scale)), \
-         A_90((f32)(90.f * scale)), \
-        A_180((f32)(180.f * scale)), \
-        A_360((f32)(360.f * scale)); \
-        float32x4_t eps(vdupq_n_f32((float)DBL_EPSILON)), \
-         _90(vdupq_n_f32(A_90)), \
-        _180(vdupq_n_f32(A_180)), \
-        _360(vdupq_n_f32(A_360)), \
-           z(vdupq_n_f32(0.0f)), \
-        p1(vdupq_n_f32(P1)), \
-        p3(vdupq_n_f32(P3)), \
-        p5(vdupq_n_f32(P5)), \
-        p7(vdupq_n_f32(P7));
-
-#define FASTATAN2SCALAR(y, x, a) \
-    { \
-        f32 ax = std::abs(x), ay = std::abs(y); \
-        f32 c, c2; \
-        if (ax >= ay) \
-        { \
-            c = ay / (ax + (float)DBL_EPSILON); \
-            c2 = c * c; \
-            a = (((P7 * c2 + P5) * c2 + P3) * c2 + P1) * c; \
-        } \
-        else \
-        { \
-            c = ax / (ay + (float)DBL_EPSILON); \
-            c2 = c * c; \
-            a = A_90 - (((P7 * c2 + P5) * c2 + P3) * c2 + P1) * c; \
-        } \
-        if (x < 0) \
-            a = A_180 - a; \
-        if (y < 0) \
-            a = A_360 - a; \
-    }
-
-#define FASTATAN2VECTOR(v_y, v_x, a) \
-    { \
-        float32x4_t ax = vabsq_f32(v_x), ay = vabsq_f32(v_y); \
-        float32x4_t tmin = vminq_f32(ax, ay), tmax = vmaxq_f32(ax, ay); \
-        float32x4_t c = vmulq_f32(tmin, internal::vrecpq_f32(vaddq_f32(tmax, eps))); \
-        float32x4_t c2 = vmulq_f32(c, c); \
-        a = vmulq_f32(c2, p7); \
- \
-        a = vmulq_f32(vaddq_f32(a, p5), c2); \
-        a = vmulq_f32(vaddq_f32(a, p3), c2); \
-        a = vmulq_f32(vaddq_f32(a, p1), c); \
- \
-        a = vbslq_f32(vcgeq_f32(ax, ay), a, vsubq_f32(_90, a)); \
-        a = vbslq_f32(vcltq_f32(v_x, z), vsubq_f32(_180, a), a); \
-        a = vbslq_f32(vcltq_f32(v_y, z), vsubq_f32(_360, a), a); \
- \
-    }
-
-} // namespace
-
-#endif
-
-void phase(const Size2D &size,
-           const s16 * src0Base, ptrdiff_t src0Stride,
-           const s16 * src1Base, ptrdiff_t src1Stride,
-           u8 * dstBase, ptrdiff_t dstStride)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    FASTATAN2CONST(256.0f / 360.0f)
-    size_t roiw16 = size.width >= 15 ? size.width - 15 : 0;
-    size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
-
-    float32x4_t v_05 = vdupq_n_f32(0.5f);
-
-    for (size_t i = 0; i < size.height; ++i)
-    {
-        const s16 * src0 = internal::getRowPtr(src0Base, src0Stride, i);
-        const s16 * src1 = internal::getRowPtr(src1Base, src1Stride, i);
-        u8 * dst = internal::getRowPtr(dstBase, dstStride, i);
-        size_t j = 0;
-
-        for (; j < roiw16; j += 16)
-        {
-            internal::prefetch(src0 + j);
-            internal::prefetch(src1 + j);
-
-            int16x8_t v_src00 = vld1q_s16(src0 + j), v_src01 = vld1q_s16(src0 + j + 8);
-            int16x8_t v_src10 = vld1q_s16(src1 + j), v_src11 = vld1q_s16(src1 + j + 8);
-
-            // 0
-            float32x4_t v_src0_p = vcvtq_f32_s32(vmovl_s16(vget_low_s16(v_src00)));
-            float32x4_t v_src1_p = vcvtq_f32_s32(vmovl_s16(vget_low_s16(v_src10)));
-            float32x4_t v_dst32f0;
-            FASTATAN2VECTOR(v_src1_p, v_src0_p, v_dst32f0)
-
-            v_src0_p = vcvtq_f32_s32(vmovl_s16(vget_high_s16(v_src00)));
-            v_src1_p = vcvtq_f32_s32(vmovl_s16(vget_high_s16(v_src10)));
-            float32x4_t v_dst32f1;
-            FASTATAN2VECTOR(v_src1_p, v_src0_p, v_dst32f1)
-
-            uint16x8_t v_dst16s0 = vcombine_u16(vmovn_u32(vcvtq_u32_f32(vaddq_f32(v_dst32f0, v_05))),
-                                                vmovn_u32(vcvtq_u32_f32(vaddq_f32(v_dst32f1, v_05))));
-
-            // 1
-            v_src0_p = vcvtq_f32_s32(vmovl_s16(vget_low_s16(v_src01)));
-            v_src1_p = vcvtq_f32_s32(vmovl_s16(vget_low_s16(v_src11)));
-            FASTATAN2VECTOR(v_src1_p, v_src0_p, v_dst32f0)
-
-            v_src0_p = vcvtq_f32_s32(vmovl_s16(vget_high_s16(v_src01)));
-            v_src1_p = vcvtq_f32_s32(vmovl_s16(vget_high_s16(v_src11)));
-            FASTATAN2VECTOR(v_src1_p, v_src0_p, v_dst32f1)
-
-            uint16x8_t v_dst16s1 = vcombine_u16(vmovn_u32(vcvtq_u32_f32(vaddq_f32(v_dst32f0, v_05))),
-                                                vmovn_u32(vcvtq_u32_f32(vaddq_f32(v_dst32f1, v_05))));
-
-            vst1q_u8(dst + j, vcombine_u8(vmovn_u16(v_dst16s0),
-                                          vmovn_u16(v_dst16s1)));
-        }
-        for (; j < roiw8; j += 8)
-        {
-            int16x8_t v_src0 = vld1q_s16(src0 + j);
-            int16x8_t v_src1 = vld1q_s16(src1 + j);
-
-            float32x4_t v_src0_p = vcvtq_f32_s32(vmovl_s16(vget_low_s16(v_src0)));
-            float32x4_t v_src1_p = vcvtq_f32_s32(vmovl_s16(vget_low_s16(v_src1)));
-            float32x4_t v_dst32f0;
-            FASTATAN2VECTOR(v_src1_p, v_src0_p, v_dst32f0)
-
-            v_src0_p = vcvtq_f32_s32(vmovl_s16(vget_high_s16(v_src0)));
-            v_src1_p = vcvtq_f32_s32(vmovl_s16(vget_high_s16(v_src1)));
-            float32x4_t v_dst32f1;
-            FASTATAN2VECTOR(v_src1_p, v_src0_p, v_dst32f1)
-
-            uint16x8_t v_dst = vcombine_u16(vmovn_u32(vcvtq_u32_f32(vaddq_f32(v_dst32f0, v_05))),
-                                            vmovn_u32(vcvtq_u32_f32(vaddq_f32(v_dst32f1, v_05))));
-
-            vst1_u8(dst + j, vmovn_u16(v_dst));
-        }
-
-        for (; j < size.width; j++)
-        {
-            f32 x = src0[j], y = src1[j];
-            f32 a;
-            FASTATAN2SCALAR(y, x, a)
-            dst[j] = (u8)(s32)floor(a + 0.5f);
-        }
-    }
-#else
-    (void)size;
-    (void)src0Base;
-    (void)src0Stride;
-    (void)src1Base;
-    (void)src1Stride;
-    (void)dstBase;
-    (void)dstStride;
-#endif
-}
-
-void phase(const Size2D &size,
-           const f32 * src0Base, ptrdiff_t src0Stride,
-           const f32 * src1Base, ptrdiff_t src1Stride,
-           f32 * dstBase, ptrdiff_t dstStride,
-           f32 scale)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    FASTATAN2CONST(scale)
-    size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
-
-    for (size_t i = 0; i < size.height; ++i)
-    {
-        const f32 * src0 = internal::getRowPtr(src0Base, src0Stride, i);
-        const f32 * src1 = internal::getRowPtr(src1Base, src1Stride, i);
-        f32 * dst = internal::getRowPtr(dstBase, dstStride, i);
-        size_t j = 0;
-
-        for (; j < roiw8; j += 8)
-        {
-            internal::prefetch(src0 + j);
-            internal::prefetch(src1 + j);
-
-            float32x4_t v_src00 = vld1q_f32(src0 + j), v_src01 = vld1q_f32(src0 + j + 4);
-            float32x4_t v_src10 = vld1q_f32(src1 + j), v_src11 = vld1q_f32(src1 + j + 4);
-
-            float32x4_t v_dst32f;
-            // 0
-            FASTATAN2VECTOR(v_src10, v_src00, v_dst32f)
-            vst1q_f32(dst + j,     v_dst32f);
-            // 1
-            FASTATAN2VECTOR(v_src11, v_src01, v_dst32f)
-            vst1q_f32(dst + j + 4, v_dst32f);
-        }
-        if(j + 4 <= size.width)
-        {
-            float32x4_t v_src0 = vld1q_f32(src0 + j);
-            float32x4_t v_src1 = vld1q_f32(src1 + j);
-
-            float32x4_t v_dst32f;
-            FASTATAN2VECTOR(v_src1, v_src0, v_dst32f)
-            vst1q_f32(dst + j, v_dst32f);
-            j += 4;
-        }
-
-        for (; j < size.width; j++)
-        {
-            f32 a;
-            FASTATAN2SCALAR(src1[j], src0[j], a)
-            dst[j] = a;
-        }
-    }
-#else
-    (void)size;
-    (void)src0Base;
-    (void)src0Stride;
-    (void)src1Base;
-    (void)src1Stride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)scale;
-#endif
-}
-
-} // namespace CAROTENE_NS
--- a/3rdparty/carotene/src/pyramid.cpp
+++ b/3rdparty/carotene/src/pyramid.cpp
--- a/3rdparty/carotene/src/reduce.cpp
+++ b/3rdparty/carotene/src/reduce.cpp
@ -1,460 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2012-2015, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-#include "common.hpp"
-
-#include <cstring>
-
-namespace CAROTENE_NS {
-
-void reduceColSum(const Size2D &size,
-                  const u8 * srcBase, ptrdiff_t srcStride,
-                  s32 * dstBase)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    memset(dstBase, 0, size.width*sizeof(s32));
-    size_t i = 0;
-    for (; i + 16 <= size.width; i += 16)
-    {
-        const u8* src_address = srcBase + i;
-
-        int32x4_t sll = vmovq_n_s32(0);
-        int32x4_t slh = vmovq_n_s32(0);
-        int32x4_t shl = vmovq_n_s32(0);
-        int32x4_t shh = vmovq_n_s32(0);
-
-        for (size_t h = 0; h < size.height; h += 256)
-        {
-            size_t lim = std::min(h + 256, size.height);
-
-            uint16x8_t sl = vmovq_n_u16(0);
-            uint16x8_t sh = vmovq_n_u16(0);
-
-            for (size_t k = h; k < lim; ++k, src_address += srcStride)
-            {
-                internal::prefetch(src_address + srcStride, 0);
-
-                uint8x16_t v = vld1q_u8(src_address);
-
-                sl = vaddw_u8(sl, vget_low_u8(v));
-                sh = vaddw_u8(sh, vget_high_u8(v));
-            }
-
-            int32x4_t vsll = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(sl)));
-            int32x4_t vslh = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(sl)));
-            int32x4_t vshl = vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(sh)));
-            int32x4_t vshh = vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(sh)));
-
-            sll = vqaddq_s32(sll, vsll);
-            slh = vqaddq_s32(slh, vslh);
-            shl = vqaddq_s32(shl, vshl);
-            shh = vqaddq_s32(shh, vshh);
-        }
-
-        vst1q_s32(dstBase + i + 0, sll);
-        vst1q_s32(dstBase + i + 4, slh);
-        vst1q_s32(dstBase + i + 8, shl);
-        vst1q_s32(dstBase + i + 12, shh);
-    }
-
-    for(size_t h = 0; h < size.height; ++h)
-    {
-        for(size_t j = i ; j < size.width; j++ )
-        {
-            if (((u32)(dstBase[j] += srcBase[j + srcStride * h])) > 0x7fFFffFFu)
-                dstBase[j] = 0x7fFFffFF;
-        }
-    }
-#else
-    (void)size;
-    (void)srcBase;
-    (void)srcStride;
-    (void)dstBase;
-#endif
-}
-
-void reduceColMax(const Size2D &size,
-                  const u8 * srcBase, ptrdiff_t srcStride,
-                  u8 * dstBase)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    memcpy(dstBase, srcBase, size.width);
-    size_t i = 0;
-    for (; i + 16*4 <= size.width; i += 16*4)
-    {
-        const u8* src_address = srcBase + i;
-
-        uint8x16_t s1 = vld1q_u8(src_address + 0);
-        uint8x16_t s2 = vld1q_u8(src_address + 16);
-        uint8x16_t s3 = vld1q_u8(src_address + 32);
-        uint8x16_t s4 = vld1q_u8(src_address + 48);
-
-        src_address += srcStride;
-
-        for(size_t h = 1; h < size.height; ++h, src_address += srcStride)
-        {
-            internal::prefetch(src_address + srcStride, 0);
-            internal::prefetch(src_address + srcStride, 32);
-
-            uint8x16_t v1 = vld1q_u8(src_address + 0);
-            uint8x16_t v2 = vld1q_u8(src_address + 16);
-            uint8x16_t v3 = vld1q_u8(src_address + 32);
-            uint8x16_t v4 = vld1q_u8(src_address + 48);
-
-            s1 = vmaxq_u8(s1, v1);
-            s2 = vmaxq_u8(s2, v2);
-            s3 = vmaxq_u8(s3, v3);
-            s4 = vmaxq_u8(s4, v4);
-        }
-
-        vst1q_u8(dstBase + i + 0, s1);
-        vst1q_u8(dstBase + i + 16, s2);
-        vst1q_u8(dstBase + i + 32, s3);
-        vst1q_u8(dstBase + i + 48, s4);
-    }
-
-    for (; i + 16 <= size.width; i += 16)
-    {
-        const u8* src_address = srcBase + i;
-        uint8x16_t s1 = vld1q_u8(src_address);
-        src_address += srcStride;
-        for(size_t h = 1; h < size.height; ++h, src_address += srcStride)
-        {
-            internal::prefetch(src_address + srcStride, 0);
-
-            uint8x16_t v1 = vld1q_u8(src_address);
-            s1 = vmaxq_u8(s1, v1);
-        }
-        vst1q_u8(dstBase + i, s1);
-    }
-
-    if (i < size.width)
-        for(size_t h = 1; h < size.height; ++h)
-            for(size_t j = i ; j < size.width; j++ )
-                dstBase[j] = std::max(dstBase[j], srcBase[j + srcStride * h]);
-#else
-    (void)size;
-    (void)srcBase;
-    (void)srcStride;
-    (void)dstBase;
-#endif
-}
-
-void reduceColMin(const Size2D &size,
-                  const u8 * srcBase, ptrdiff_t srcStride,
-                  u8 * dstBase)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    memcpy(dstBase, srcBase, size.width);
-    size_t i = 0;
-    for (; i + 16*4 <= size.width; i += 16*4)
-    {
-        const u8* src_address = srcBase + i;
-
-        uint8x16_t s1 = vld1q_u8(src_address + 0);
-        uint8x16_t s2 = vld1q_u8(src_address + 16);
-        uint8x16_t s3 = vld1q_u8(src_address + 32);
-        uint8x16_t s4 = vld1q_u8(src_address + 48);
-
-        src_address += srcStride;
-
-        for(size_t h = 1; h < size.height; ++h, src_address += srcStride)
-        {
-            internal::prefetch(src_address + srcStride, 0);
-            internal::prefetch(src_address + srcStride, 32);
-
-            uint8x16_t v1 = vld1q_u8(src_address + 0);
-            uint8x16_t v2 = vld1q_u8(src_address + 16);
-            uint8x16_t v3 = vld1q_u8(src_address + 32);
-            uint8x16_t v4 = vld1q_u8(src_address + 48);
-
-            s1 = vminq_u8(s1, v1);
-            s2 = vminq_u8(s2, v2);
-            s3 = vminq_u8(s3, v3);
-            s4 = vminq_u8(s4, v4);
-        }
-
-        vst1q_u8(dstBase + i + 0, s1);
-        vst1q_u8(dstBase + i + 16, s2);
-        vst1q_u8(dstBase + i + 32, s3);
-        vst1q_u8(dstBase + i + 48, s4);
-    }
-
-    for (; i + 16 <= size.width; i += 16)
-    {
-        const u8* src_address = srcBase + i;
-        uint8x16_t s1 = vld1q_u8(src_address);
-        src_address += srcStride;
-        for(size_t h = 1; h < size.height; ++h, src_address += srcStride)
-        {
-            internal::prefetch(src_address + srcStride, 0);
-
-            uint8x16_t v1 = vld1q_u8(src_address);
-            s1 = vminq_u8(s1, v1);
-        }
-        vst1q_u8(dstBase + i, s1);
-    }
-
-    if (i < size.width)
-        for(size_t h = 1; h < size.height; ++h)
-            for(size_t j = i ; j < size.width; j++ )
-                dstBase[j] = std::min(dstBase[j], srcBase[j + srcStride * h]);
-#else
-    (void)size;
-    (void)srcBase;
-    (void)srcStride;
-    (void)dstBase;
-#endif
-}
-
-void reduceColSum(const Size2D &size,
-                  const f32 * srcBase, ptrdiff_t srcStride,
-                  f32 * dstBase)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    memcpy(dstBase, srcBase, size.width*sizeof(f32));
-    size_t srcstep = srcStride/sizeof(f32);
-    size_t i = 0;
-    for (; i + 16 <= size.width; i += 16)
-    {
-        const f32* src_address = srcBase + i;
-
-        float32x4_t s1 = vld1q_f32(src_address + 0);
-        float32x4_t s2 = vld1q_f32(src_address + 4);
-        float32x4_t s3 = vld1q_f32(src_address + 8);
-        float32x4_t s4 = vld1q_f32(src_address + 12);
-
-        src_address += srcstep;
-
-        for(size_t h = 1; h < size.height; ++h, src_address += srcstep)
-        {
-            internal::prefetch(src_address + srcstep, 0);
-            internal::prefetch(src_address + srcstep, 32);
-
-            float32x4_t v1 = vld1q_f32(src_address + 0);
-            float32x4_t v2 = vld1q_f32(src_address + 4);
-            float32x4_t v3 = vld1q_f32(src_address + 8);
-            float32x4_t v4 = vld1q_f32(src_address + 12);
-
-            s1 = vaddq_f32(s1, v1);
-            s2 = vaddq_f32(s2, v2);
-            s3 = vaddq_f32(s3, v3);
-            s4 = vaddq_f32(s4, v4);
-        }
-
-        vst1q_f32(dstBase + i + 0, s1);
-        vst1q_f32(dstBase + i + 4, s2);
-        vst1q_f32(dstBase + i + 8, s3);
-        vst1q_f32(dstBase + i + 12, s4);
-    }
-
-    for (; i + 4 <= size.width; i += 4)
-    {
-        const f32* src_address = srcBase + i;
-        float32x4_t s1 = vld1q_f32(src_address);
-        src_address += srcstep;
-        for(size_t h = 1; h < size.height; ++h, src_address += srcstep)
-        {
-            internal::prefetch(src_address + srcstep, 0);
-
-            float32x4_t v1 = vld1q_f32(src_address);
-            s1 = vaddq_f32(s1, v1);
-        }
-        vst1q_f32(dstBase + i, s1);
-    }
-
-    if (i < size.width)
-        for(size_t h = 1; h < size.height; ++h)
-        {
-            for(size_t j = i ; j < size.width; j++ )
-            {
-                dstBase[j] += srcBase[j + srcstep * h];
-            }
-        }
-#else
-    (void)size;
-    (void)srcBase;
-    (void)srcStride;
-    (void)dstBase;
-#endif
-}
-
-void reduceColMax(const Size2D &size,
-                  const f32 * srcBase, ptrdiff_t srcStride,
-                  f32 * dstBase)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    memcpy(dstBase, srcBase, size.width*sizeof(f32));
-    size_t srcstep = srcStride/sizeof(f32);
-    size_t i = 0;
-    for (; i + 16 <= size.width; i += 16)
-    {
-        const f32* src_address = srcBase + i;
-
-        float32x4_t s1 = vld1q_f32(src_address + 0);
-        float32x4_t s2 = vld1q_f32(src_address + 4);
-        float32x4_t s3 = vld1q_f32(src_address + 8);
-        float32x4_t s4 = vld1q_f32(src_address + 12);
-
-        src_address += srcstep;
-
-        for(size_t h = 1; h < size.height; ++h, src_address += srcstep)
-        {
-            internal::prefetch(src_address + srcstep, 0);
-            internal::prefetch(src_address + srcstep, 32);
-
-            float32x4_t v1 = vld1q_f32(src_address + 0);
-            float32x4_t v2 = vld1q_f32(src_address + 4);
-            float32x4_t v3 = vld1q_f32(src_address + 8);
-            float32x4_t v4 = vld1q_f32(src_address + 12);
-
-            s1 = vmaxq_f32(s1, v1);
-            s2 = vmaxq_f32(s2, v2);
-            s3 = vmaxq_f32(s3, v3);
-            s4 = vmaxq_f32(s4, v4);
-        }
-
-        vst1q_f32(dstBase + i + 0, s1);
-        vst1q_f32(dstBase + i + 4, s2);
-        vst1q_f32(dstBase + i + 8, s3);
-        vst1q_f32(dstBase + i + 12, s4);
-    }
-
-    for (; i + 4 <= size.width; i += 4)
-    {
-        const f32* src_address = srcBase + i;
-        float32x4_t s1 = vld1q_f32(src_address);
-        src_address += srcstep;
-        for(size_t h = 1; h < size.height; ++h, src_address += srcstep)
-        {
-            internal::prefetch(src_address + srcstep, 0);
-
-            float32x4_t v1 = vld1q_f32(src_address);
-            s1 = vmaxq_f32(s1, v1);
-        }
-        vst1q_f32(dstBase + i, s1);
-    }
-
-    if (i < size.width)
-        for(size_t h = 1; h < size.height; ++h)
-            for(size_t j = i ; j < size.width; j++ )
-                dstBase[j] = std::max(dstBase[j], srcBase[j + srcstep * h]);
-#else
-    (void)size;
-    (void)srcBase;
-    (void)srcStride;
-    (void)dstBase;
-#endif
-}
-
-void reduceColMin(const Size2D &size,
-                  const f32 * srcBase, ptrdiff_t srcStride,
-                  f32 * dstBase)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    memcpy(dstBase, srcBase, size.width*sizeof(f32));
-    size_t srcstep = srcStride/sizeof(f32);
-    size_t i = 0;
-    for (; i + 16 <= size.width; i += 16)
-    {
-        const f32* src_address = srcBase + i;
-
-        float32x4_t s1 = vld1q_f32(src_address + 0);
-        float32x4_t s2 = vld1q_f32(src_address + 4);
-        float32x4_t s3 = vld1q_f32(src_address + 8);
-        float32x4_t s4 = vld1q_f32(src_address + 12);
-
-        src_address += srcstep;
-
-        for(size_t h = 1; h < size.height; ++h, src_address += srcstep)
-        {
-            internal::prefetch(src_address + srcstep, 0);
-            internal::prefetch(src_address + srcstep, 32);
-
-            float32x4_t v1 = vld1q_f32(src_address + 0);
-            float32x4_t v2 = vld1q_f32(src_address + 4);
-            float32x4_t v3 = vld1q_f32(src_address + 8);
-            float32x4_t v4 = vld1q_f32(src_address + 12);
-
-            s1 = vminq_f32(s1, v1);
-            s2 = vminq_f32(s2, v2);
-            s3 = vminq_f32(s3, v3);
-            s4 = vminq_f32(s4, v4);
-        }
-
-        vst1q_f32(dstBase + i + 0, s1);
-        vst1q_f32(dstBase + i + 4, s2);
-        vst1q_f32(dstBase + i + 8, s3);
-        vst1q_f32(dstBase + i + 12, s4);
-    }
-
-    for (; i + 4 <= size.width; i += 4)
-    {
-        const f32* src_address = srcBase + i;
-        float32x4_t s1 = vld1q_f32(src_address);
-        src_address += srcstep;
-        for(size_t h = 1; h < size.height; ++h, src_address += srcstep)
-        {
-            internal::prefetch(src_address + srcstep, 0);
-
-            float32x4_t v1 = vld1q_f32(src_address);
-            s1 = vminq_f32(s1, v1);
-        }
-        vst1q_f32(dstBase + i, s1);
-    }
-
-    if (i < size.width)
-        for(size_t h = 1; h < size.height; ++h)
-            for(size_t j = i ; j < size.width; j++ )
-                dstBase[j] = std::min(dstBase[j], srcBase[j + srcstep * h]);
-#else
-    (void)size;
-    (void)srcBase;
-    (void)srcStride;
-    (void)dstBase;
-#endif
-}
-
-} // namespace CAROTENE_NS
--- a/3rdparty/carotene/src/remap.cpp
+++ b/3rdparty/carotene/src/remap.cpp
@ -1,694 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2015, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-#include "remap.hpp"
-
-namespace CAROTENE_NS {
-
-#ifdef CAROTENE_NEON
-
-namespace internal {
-
-// Gather step of the nearest-neighbour remap for one block.
-// Each destination pixel (row, col) is copied from srcBase at the offset
-// stored in the matching map entry. The map is walked with a row pitch of
-// size.width * sizeof(s32); the destination is walked with dstStride.
-// No offset is validated here: every map entry is used to index srcBase.
-void remapNearestNeighborReplicate(const Size2D size,
-                                   const u8 * srcBase,
-                                   const s32 * map,
-                                   u8 * dstBase, ptrdiff_t dstStride)
-{
-    const size_t mapRowPitch = size.width * sizeof(s32);
-
-    for (size_t row = 0; row != size.height; ++row)
-    {
-        const s32 * offsets = internal::getRowPtr(map, mapRowPitch, row);
-        u8 * out = internal::getRowPtr(dstBase, dstStride, row);
-
-        for (size_t col = 0; col != size.width; ++col)
-            out[col] = srcBase[offsets[col]];
-    }
-}
-
-// Gather step of the nearest-neighbour remap for one block, constant border.
-// A map entry >= 0 is used as an offset into srcBase; a negative entry means
-// the pixel has no source and borderValue is written instead.
-// The map is walked with a row pitch of size.width * sizeof(s32); the
-// destination is walked with dstStride.
-void remapNearestNeighborConst(const Size2D size,
-                               const u8 * srcBase,
-                               const s32 * map,
-                               u8 * dstBase, ptrdiff_t dstStride,
-                               u8 borderValue)
-{
-    const size_t mapRowPitch = size.width * sizeof(s32);
-
-    for (size_t row = 0; row != size.height; ++row)
-    {
-        const s32 * offsets = internal::getRowPtr(map, mapRowPitch, row);
-        u8 * out = internal::getRowPtr(dstBase, dstStride, row);
-
-        for (size_t col = 0; col != size.width; ++col)
-        {
-            const s32 offset = offsets[col];
-            if (offset < 0)
-                out[col] = borderValue;
-            else
-                out[col] = srcBase[offset];
-        }
-    }
-}
-
-void remapLinearReplicate(const Size2D size,
-                          const u8 * srcBase,
-                          const s32 * map,
-                          const f32 * coeffs,
-                          u8 * dstBase, ptrdiff_t dstStride)
-{   /* Bilinear gather for one block of size.width x size.height pixels.
-     *
-     * For destination pixel x of a row:
-     *   - map_row[4x + 0..3] are four offsets into srcBase, referred to
-     *     below as src00, src01, src10 and src11;
-     *   - coeff_row[2x] and coeff_row[2x + 1] are the two blend weights.
-     * The value written is
-     *   top    = src00 + (src01 - src00) * coeff_row[2x]
-     *   bottom = src10 + (src11 - src10) * coeff_row[2x]
-     *   dst    = top + (bottom - top) * coeff_row[2x + 1]
-     * Every offset is used as-is to index srcBase; nothing is validated here.
-     */
-    int16x8_t v_zero16 = vdupq_n_s16(0);
-
-    for (size_t y = 0; y < size.height; ++y)
-    {
-        const s32 * map_row = internal::getRowPtr(map, size.width * sizeof(s32) * 4, y); // four s32 offsets per pixel
-        const f32 * coeff_row = internal::getRowPtr(coeffs, size.width * sizeof(f32) * 2, y); // two f32 weights per pixel
-
-        u8 * dst_row = internal::getRowPtr(dstBase, dstStride, y);
-
-        size_t x = 0;
-        for ( ; x + 8 < size.width; x += 8) // 8 pixels per pass; the strict '<' leaves a final full group of 8 to the scalar loop
-        {
-            int16x8_t v_src00 = vsetq_lane_s16(srcBase[map_row[(x << 2)]], v_zero16, 0); // lane k <- src00 sample of pixel x + k
-            v_src00 = vsetq_lane_s16(srcBase[map_row[(x << 2) + 4]], v_src00, 1);
-            v_src00 = vsetq_lane_s16(srcBase[map_row[(x << 2) + 8]], v_src00, 2);
-            v_src00 = vsetq_lane_s16(srcBase[map_row[(x << 2) + 12]], v_src00, 3);
-            v_src00 = vsetq_lane_s16(srcBase[map_row[(x << 2) + 16]], v_src00, 4);
-            v_src00 = vsetq_lane_s16(srcBase[map_row[(x << 2) + 20]], v_src00, 5);
-            v_src00 = vsetq_lane_s16(srcBase[map_row[(x << 2) + 24]], v_src00, 6);
-            v_src00 = vsetq_lane_s16(srcBase[map_row[(x << 2) + 28]], v_src00, 7);
-
-            int16x8_t v_src01 = vsetq_lane_s16(srcBase[map_row[(x << 2) + 1]], v_zero16, 0); // lane k <- src01 sample of pixel x + k
-            v_src01 = vsetq_lane_s16(srcBase[map_row[(x << 2) + 5]], v_src01, 1);
-            v_src01 = vsetq_lane_s16(srcBase[map_row[(x << 2) + 9]], v_src01, 2);
-            v_src01 = vsetq_lane_s16(srcBase[map_row[(x << 2) + 13]], v_src01, 3);
-            v_src01 = vsetq_lane_s16(srcBase[map_row[(x << 2) + 17]], v_src01, 4);
-            v_src01 = vsetq_lane_s16(srcBase[map_row[(x << 2) + 21]], v_src01, 5);
-            v_src01 = vsetq_lane_s16(srcBase[map_row[(x << 2) + 25]], v_src01, 6);
-            v_src01 = vsetq_lane_s16(srcBase[map_row[(x << 2) + 29]], v_src01, 7);
-
-            int16x8_t v_src10 = vsetq_lane_s16(srcBase[map_row[(x << 2) + 2]], v_zero16, 0); // lane k <- src10 sample of pixel x + k
-            v_src10 = vsetq_lane_s16(srcBase[map_row[(x << 2) + 6]], v_src10, 1);
-            v_src10 = vsetq_lane_s16(srcBase[map_row[(x << 2) + 10]], v_src10, 2);
-            v_src10 = vsetq_lane_s16(srcBase[map_row[(x << 2) + 14]], v_src10, 3);
-            v_src10 = vsetq_lane_s16(srcBase[map_row[(x << 2) + 18]], v_src10, 4);
-            v_src10 = vsetq_lane_s16(srcBase[map_row[(x << 2) + 22]], v_src10, 5);
-            v_src10 = vsetq_lane_s16(srcBase[map_row[(x << 2) + 26]], v_src10, 6);
-            v_src10 = vsetq_lane_s16(srcBase[map_row[(x << 2) + 30]], v_src10, 7);
-
-            int16x8_t v_src11 = vsetq_lane_s16(srcBase[map_row[(x << 2) + 3]], v_zero16, 0); // lane k <- src11 sample of pixel x + k
-            v_src11 = vsetq_lane_s16(srcBase[map_row[(x << 2) + 7]], v_src11, 1);
-            v_src11 = vsetq_lane_s16(srcBase[map_row[(x << 2) + 11]], v_src11, 2);
-            v_src11 = vsetq_lane_s16(srcBase[map_row[(x << 2) + 15]], v_src11, 3);
-            v_src11 = vsetq_lane_s16(srcBase[map_row[(x << 2) + 19]], v_src11, 4);
-            v_src11 = vsetq_lane_s16(srcBase[map_row[(x << 2) + 23]], v_src11, 5);
-            v_src11 = vsetq_lane_s16(srcBase[map_row[(x << 2) + 27]], v_src11, 6);
-            v_src11 = vsetq_lane_s16(srcBase[map_row[(x << 2) + 31]], v_src11, 7);
-
-            // first part: pixels x .. x+3 (low halves of the sample vectors)
-            float32x4_t v_src00_f = vcvtq_f32_s32(vmovl_s16(vget_low_s16(v_src00)));
-            float32x4_t v_src10_f = vcvtq_f32_s32(vmovl_s16(vget_low_s16(v_src10)));
-
-            float32x4x2_t v_coeff = vld2q_f32(coeff_row + (x << 1)); // de-interleaving load: val[0] = first weight, val[1] = second weight
-            float32x4_t v_dst_0 = vmlaq_f32(v_src00_f, vcvtq_f32_s32(vsubl_s16(vget_low_s16(v_src01),
-                                                                               vget_low_s16(v_src00))), v_coeff.val[0]); // src00 + (src01 - src00) * w0
-            float32x4_t v_dst_1 = vmlaq_f32(v_src10_f, vcvtq_f32_s32(vsubl_s16(vget_low_s16(v_src11),
-                                                                               vget_low_s16(v_src10))), v_coeff.val[0]); // src10 + (src11 - src10) * w0
-
-            float32x4_t v_dst = vmlaq_f32(v_dst_0, vsubq_f32(v_dst_1, v_dst_0), v_coeff.val[1]); // blend the two partial results with w1
-            uint16x4_t v_dst0 = vmovn_u32(vcvtq_u32_f32(v_dst));
-
-            // second part: pixels x+4 .. x+7 (high halves of the sample vectors)
-            v_src00_f = vcvtq_f32_s32(vmovl_s16(vget_high_s16(v_src00)));
-            v_src10_f = vcvtq_f32_s32(vmovl_s16(vget_high_s16(v_src10)));
-
-            v_coeff = vld2q_f32(coeff_row + (x << 1) + 8); // weights of the next four pixels (8 floats further on)
-            v_dst_0 = vmlaq_f32(v_src00_f, vcvtq_f32_s32(vsubl_s16(vget_high_s16(v_src01),
-                                                                   vget_high_s16(v_src00))), v_coeff.val[0]);
-            v_dst_1 = vmlaq_f32(v_src10_f, vcvtq_f32_s32(vsubl_s16(vget_high_s16(v_src11),
-                                                                   vget_high_s16(v_src10))), v_coeff.val[0]);
-
-            v_dst = vmlaq_f32(v_dst_0, vsubq_f32(v_dst_1, v_dst_0), v_coeff.val[1]);
-            uint16x4_t v_dst1 = vmovn_u32(vcvtq_u32_f32(v_dst));
-
-            // store: join both halves, narrow to 8 bits and write 8 destination pixels
-            vst1_u8(dst_row + x, vmovn_u16(vcombine_u16(v_dst0, v_dst1)));
-        }
-
-        for ( ; x < size.width; ++x) // scalar loop: remaining pixels, same formula one pixel at a time
-        {
-            s32 src00_index = map_row[(x << 2)];
-            s32 src10_index = map_row[(x << 2) + 2];
-            f32 dst_val_0 = (srcBase[map_row[(x << 2) + 1]] - srcBase[src00_index]) * coeff_row[x << 1] +
-                             srcBase[src00_index];
-            f32 dst_val_1 = (srcBase[map_row[(x << 2) + 3]] - srcBase[src10_index]) * coeff_row[x << 1] +
-                             srcBase[src10_index];
-            dst_row[x] = floorf((dst_val_1 - dst_val_0) * coeff_row[(x << 1) + 1] + dst_val_0); // floorf here; the vector path converts with vcvtq_u32_f32
-        }
-    }
-}
-
-void remapLinearConst(const Size2D size,
-                      const u8 * srcBase,
-                      const s32 * map,
-                      const f32 * coeffs,
-                      u8 * dstBase, ptrdiff_t dstStride,
-                      u8 borderValue)
-{   /* Bilinear gather for one block of size.width x size.height pixels,
-     * constant-border variant.
-     *
-     * For destination pixel x of a row:
-     *   - map_row[4x + 0..3] select the four samples src00, src01, src10 and
-     *     src11; an entry >= 0 is an offset into srcBase, a negative entry
-     *     makes that sample equal to borderValue;
-     *   - coeff_row[2x] and coeff_row[2x + 1] are the two blend weights.
-     * The value written is
-     *   top    = src00 + (src01 - src00) * coeff_row[2x]
-     *   bottom = src10 + (src11 - src10) * coeff_row[2x]
-     *   dst    = top + (bottom - top) * coeff_row[2x + 1]
-     */
-    int16x8_t v_zero16 = vdupq_n_s16(0);
-
-    for (size_t y = 0; y < size.height; ++y)
-    {
-        const s32 * map_row = internal::getRowPtr(map, size.width * sizeof(s32) * 4, y); // four s32 entries per pixel
-        const f32 * coeff_row = internal::getRowPtr(coeffs, size.width * sizeof(f32) * 2, y); // two f32 weights per pixel
-
-        u8 * dst_row = internal::getRowPtr(dstBase, dstStride, y);
-
-        size_t x = 0;
-        for ( ; x + 8 < size.width; x += 8) // 8 pixels per pass; the strict '<' leaves a final full group of 8 to the scalar loop
-        {
-            int16x8_t v_src00 = vsetq_lane_s16(map_row[(x << 2)] >= 0 ? srcBase[map_row[(x << 2)]] : borderValue, v_zero16, 0); // lane k <- src00 sample of pixel x + k
-            v_src00 = vsetq_lane_s16(map_row[(x << 2) +  4] >= 0 ? srcBase[map_row[(x << 2) +  4]] : borderValue, v_src00, 1);
-            v_src00 = vsetq_lane_s16(map_row[(x << 2) +  8] >= 0 ? srcBase[map_row[(x << 2) +  8]] : borderValue, v_src00, 2);
-            v_src00 = vsetq_lane_s16(map_row[(x << 2) + 12] >= 0 ? srcBase[map_row[(x << 2) + 12]] : borderValue, v_src00, 3);
-            v_src00 = vsetq_lane_s16(map_row[(x << 2) + 16] >= 0 ? srcBase[map_row[(x << 2) + 16]] : borderValue, v_src00, 4);
-            v_src00 = vsetq_lane_s16(map_row[(x << 2) + 20] >= 0 ? srcBase[map_row[(x << 2) + 20]] : borderValue, v_src00, 5);
-            v_src00 = vsetq_lane_s16(map_row[(x << 2) + 24] >= 0 ? srcBase[map_row[(x << 2) + 24]] : borderValue, v_src00, 6);
-            v_src00 = vsetq_lane_s16(map_row[(x << 2) + 28] >= 0 ? srcBase[map_row[(x << 2) + 28]] : borderValue, v_src00, 7);
-
-            int16x8_t v_src01 = vsetq_lane_s16(map_row[(x << 2) + 1] >= 0 ? srcBase[map_row[(x << 2) + 1]] : borderValue, v_zero16, 0); // lane k <- src01 sample of pixel x + k
-            v_src01 = vsetq_lane_s16(map_row[(x << 2) +  5] >= 0 ? srcBase[map_row[(x << 2) +  5]] : borderValue, v_src01, 1);
-            v_src01 = vsetq_lane_s16(map_row[(x << 2) +  9] >= 0 ? srcBase[map_row[(x << 2) +  9]] : borderValue, v_src01, 2);
-            v_src01 = vsetq_lane_s16(map_row[(x << 2) + 13] >= 0 ? srcBase[map_row[(x << 2) + 13]] : borderValue, v_src01, 3);
-            v_src01 = vsetq_lane_s16(map_row[(x << 2) + 17] >= 0 ? srcBase[map_row[(x << 2) + 17]] : borderValue, v_src01, 4);
-            v_src01 = vsetq_lane_s16(map_row[(x << 2) + 21] >= 0 ? srcBase[map_row[(x << 2) + 21]] : borderValue, v_src01, 5);
-            v_src01 = vsetq_lane_s16(map_row[(x << 2) + 25] >= 0 ? srcBase[map_row[(x << 2) + 25]] : borderValue, v_src01, 6);
-            v_src01 = vsetq_lane_s16(map_row[(x << 2) + 29] >= 0 ? srcBase[map_row[(x << 2) + 29]] : borderValue, v_src01, 7);
-
-            int16x8_t v_src10 = vsetq_lane_s16(map_row[(x << 2) + 2] >= 0 ? srcBase[map_row[(x << 2) + 2]] : borderValue, v_zero16, 0); // lane k <- src10 sample of pixel x + k
-            v_src10 = vsetq_lane_s16(map_row[(x << 2) +  6] >= 0 ? srcBase[map_row[(x << 2) +  6]] : borderValue, v_src10, 1);
-            v_src10 = vsetq_lane_s16(map_row[(x << 2) + 10] >= 0 ? srcBase[map_row[(x << 2) + 10]] : borderValue, v_src10, 2);
-            v_src10 = vsetq_lane_s16(map_row[(x << 2) + 14] >= 0 ? srcBase[map_row[(x << 2) + 14]] : borderValue, v_src10, 3);
-            v_src10 = vsetq_lane_s16(map_row[(x << 2) + 18] >= 0 ? srcBase[map_row[(x << 2) + 18]] : borderValue, v_src10, 4);
-            v_src10 = vsetq_lane_s16(map_row[(x << 2) + 22] >= 0 ? srcBase[map_row[(x << 2) + 22]] : borderValue, v_src10, 5);
-            v_src10 = vsetq_lane_s16(map_row[(x << 2) + 26] >= 0 ? srcBase[map_row[(x << 2) + 26]] : borderValue, v_src10, 6);
-            v_src10 = vsetq_lane_s16(map_row[(x << 2) + 30] >= 0 ? srcBase[map_row[(x << 2) + 30]] : borderValue, v_src10, 7);
-
-            int16x8_t v_src11 = vsetq_lane_s16(map_row[(x << 2) + 3] >= 0 ? srcBase[map_row[(x << 2) + 3]] : borderValue, v_zero16, 0); // lane k <- src11 sample of pixel x + k
-            v_src11 = vsetq_lane_s16(map_row[(x << 2) +  7] >= 0 ? srcBase[map_row[(x << 2) +  7]] : borderValue, v_src11, 1);
-            v_src11 = vsetq_lane_s16(map_row[(x << 2) + 11] >= 0 ? srcBase[map_row[(x << 2) + 11]] : borderValue, v_src11, 2);
-            v_src11 = vsetq_lane_s16(map_row[(x << 2) + 15] >= 0 ? srcBase[map_row[(x << 2) + 15]] : borderValue, v_src11, 3);
-            v_src11 = vsetq_lane_s16(map_row[(x << 2) + 19] >= 0 ? srcBase[map_row[(x << 2) + 19]] : borderValue, v_src11, 4);
-            v_src11 = vsetq_lane_s16(map_row[(x << 2) + 23] >= 0 ? srcBase[map_row[(x << 2) + 23]] : borderValue, v_src11, 5);
-            v_src11 = vsetq_lane_s16(map_row[(x << 2) + 27] >= 0 ? srcBase[map_row[(x << 2) + 27]] : borderValue, v_src11, 6);
-            v_src11 = vsetq_lane_s16(map_row[(x << 2) + 31] >= 0 ? srcBase[map_row[(x << 2) + 31]] : borderValue, v_src11, 7);
-
-            // first part: pixels x .. x+3 (low halves of the sample vectors)
-            float32x4_t v_src00_f = vcvtq_f32_s32(vmovl_s16(vget_low_s16(v_src00)));
-            float32x4_t v_src10_f = vcvtq_f32_s32(vmovl_s16(vget_low_s16(v_src10)));
-
-            float32x4x2_t v_coeff = vld2q_f32(coeff_row + (x << 1)); // de-interleaving load: val[0] = first weight, val[1] = second weight
-            float32x4_t v_dst_0 = vmlaq_f32(v_src00_f, vcvtq_f32_s32(vsubl_s16(vget_low_s16(v_src01),
-                                                                               vget_low_s16(v_src00))), v_coeff.val[0]); // src00 + (src01 - src00) * w0
-            float32x4_t v_dst_1 = vmlaq_f32(v_src10_f, vcvtq_f32_s32(vsubl_s16(vget_low_s16(v_src11),
-                                                                               vget_low_s16(v_src10))), v_coeff.val[0]); // src10 + (src11 - src10) * w0
-
-            float32x4_t v_dst = vmlaq_f32(v_dst_0, vsubq_f32(v_dst_1, v_dst_0), v_coeff.val[1]); // blend the two partial results with w1
-            uint16x4_t v_dst0 = vmovn_u32(vcvtq_u32_f32(v_dst));
-
-            // second part: pixels x+4 .. x+7 (high halves of the sample vectors)
-            v_src00_f = vcvtq_f32_s32(vmovl_s16(vget_high_s16(v_src00)));
-            v_src10_f = vcvtq_f32_s32(vmovl_s16(vget_high_s16(v_src10)));
-
-            v_coeff = vld2q_f32(coeff_row + (x << 1) + 8); // weights of the next four pixels (8 floats further on)
-            v_dst_0 = vmlaq_f32(v_src00_f, vcvtq_f32_s32(vsubl_s16(vget_high_s16(v_src01),
-                                                                   vget_high_s16(v_src00))), v_coeff.val[0]);
-            v_dst_1 = vmlaq_f32(v_src10_f, vcvtq_f32_s32(vsubl_s16(vget_high_s16(v_src11),
-                                                                   vget_high_s16(v_src10))), v_coeff.val[0]);
-
-            v_dst = vmlaq_f32(v_dst_0, vsubq_f32(v_dst_1, v_dst_0), v_coeff.val[1]);
-            uint16x4_t v_dst1 = vmovn_u32(vcvtq_u32_f32(v_dst));
-
-            // store: join both halves, narrow to 8 bits and write 8 destination pixels
-            vst1_u8(dst_row + x, vmovn_u16(vcombine_u16(v_dst0, v_dst1)));
-        }
-
-        for ( ; x < size.width; ++x) // scalar loop: remaining pixels, same formula one pixel at a time
-        {
-            s16 src00 = map_row[(x << 2) + 0] >= 0 ? srcBase[map_row[(x << 2) + 0]] : borderValue;
-            s16 src01 = map_row[(x << 2) + 1] >= 0 ? srcBase[map_row[(x << 2) + 1]] : borderValue;
-            s16 src10 = map_row[(x << 2) + 2] >= 0 ? srcBase[map_row[(x << 2) + 2]] : borderValue;
-            s16 src11 = map_row[(x << 2) + 3] >= 0 ? srcBase[map_row[(x << 2) + 3]] : borderValue;
-
-            f32 dst_val_0 = (src01 - src00) * coeff_row[(x << 1)] + src00;
-            f32 dst_val_1 = (src11 - src10) * coeff_row[(x << 1)] + src10;
-            dst_row[x] = floorf((dst_val_1 - dst_val_0) * coeff_row[(x << 1) + 1] + dst_val_0); // floorf here; the vector path converts with vcvtq_u32_f32
-        }
-    }
-}
-
-} // namespace internal
-
-#endif // CAROTENE_NEON
-
-// Reports whether remapNearestNeighbor can run for a source image of the
-// given size on the current configuration.
-bool isRemapNearestNeighborSupported(const Size2D &ssize)
-{
-#if SIZE_MAX > UINT32_MAX
-    // size_t is wider than 32 bits on this target: reject images whose width
-    // or height exceeds 0xffffFFFF, since internal index evaluation is
-    // performed with u32.
-    if (ssize.width > 0xffffFFFF || ssize.height > 0xffffFFFF)
-        return false;
-#else
-    (void)ssize;
-#endif
-    return isSupportedConfiguration();
-}
-
-// Reports whether remapLinear can run for a source image of the given size
-// on the current configuration.
-bool isRemapLinearSupported(const Size2D &ssize)
-{
-#if SIZE_MAX > UINT32_MAX
-    // size_t is wider than 32 bits on this target: reject images whose width
-    // or height exceeds 0xffffFFFF, since internal index evaluation is
-    // performed with u32.
-    if (ssize.width > 0xffffFFFF || ssize.height > 0xffffFFFF)
-        return false;
-#else
-    (void)ssize;
-#endif
-    return isSupportedConfiguration();
-}
-
-void remapNearestNeighbor(const Size2D &ssize, const Size2D &dsize,
-                          const u8 * srcBase, ptrdiff_t srcStride,
-                          const f32 * tableBase, ptrdiff_t tableStride,
-                          u8 * dstBase, ptrdiff_t dstStride,
-                          BORDER_MODE borderMode, u8 borderValue)
-{   /* Nearest-neighbour remap of an 8-bit image.
-     *
-     * Each row of tableBase holds one interleaved (x, y) float pair per
-     * destination pixel. The destination is processed in tiles of at most
-     * BLOCK_SIZE x BLOCK_SIZE pixels. For every tile the coordinates are first
-     * turned into s32 offsets (src_y * srcStride + src_x) stored in a local
-     * map, then an internal gather routine copies the pixels.
-     *
-     *   BORDER_MODE_REPLICATE: x is clamped to [0, ssize.width - 1] and y to
-     *                          [0, ssize.height - 1] before the offset is built.
-     *   BORDER_MODE_CONSTANT:  out-of-range coordinates produce the offset -1,
-     *                          which the gather routine replaces by borderValue.
-     *
-     * Any other borderMode falls through both branches and writes nothing.
-     * Without CAROTENE_NEON the body only marks its parameters as unused.
-     */
-    internal::assertSupportedConfiguration(isRemapNearestNeighborSupported(ssize));
-#ifdef CAROTENE_NEON
-    using namespace internal;
-
-    s32 _map[BLOCK_SIZE * BLOCK_SIZE + 16]; // one offset per tile pixel; the 16 extra entries presumably give alignPtr room to move the start - confirm
-    s32 * map = alignPtr(_map, 16);
-
-    int32x4_t v_width4 = vdupq_n_s32(ssize.width - 1), v_height4 = vdupq_n_s32(ssize.height - 1); // largest valid x / y, 4 lanes
-    int32x2_t v_width2 = vdup_n_s32(ssize.width - 1), v_height2 = vdup_n_s32(ssize.height - 1); // same limits, 2 lanes
-    int32x4_t v_step4 = vdupq_n_s32(srcStride);
-    int32x2_t v_step2 = vdup_n_s32(srcStride);
-
-    if (borderMode == BORDER_MODE_REPLICATE)
-    {
-        int32x4_t v_zero4 = vdupq_n_s32(0);
-        int32x2_t v_zero2 = vdup_n_s32(0);
-
-        for (size_t i = 0; i < dsize.height; i += BLOCK_SIZE)
-        {
-            size_t blockHeight = std::min<size_t>(BLOCK_SIZE, dsize.height - i);
-            for (size_t j = 0; j < dsize.width; j += BLOCK_SIZE)
-            {
-                size_t blockWidth = std::min<size_t>(BLOCK_SIZE, dsize.width - j);
-
-                // compute table: convert this tile's (x, y) pairs into clamped source offsets
-                for (size_t y = 0; y < blockHeight; ++y)
-                {
-                    const f32 * table_row = getRowPtr(tableBase, tableStride, i + y) + (j << 1); // two floats per destination pixel
-                    s32 * map_row = getRowPtr(&map[0], blockWidth * sizeof(s32), y); // map rows are packed to the tile width
-
-                    size_t x = 0;
-                    for ( ; x + 8 <= blockWidth; x += 8) // 8 pixels per pass
-                    {
-                        float32x4x2_t v_table0 = vld2q_f32(table_row + (x << 1)),
-                                      v_table1 = vld2q_f32(table_row + (x << 1) + 8); // val[0] = x coordinates, val[1] = y coordinates
-
-                        int32x4_t v_dst_x = vmaxq_s32(v_zero4, vminq_s32(v_width4, vcvtq_s32_f32(v_table0.val[0])));
-                        int32x4_t v_dst_y = vmaxq_s32(v_zero4, vminq_s32(v_height4, vcvtq_s32_f32(v_table0.val[1])));
-                        int32x4_t v_dst_index = vmlaq_s32(v_dst_x, v_dst_y, v_step4); // x + y * srcStride
-                        vst1q_s32(map_row + x, v_dst_index);
-
-                        v_dst_x = vmaxq_s32(v_zero4, vminq_s32(v_width4, vcvtq_s32_f32(v_table1.val[0])));
-                        v_dst_y = vmaxq_s32(v_zero4, vminq_s32(v_height4, vcvtq_s32_f32(v_table1.val[1])));
-                        v_dst_index = vmlaq_s32(v_dst_x, v_dst_y, v_step4);
-                        vst1q_s32(map_row + x + 4, v_dst_index);
-                    }
-
-                    for ( ; x + 4 <= blockWidth; x += 4) // 4 pixels per pass
-                    {
-                        float32x4x2_t v_table0 = vld2q_f32(table_row + (x << 1));
-
-                        int32x4_t v_dst_x = vmaxq_s32(v_zero4, vminq_s32(v_width4, vcvtq_s32_f32(v_table0.val[0])));
-                        int32x4_t v_dst_y = vmaxq_s32(v_zero4, vminq_s32(v_height4, vcvtq_s32_f32(v_table0.val[1])));
-                        int32x4_t v_dst_index = vmlaq_s32(v_dst_x, v_dst_y, v_step4);
-                        vst1q_s32(map_row + x, v_dst_index);
-                    }
-
-                    for ( ; x + 2 <= blockWidth; x += 2) // 2 pixels per pass
-                    {
-                        float32x2x2_t v_table0 = vld2_f32(table_row + (x << 1));
-
-                        int32x2_t v_dst_x = vmax_s32(v_zero2, vmin_s32(v_width2, vcvt_s32_f32(v_table0.val[0])));
-                        int32x2_t v_dst_y = vmax_s32(v_zero2, vmin_s32(v_height2, vcvt_s32_f32(v_table0.val[1])));
-                        int32x2_t v_dst_index = vmla_s32(v_dst_x, v_dst_y, v_step2);
-                        vst1_s32(map_row + x, v_dst_index);
-                    }
-
-                    for ( ; x < blockWidth; ++x) // last pixel, scalar
-                    {
-                        s32 src_x = std::max(0, std::min<s32>(ssize.width - 1, (s32)floorf(table_row[(x << 1) + 0]))); // NOTE(review): floorf here vs vcvt in the vector loops; the clamp to >= 0 appears to make both agree - confirm
-                        s32 src_y = std::max(0, std::min<s32>(ssize.height - 1, (s32)floorf(table_row[(x << 1) + 1])));
-                        map_row[x] = src_y * srcStride + src_x;
-                    }
-                }
-
-                // make remap: gather the tile's pixels through the offsets just computed
-                remapNearestNeighborReplicate(Size2D(blockWidth, blockHeight), srcBase, &map[0],
-                                              getRowPtr(dstBase, dstStride, i) + j, dstStride);
-            }
-        }
-    }
-    else if (borderMode == BORDER_MODE_CONSTANT)
-    {
-        int32x4_t v_m1_4 = vdupq_n_s32(-1); // offset stored for pixels that fall outside the source
-        int32x2_t v_m1_2 = vdup_n_s32(-1);
-        float32x4_t v_zero4 = vdupq_n_f32(0.0f);
-        float32x2_t v_zero2 = vdup_n_f32(0.0f);
-
-        for (size_t i = 0; i < dsize.height; i += BLOCK_SIZE)
-        {
-            size_t blockHeight = std::min<size_t>(BLOCK_SIZE, dsize.height - i);
-            for (size_t j = 0; j < dsize.width; j += BLOCK_SIZE)
-            {
-                size_t blockWidth = std::min<size_t>(BLOCK_SIZE, dsize.width - j);
-
-                // compute table: convert this tile's (x, y) pairs into source offsets, or -1 when out of range
-                for (size_t y = 0; y < blockHeight; ++y)
-                {
-                    const f32 * table_row = getRowPtr(tableBase, tableStride, i + y) + (j << 1); // two floats per destination pixel
-                    s32 * map_row = getRowPtr(&map[0], blockWidth * sizeof(s32), y); // map rows are packed to the tile width
-
-                    size_t x = 0;
-                    for ( ; x + 8 <= blockWidth; x += 8) // 8 pixels per pass
-                    {
-                        float32x4x2_t v_table0 = vld2q_f32(table_row + (x << 1)),
-                                      v_table1 = vld2q_f32(table_row + (x << 1) + 8); // val[0] = x coordinates, val[1] = y coordinates
-
-                        int32x4_t v_dst_x = vcvtq_s32_f32(v_table0.val[0]);
-                        int32x4_t v_dst_y = vcvtq_s32_f32(v_table0.val[1]);
-                        uint32x4_t v_mask = vandq_u32(vandq_u32(vcgeq_f32(v_table0.val[0], v_zero4), vcleq_s32(v_dst_x, v_width4)),
-                                                      vandq_u32(vcgeq_f32(v_table0.val[1], v_zero4), vcleq_s32(v_dst_y, v_height4))); // lane valid when float x, y >= 0 and converted x, y <= limits
-                        int32x4_t v_dst_index = vbslq_s32(v_mask, vmlaq_s32(v_dst_x, v_dst_y, v_step4), v_m1_4); // valid lanes: x + y * srcStride, others: -1
-                        vst1q_s32(map_row + x, v_dst_index);
-
-                        v_dst_x = vcvtq_s32_f32(v_table1.val[0]);
-                        v_dst_y = vcvtq_s32_f32(v_table1.val[1]);
-                        v_mask = vandq_u32(vandq_u32(vcgeq_f32(v_table1.val[0], v_zero4), vcleq_s32(v_dst_x, v_width4)),
-                                           vandq_u32(vcgeq_f32(v_table1.val[1], v_zero4), vcleq_s32(v_dst_y, v_height4)));
-                        v_dst_index = vbslq_s32(v_mask, vmlaq_s32(v_dst_x, v_dst_y, v_step4), v_m1_4);
-                        vst1q_s32(map_row + x + 4, v_dst_index);
-                    }
-
-                    for ( ; x + 4 <= blockWidth; x += 4) // 4 pixels per pass
-                    {
-                        float32x4x2_t v_table0 = vld2q_f32(table_row + (x << 1));
-
-                        int32x4_t v_dst_x = vcvtq_s32_f32(v_table0.val[0]);
-                        int32x4_t v_dst_y = vcvtq_s32_f32(v_table0.val[1]);
-                        uint32x4_t v_mask = vandq_u32(vandq_u32(vcgeq_f32(v_table0.val[0], v_zero4), vcleq_s32(v_dst_x, v_width4)),
-                                                      vandq_u32(vcgeq_f32(v_table0.val[1], v_zero4), vcleq_s32(v_dst_y, v_height4)));
-                        int32x4_t v_dst_index = vbslq_s32(v_mask, vmlaq_s32(v_dst_x, v_dst_y, v_step4), v_m1_4);
-                        vst1q_s32(map_row + x, v_dst_index);
-                    }
-
-                    for ( ; x + 2 <= blockWidth; x += 2) // 2 pixels per pass
-                    {
-                        float32x2x2_t v_table0 = vld2_f32(table_row + (x << 1));
-
-                        int32x2_t v_dst_x = vcvt_s32_f32(v_table0.val[0]);
-                        int32x2_t v_dst_y = vcvt_s32_f32(v_table0.val[1]);
-                        uint32x2_t v_mask = vand_u32(vand_u32(vcge_f32(v_table0.val[0], v_zero2), vcle_s32(v_dst_x, v_width2)),
-                                                     vand_u32(vcge_f32(v_table0.val[1], v_zero2), vcle_s32(v_dst_y, v_height2)));
-                        int32x2_t v_dst_index = vbsl_s32(v_mask, vmla_s32(v_dst_x, v_dst_y, v_step2), v_m1_2);
-                        vst1_s32(map_row + x, v_dst_index);
-                    }
-
-                    for ( ; x < blockWidth; ++x) // last pixel, scalar
-                    {
-                        s32 src_x = (s32)floorf(table_row[(x << 1) + 0]);
-                        s32 src_y = (s32)floorf(table_row[(x << 1) + 1]);
-                        map_row[x] = (src_x >= 0) && (src_x < (s32)ssize.width) &&
-                                     (src_y >= 0) && (src_y < (s32)ssize.height) ? src_y * srcStride + src_x : -1; // -1 marks a pixel outside the source
-                    }
-                }
-
-                // make remap: gather the tile's pixels; negative offsets become borderValue
-                remapNearestNeighborConst(Size2D(blockWidth, blockHeight), srcBase, &map[0],
-                                          getRowPtr(dstBase, dstStride, i) + j, dstStride, borderValue);
-            }
-        }
-    }
-
-#else
-    (void)ssize;
-    (void)dsize;
-    (void)srcBase;
-    (void)srcStride;
-    (void)tableBase;
-    (void)tableStride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)borderMode;
-    (void)borderValue;
-#endif
-}
-
-void remapLinear(const Size2D &ssize, const Size2D &dsize,
-                 const u8 * srcBase, ptrdiff_t srcStride,
-                 const f32 * tableBase, ptrdiff_t tableStride,
-                 u8 * dstBase, ptrdiff_t dstStride,
-                 BORDER_MODE borderMode, u8 borderValue)
-{
-    internal::assertSupportedConfiguration(isRemapLinearSupported(ssize));
-#ifdef CAROTENE_NEON
-    using namespace internal;
-
-    s32 _map[((BLOCK_SIZE * BLOCK_SIZE) << 2) + 16];
-    f32 _coeffs[((BLOCK_SIZE * BLOCK_SIZE) << 1) + 16];
-
-    s32 * map = alignPtr(_map, 16);
-    f32 * coeffs = alignPtr(_coeffs, 16);
-
-    int32x4_t v_width4 = vdupq_n_s32(ssize.width - 1), v_height4 = vdupq_n_s32(ssize.height - 1);
-    int32x4_t v_step4 = vdupq_n_s32(srcStride), v_1 = vdupq_n_s32(1);
-    float32x4_t v_zero4f = vdupq_n_f32(0.0f), v_one4f = vdupq_n_f32(1.0f);
-
-    if (borderMode == BORDER_MODE_REPLICATE)
-    {
-        int32x4_t v_zero4 = vdupq_n_s32(0);
-
-        for (size_t i = 0; i < dsize.height; i += BLOCK_SIZE)
-        {
-            size_t blockHeight = std::min<size_t>(BLOCK_SIZE, dsize.height - i);
-            for (size_t j = 0; j < dsize.width; j += BLOCK_SIZE)
-            {
-                size_t blockWidth = std::min<size_t>(BLOCK_SIZE, dsize.width - j);
-
-                // compute table
-                for (size_t y = 0; y < blockHeight; ++y)
-                {
-                    const f32 * table_row = getRowPtr(tableBase, tableStride, i + y) + (j << 1);
-
-                    s32 * map_row = getRowPtr(map, blockWidth * sizeof(s32) * 4, y);
-                    f32 * coeff_row = getRowPtr(coeffs, blockWidth * sizeof(f32) * 2, y);
-
-                    size_t x = 0;
-                    for ( ; x + 4 <= blockWidth; x += 4)
-                    {
-                        float32x4x2_t v_table = vld2q_f32(table_row + (x << 1));
-
-                        int32x4_t v_src_x = vcvtq_s32_f32(v_table.val[0]);
-                        int32x4_t v_src_y = vcvtq_s32_f32(v_table.val[1]);
-
-                        float32x4x2_t  v_coeff;
-                        v_coeff.val[0] = vsubq_f32(v_table.val[0], vcvtq_f32_s32(v_src_x));
-                        v_coeff.val[1] = vsubq_f32(v_table.val[1], vcvtq_f32_s32(v_src_y));
-                        uint32x4_t v_maskx = vcltq_f32(v_coeff.val[0], v_zero4f);
-                        uint32x4_t v_masky = vcltq_f32(v_coeff.val[1], v_zero4f);
-                        v_coeff.val[0] = vbslq_f32(v_maskx, vaddq_f32(v_one4f, v_coeff.val[0]), v_coeff.val[0]);
-                        v_coeff.val[1] = vbslq_f32(v_masky, vaddq_f32(v_one4f, v_coeff.val[1]), v_coeff.val[1]);
-                        v_src_x = vbslq_s32(v_maskx, vsubq_s32(v_src_x, v_1), v_src_x);
-                        v_src_y = vbslq_s32(v_masky, vsubq_s32(v_src_y, v_1), v_src_y);
-
-                        int32x4_t v_dst0_x = vmaxq_s32(v_zero4, vminq_s32(v_width4, v_src_x));
-                        int32x4_t v_dst0_y = vmaxq_s32(v_zero4, vminq_s32(v_height4, v_src_y));
-                        int32x4_t v_dst1_x = vmaxq_s32(v_zero4, vminq_s32(v_width4, vaddq_s32(v_1, v_src_x)));
-                        int32x4_t v_dst1_y = vmaxq_s32(v_zero4, vminq_s32(v_height4, vaddq_s32(v_1, v_src_y)));
-
-                        int32x4x4_t v_dst_index;
-                        v_dst_index.val[0] = vmlaq_s32(v_dst0_x, v_dst0_y, v_step4);
-                        v_dst_index.val[1] = vmlaq_s32(v_dst1_x, v_dst0_y, v_step4);
-                        v_dst_index.val[2] = vmlaq_s32(v_dst0_x, v_dst1_y, v_step4);
-                        v_dst_index.val[3] = vmlaq_s32(v_dst1_x, v_dst1_y, v_step4);
-
-                        vst2q_f32(coeff_row + (x << 1), v_coeff);
-                        vst4q_s32(map_row + (x << 2), v_dst_index);
-                    }
-
-                    for ( ; x < blockWidth; ++x)
-                    {
-                        f32 src_x_f = table_row[(x << 1) + 0];
-                        f32 src_y_f = table_row[(x << 1) + 1];
-
-                        s32 src0_x = (s32)floorf(src_x_f);
-                        s32 src0_y = (s32)floorf(src_y_f);
-
-                        coeff_row[x << 1] = src_x_f - src0_x;
-                        coeff_row[(x << 1) + 1] = src_y_f - src0_y;
-
-                        s32 src1_y = std::max(0, std::min<s32>(ssize.height - 1, src0_y + 1));
-                        src0_y = std::max(0, std::min<s32>(ssize.height - 1, src0_y));
-                        s32 src1_x = std::max(0, std::min<s32>(ssize.width - 1, src0_x + 1));
-                        src0_x = std::max(0, std::min<s32>(ssize.width - 1, src0_x));
-
-                        map_row[(x << 2) + 0] = src0_y * srcStride + src0_x;
-                        map_row[(x << 2) + 1] = src0_y * srcStride + src1_x;
-                        map_row[(x << 2) + 2] = src1_y * srcStride + src0_x;
-                        map_row[(x << 2) + 3] = src1_y * srcStride + src1_x;
-                    }
-                }
-
-                remapLinearReplicate(Size2D(blockWidth, blockHeight),
-                                     srcBase, &map[0], &coeffs[0],
-                                     getRowPtr(dstBase, dstStride, i) + j, dstStride);
-            }
-        }
-    }
-    else if (borderMode == BORDER_MODE_CONSTANT)
-    {
-        float32x4_t v_zero4 = vdupq_n_f32(0.0f);
-        int32x4_t v_m1_4 = vdupq_n_s32(-1);
-
-        for (size_t i = 0; i < dsize.height; i += BLOCK_SIZE)
-        {
-            size_t blockHeight = std::min<size_t>(BLOCK_SIZE, dsize.height - i);
-            for (size_t j = 0; j < dsize.width; j += BLOCK_SIZE)
-            {
-                size_t blockWidth = std::min<size_t>(BLOCK_SIZE, dsize.width - j);
-
-                // compute table
-                for (size_t y = 0; y < blockHeight; ++y)
-                {
-                    const f32 * table_row = getRowPtr(tableBase, tableStride, i + y) + (j << 1);
-
-                    s32 * map_row = getRowPtr(map, blockWidth * sizeof(s32) * 4, y);
-                    f32 * coeff_row = getRowPtr(coeffs, blockWidth * sizeof(f32) * 2, y);
-
-                    size_t x = 0;
-                    for ( ; x + 4 <= blockWidth; x += 4)
-                    {
-                        float32x4x2_t v_table = vld2q_f32(table_row + (x << 1));
-
-                        int32x4_t v_src_x0 = vcvtq_s32_f32(v_table.val[0]);
-                        int32x4_t v_src_y0 = vcvtq_s32_f32(v_table.val[1]);
-
-                        float32x4x2_t v_coeff;
-                        v_coeff.val[0] = vsubq_f32(v_table.val[0], vcvtq_f32_s32(v_src_x0));
-                        v_coeff.val[1] = vsubq_f32(v_table.val[1], vcvtq_f32_s32(v_src_y0));
-                        uint32x4_t v_maskx = vcltq_f32(v_coeff.val[0], v_zero4f);
-                        uint32x4_t v_masky = vcltq_f32(v_coeff.val[1], v_zero4f);
-                        v_coeff.val[0] = vbslq_f32(v_maskx, vaddq_f32(v_one4f, v_coeff.val[0]), v_coeff.val[0]);
-                        v_coeff.val[1] = vbslq_f32(v_masky, vaddq_f32(v_one4f, v_coeff.val[1]), v_coeff.val[1]);
-                        v_src_x0 = vbslq_s32(v_maskx, vsubq_s32(v_src_x0, v_1), v_src_x0);
-                        v_src_y0 = vbslq_s32(v_masky, vsubq_s32(v_src_y0, v_1), v_src_y0);
-
-                        int32x4_t v_src_x1 = vaddq_s32(v_src_x0, v_1);
-                        int32x4_t v_src_y1 = vaddq_s32(v_src_y0, v_1);
-
-                        int32x4x4_t v_dst_index;
-                        v_dst_index.val[0] = vmlaq_s32(v_src_x0, v_src_y0, v_step4);
-                        v_dst_index.val[1] = vmlaq_s32(v_src_x1, v_src_y0, v_step4);
-                        v_dst_index.val[2] = vmlaq_s32(v_src_x0, v_src_y1, v_step4);
-                        v_dst_index.val[3] = vmlaq_s32(v_src_x1, v_src_y1, v_step4);
-
-                        uint32x4_t v_mask_x0 = vandq_u32(vcgeq_f32(v_table.val[0], v_zero4), vcleq_s32(v_src_x0, v_width4));
-                        uint32x4_t v_mask_x1 = vandq_u32(vcgeq_f32(vaddq_f32(v_table.val[0], v_one4f), v_zero4), vcleq_s32(v_src_x1, v_width4));
-                        uint32x4_t v_mask_y0 = vandq_u32(vcgeq_f32(v_table.val[1], v_zero4), vcleq_s32(v_src_y0, v_height4));
-                        uint32x4_t v_mask_y1 = vandq_u32(vcgeq_f32(vaddq_f32(v_table.val[1], v_one4f), v_zero4), vcleq_s32(v_src_y1, v_height4));
-
-                        v_dst_index.val[0] = vbslq_s32(vandq_u32(v_mask_x0, v_mask_y0), v_dst_index.val[0], v_m1_4);
-                        v_dst_index.val[1] = vbslq_s32(vandq_u32(v_mask_x1, v_mask_y0), v_dst_index.val[1], v_m1_4);
-                        v_dst_index.val[2] = vbslq_s32(vandq_u32(v_mask_x0, v_mask_y1), v_dst_index.val[2], v_m1_4);
-                        v_dst_index.val[3] = vbslq_s32(vandq_u32(v_mask_x1, v_mask_y1), v_dst_index.val[3], v_m1_4);
-
-                        vst2q_f32(coeff_row + (x << 1), v_coeff);
-                        vst4q_s32(map_row + (x << 2), v_dst_index);
-                    }
-
-                    for ( ; x < blockWidth; ++x)
-                    {
-                        f32 src_x_f = table_row[(x << 1) + 0];
-                        f32 src_y_f = table_row[(x << 1) + 1];
-
-                        s32 src0_x = (s32)floorf(src_x_f), src1_x = src0_x + 1;
-                        s32 src0_y = (s32)floorf(src_y_f), src1_y = src0_y + 1;
-
-                        coeff_row[(x << 1)] = src_x_f - src0_x;
-                        coeff_row[(x << 1) + 1] = src_y_f - src0_y;
-
-                        map_row[(x << 2) + 0] = (src0_x >= 0) && (src0_x < (s32)ssize.width) &&
-                                                (src0_y >= 0) && (src0_y < (s32)ssize.height) ? src0_y * srcStride + src0_x : -1;
-                        map_row[(x << 2) + 1] = (src1_x >= 0) && (src1_x < (s32)ssize.width) &&
-                                                (src0_y >= 0) && (src0_y < (s32)ssize.height) ? src0_y * srcStride + src1_x : -1;
-                        map_row[(x << 2) + 2] = (src0_x >= 0) && (src0_x < (s32)ssize.width) &&
-                                                (src1_y >= 0) && (src1_y < (s32)ssize.height) ? src1_y * srcStride + src0_x : -1;
-                        map_row[(x << 2) + 3] = (src1_x >= 0) && (src1_x < (s32)ssize.width) &&
-                                                (src1_y >= 0) && (src1_y < (s32)ssize.height) ? src1_y * srcStride + src1_x : -1;
-                    }
-                }
-
-                remapLinearConst(Size2D(blockWidth, blockHeight),
-                                 srcBase, &map[0], &coeffs[0],
-                                 getRowPtr(dstBase, dstStride, i) + j, dstStride, borderValue);
-            }
-        }
-    }
-#else
-    (void)ssize;
-    (void)dsize;
-    (void)srcBase;
-    (void)srcStride;
-    (void)tableBase;
-    (void)tableStride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)borderMode;
-    (void)borderValue;
-#endif
-}
-
-} // namespace CAROTENE_NS
--- a/3rdparty/carotene/src/remap.hpp
+++ b/3rdparty/carotene/src/remap.hpp
@ -1,85 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2015, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-#ifndef CAROTENE_SRC_REMAP_HPP
-#define CAROTENE_SRC_REMAP_HPP
-
-#include "common.hpp"
-
-#include <cmath>
-
-#ifdef CAROTENE_NEON
-
-// Internal block-level remap kernels. The declarations below are only
-// visible when CAROTENE_NEON is defined (see the surrounding #ifdef).
-namespace CAROTENE_NS { namespace internal {
-
-// Edge length of the square tiles the destination image is split into by the
-// calling code before each tile is handed to one of the kernels below.
-enum
-{
-    BLOCK_SIZE = 32
-};
-
-
-// Nearest-neighbour remap of one tile, replicated-border variant.
-//   size      - tile dimensions
-//   srcBase   - base pointer of the source image
-//   map       - precomputed source offsets for the tile
-//               NOTE(review): presumably one offset per destination pixel;
-//               the producer of this table is not visible here - confirm.
-//   dstBase   - first destination pixel of the tile
-//   dstStride - destination row stride
-void remapNearestNeighborReplicate(const Size2D size,
-                                   const u8 * srcBase,
-                                   const s32 * map,
-                                   u8 * dstBase, ptrdiff_t dstStride);
-
-// Nearest-neighbour remap of one tile, constant-border variant.
-// Same arguments as above plus borderValue.
-// NOTE(review): presumably a negative map entry selects borderValue, as in the
-// linear variant - confirm against the implementation.
-void remapNearestNeighborConst(const Size2D size,
-                               const u8 * srcBase,
-                               const s32 * map,
-                               u8 * dstBase, ptrdiff_t dstStride,
-                               u8 borderValue);
-
-// Bilinear remap of one tile, replicated-border variant.
-//   map    - the caller stores four offsets (y * srcStride + x) per destination
-//            pixel, in the order (x0,y0), (x1,y0), (x0,y1), (x1,y1), with the
-//            coordinates already clamped to the source image
-//   coeffs - the caller stores two values per destination pixel: the
-//            fractional x part followed by the fractional y part
-void remapLinearReplicate(const Size2D size,
-                          const u8 * srcBase,
-                          const s32 * map,
-                          const f32 * coeffs,
-                          u8 * dstBase, ptrdiff_t dstStride);
-
-// Bilinear remap of one tile, constant-border variant.
-// map and coeffs use the same layout as remapLinearReplicate, except that the
-// caller writes -1 for every neighbour that falls outside the source image.
-// NOTE(review): borderValue is presumably substituted for those entries -
-// confirm against the implementation.
-void remapLinearConst(const Size2D size,
-                      const u8 * srcBase,
-                      const s32 * map,
-                      const f32 * coeffs,
-                      u8 * dstBase, ptrdiff_t dstStride,
-                      u8 borderValue);
-
-} }
-
-#endif // CAROTENE_NEON
-
-#endif // CAROTENE_SRC_REMAP_HPP
--- a/3rdparty/carotene/src/resize.cpp
+++ b/3rdparty/carotene/src/resize.cpp
--- a/3rdparty/carotene/src/saturate_cast.hpp
+++ b/3rdparty/carotene/src/saturate_cast.hpp
@ -1,199 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2012-2015, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-#ifndef CAROTENE_SATURATE_CAST_HPP
-#define CAROTENE_SATURATE_CAST_HPP
-
-#include <algorithm>
-#include <climits>
-#include <cmath>
-
-#if defined _MSC_VER && defined _M_ARM
-# include <intrin.h>
-#endif
-
-#include <carotene/definitions.hpp>
-#include <carotene/types.hpp>
-
-namespace CAROTENE_NS { namespace internal {
-
-// Platform-specific fast paths for float/double -> s32 rounding.
-// Each CAROTENE_ROUND_* macro expands to a statement (or block) that *returns*
-// the rounded value, so it must be the whole body of the function using it.
-// When neither macro ends up defined, the round() overloads fall back to a
-// portable implementation.
-#if defined _MSC_VER && defined _M_ARM
-
-// Hand-encoded Thumb-2 routine: converts d0 to a 32-bit integer with
-// vcvtr.s32.f64 and returns it in r0. It is declared as returning void and is
-// called through a cast to s32 (*)(f64) by the macros below.
-__declspec(naked) static void vcvtr_s32_f64_imp(f64 d)
-{
-    (void)d;
-    __emit(0xEEBD);  // vcvtr.s32.f64 s0, d0
-    __emit(0x0B40);
-    __emit(0xEE10);  // vmov r0, s0
-    __emit(0x0A10);
-    __emit(0x4770);  // bx lr
-}
-
-// The argument is parenthesised so that an expression argument is cast as a whole.
-# define CAROTENE_ROUND_FLT(value) return ((s32 (*)(f64))vcvtr_s32_f64_imp)((f64)(value));
-# define CAROTENE_ROUND_DBL(value) return ((s32 (*)(f64))vcvtr_s32_f64_imp)(value);
-
-#elif defined CV_ICC || defined __GNUC__
-
-# if defined(__VFP_FP__) && !defined(__SOFTFP__) && !(defined _DEBUG || defined DEBUG) && !defined(__CUDACC__)
-// VFP hardware conversion (ftosis / ftosid). The union reinterprets the VFP
-// result register as an integer. The obsolete `register` storage class was
-// dropped: it is only a hint and is no longer valid in C++17.
-#  define CAROTENE_ROUND_FLT(value) {                              \
-    union { f32 f; s32 i; } result;                             \
-    asm ("ftosis  %0, %1 \n" : "=w" (result.f) : "w" (value) ); \
-    return result.i; }
-#  define CAROTENE_ROUND_DBL(value) {                      \
-    union {f32 f; s32 i;} __tegra_result;               \
-    asm (                                               \
-        "ftosid  %0, %P1\n"                             \
-        : "=w" (__tegra_result.f)                       \
-        : "w" (value)                                   \
-    );                                                  \
-    return __tegra_result.i;                            \
-    }
-# else
-// Generic GCC-compatible path. The macro now uses its own parameter instead of
-// silently capturing a caller variable that happens to be called `value`.
-#  define CAROTENE_ROUND_FLT(value) return (s32)lrintf(value);
-#  define CAROTENE_ROUND_DBL(value) return (s32)lrint(value);
-# endif
-
-#endif
-
-// Rounds a single-precision value to a 32-bit integer.
-// If CAROTENE_ROUND_FLT is defined, the platform-specific macro performs the
-// conversion and returns. Otherwise a portable fallback is used: exact .5 ties
-// go to the even neighbour, everything else goes to the nearest integer.
-inline s32 round(f32 value)
-{
-#ifndef CAROTENE_ROUND_FLT
-    const s32 whole = (s32)(value);
-    const f32 frac = value - whole;
-    const bool isTie = (frac == 0.5 || frac == -0.5);
-
-    // A tie on an even integer part is already the even neighbour.
-    if (isTie && (whole % 2) == 0)
-        return whole;
-
-    // Otherwise push half a unit away from zero and truncate.
-    return (s32)(value + (value >= 0 ? 0.5 : -0.5));
-#else
-    CAROTENE_ROUND_FLT(value)
-#endif
-}
-
-// Rounds a double-precision value to a 32-bit integer.
-// If CAROTENE_ROUND_DBL is defined, the platform-specific macro performs the
-// conversion and returns. Otherwise a portable fallback is used: exact .5 ties
-// go to the even neighbour, everything else goes to the nearest integer.
-inline s32 round(f64 value)
-{
-#ifndef CAROTENE_ROUND_DBL
-    const s32 whole = (s32)(value);
-    const f64 frac = value - whole;
-    const bool isTie = (frac == 0.5 || frac == -0.5);
-
-    // A tie on an even integer part is already the even neighbour.
-    if (isTie && (whole % 2) == 0)
-        return whole;
-
-    // Otherwise push half a unit away from zero and truncate.
-    return (s32)(value + (value >= 0 ? 0.5 : -0.5));
-#else
-    CAROTENE_ROUND_DBL(value)
-#endif
-}
-/////////////// saturate_cast (used in image & signal processing) ///////////////////
-
-// Primary templates: a plain _Tp(v) conversion. They are used for every
-// source/destination pair that has no explicit specialisation below.
-template<typename _Tp> inline _Tp saturate_cast(u8 v)    { return _Tp(v); }
-template<typename _Tp> inline _Tp saturate_cast(s8 v)    { return _Tp(v); }
-template<typename _Tp> inline _Tp saturate_cast(u16 v)   { return _Tp(v); }
-template<typename _Tp> inline _Tp saturate_cast(s16 v)   { return _Tp(v); }
-template<typename _Tp> inline _Tp saturate_cast(u32 v)   { return _Tp(v); }
-template<typename _Tp> inline _Tp saturate_cast(s32 v)   { return _Tp(v); }
-template<typename _Tp> inline _Tp saturate_cast(s64 v)   { return _Tp(v); }
-template<typename _Tp> inline _Tp saturate_cast(u64 v)   { return _Tp(v); }
-template<typename _Tp> inline _Tp saturate_cast(f32 v)   { return _Tp(v); }
-template<typename _Tp> inline _Tp saturate_cast(f64 v)   { return _Tp(v); }
-
-// -> u8: clamp to [0, UCHAR_MAX].
-// For signed sources the cast to unsigned turns negative values into huge
-// ones, so a single "<= UCHAR_MAX" comparison detects both out-of-range sides;
-// the sign of v then selects which bound to return.
-// Floating-point sources are rounded with round() first and then clamped.
-template<> inline u8 saturate_cast<u8>(s8 v)      { return (u8)std::max((s32)v, 0); }
-template<> inline u8 saturate_cast<u8>(u16 v)     { return (u8)std::min((u32)v, (u32)UCHAR_MAX); }
-template<> inline u8 saturate_cast<u8>(s32 v)     { return (u8)((u32)v <= UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0); }
-template<> inline u8 saturate_cast<u8>(s16 v)     { return saturate_cast<u8>((s32)v); }
-template<> inline u8 saturate_cast<u8>(u32 v)     { return (u8)std::min(v, (u32)UCHAR_MAX); }
-template<> inline u8 saturate_cast<u8>(s64 v)     { return (u8)((u64)v <= UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0); }
-template<> inline u8 saturate_cast<u8>(u64 v)     { return (u8)std::min(v, (u64)UCHAR_MAX); }
-template<> inline u8 saturate_cast<u8>(f32 v)     { return saturate_cast<u8>(round(v)); }
-template<> inline u8 saturate_cast<u8>(f64 v)     { return saturate_cast<u8>(round(v)); }
-
-// -> s8: clamp to [SCHAR_MIN, SCHAR_MAX].
-// Subtracting SCHAR_MIN shifts the valid range onto [0, UCHAR_MAX], which
-// allows the same single unsigned comparison as in the u8 case.
-template<> inline s8 saturate_cast<s8>(u8 v)      { return (s8)std::min((s32)v, SCHAR_MAX); }
-template<> inline s8 saturate_cast<s8>(u16 v)     { return (s8)std::min((u32)v, (u32)SCHAR_MAX); }
-template<> inline s8 saturate_cast<s8>(s32 v)     { return (s8)((u32)(v-SCHAR_MIN) <= (u32)UCHAR_MAX ? v : v > 0 ? SCHAR_MAX : SCHAR_MIN); }
-template<> inline s8 saturate_cast<s8>(s16 v)     { return saturate_cast<s8>((s32)v); }
-template<> inline s8 saturate_cast<s8>(u32 v)     { return (s8)std::min(v, (u32)SCHAR_MAX); }
-template<> inline s8 saturate_cast<s8>(s64 v)     { return (s8)((u64)(v-SCHAR_MIN) <= (u64)UCHAR_MAX ? v : v > 0 ? SCHAR_MAX : SCHAR_MIN); }
-template<> inline s8 saturate_cast<s8>(u64 v)     { return (s8)std::min(v, (u64)SCHAR_MAX); }
-template<> inline s8 saturate_cast<s8>(f32 v)     { return saturate_cast<s8>(round(v)); }
-template<> inline s8 saturate_cast<s8>(f64 v)     { return saturate_cast<s8>(round(v)); }
-
-// -> u16: clamp to [0, USHRT_MAX]; floating-point sources are rounded first.
-template<> inline u16 saturate_cast<u16>(s8 v)    { return (u16)std::max((s32)v, 0); }
-template<> inline u16 saturate_cast<u16>(s16 v)   { return (u16)std::max((s32)v, 0); }
-template<> inline u16 saturate_cast<u16>(s32 v)   { return (u16)((u32)v <= (u32)USHRT_MAX ? v : v > 0 ? USHRT_MAX : 0); }
-template<> inline u16 saturate_cast<u16>(u32 v)   { return (u16)std::min(v, (u32)USHRT_MAX); }
-template<> inline u16 saturate_cast<u16>(s64 v)   { return (u16)((u64)v <= (u64)USHRT_MAX ? v : v > 0 ? USHRT_MAX : 0); }
-template<> inline u16 saturate_cast<u16>(u64 v)   { return (u16)std::min(v, (u64)USHRT_MAX); }
-template<> inline u16 saturate_cast<u16>(f32 v)   { return saturate_cast<u16>(round(v)); }
-template<> inline u16 saturate_cast<u16>(f64 v)   { return saturate_cast<u16>(round(v)); }
-
-// -> s16: clamp to [SHRT_MIN, SHRT_MAX]; same shifted-range comparison as s8.
-template<> inline s16 saturate_cast<s16>(u16 v)   { return (s16)std::min((s32)v, SHRT_MAX); }
-template<> inline s16 saturate_cast<s16>(s32 v)   { return (s16)((u32)(v - SHRT_MIN) <= (u32)USHRT_MAX ? v : v > 0 ? SHRT_MAX : SHRT_MIN); }
-template<> inline s16 saturate_cast<s16>(u32 v)   { return (s16)std::min(v, (u32)SHRT_MAX); }
-template<> inline s16 saturate_cast<s16>(s64 v)   { return (s16)((u64)(v - SHRT_MIN) <= (u64)USHRT_MAX ? v : v > 0 ? SHRT_MAX : SHRT_MIN); }
-template<> inline s16 saturate_cast<s16>(u64 v)   { return (s16)std::min(v, (u64)SHRT_MAX); }
-template<> inline s16 saturate_cast<s16>(f32 v)   { return saturate_cast<s16>(round(v)); }
-template<> inline s16 saturate_cast<s16>(f64 v)   { return saturate_cast<s16>(round(v)); }
-
-// -> u32: negative integer sources clamp to 0, 64-bit sources clamp to UINT_MAX.
-template<> inline u32 saturate_cast<u32>(s8 v)    { return (u32)std::max(v, (s8)0); }
-template<> inline u32 saturate_cast<u32>(s16 v)   { return (u32)std::max(v, (s16)0); }
-template<> inline u32 saturate_cast<u32>(s32 v)   { return (u32)std::max(v, (s32)0); }
-template<> inline u32 saturate_cast<u32>(s64 v)   { return (u32)((u64)v <= (u64)UINT_MAX ? v : v > 0 ? UINT_MAX : 0); }
-template<> inline u32 saturate_cast<u32>(u64 v)   { return (u32)std::min(v, (u64)UINT_MAX); }
-//OpenCV like f32/f64 -> u32 conversion
-//we intentionally do not clip negative numbers, to make -1 become 0xffffffff etc.
-template<> inline u32 saturate_cast<u32>(f32 v)   { return round(v); }
-template<> inline u32 saturate_cast<u32>(f64 v)   { return round(v); }
-//Negative clipping implementation
-//template<> inline u32 saturate_cast<u32>(f32 v)   { return saturate_cast<u32>(round(v)); }
-//template<> inline u32 saturate_cast<u32>(f64 v)   { return saturate_cast<u32>(round(v)); }
-
-// -> s32: wider integers clamp to [INT_MIN, INT_MAX]; floating-point sources
-// return round(v) directly, with no additional range check here.
-template<> inline s32 saturate_cast<s32>(u32 v)   { return (s32)std::min(v, (u32)INT_MAX); }
-template<> inline s32 saturate_cast<s32>(s64 v)   { return (s32)((u64)(v - INT_MIN) <= (u64)UINT_MAX ? v : v > 0 ? INT_MAX : INT_MIN); }
-template<> inline s32 saturate_cast<s32>(u64 v)   { return (s32)std::min(v, (u64)INT_MAX); }
-template<> inline s32 saturate_cast<s32>(f32 v)   { return round(v); }
-template<> inline s32 saturate_cast<s32>(f64 v)   { return round(v); }
-
-// -> u64: negative signed sources clamp to 0.
-template<> inline u64 saturate_cast<u64>(s8 v)    { return (u64)std::max(v, (s8)0); }
-template<> inline u64 saturate_cast<u64>(s16 v)   { return (u64)std::max(v, (s16)0); }
-template<> inline u64 saturate_cast<u64>(s32 v)   { return (u64)std::max(v, (s32)0); }
-template<> inline u64 saturate_cast<u64>(s64 v)   { return (u64)std::max(v, (s64)0); }
-
-// -> s64: values above LLONG_MAX clamp to LLONG_MAX.
-template<> inline s64 saturate_cast<s64>(u64 v)   { return (s64)std::min(v, (u64)LLONG_MAX); }
-
-} }
-
-#endif
--- a/3rdparty/carotene/src/scharr.cpp
+++ b/3rdparty/carotene/src/scharr.cpp
@ -1,219 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2012-2015, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-#include <vector>
-
-#include "common.hpp"
-
-namespace CAROTENE_NS {
-
-// Reports whether Scharr3x3 can handle the requested configuration.
-// Only the two first-order derivatives are accepted, (dx, dy) == (0, 1) or
-// (1, 0). For those the answer is whatever isSeparableFilter3x3Supported
-// returns for the matching filter pair; anything else is unsupported.
-bool isScharr3x3Supported(const Size2D &size, BORDER_MODE border, s32 dx, s32 dy, Margin borderMargin)
-{
-    if (dx == 0 && dy == 1)
-        return isSeparableFilter3x3Supported(size, border, 3, 1, borderMargin);
-
-    if (dx == 1 && dy == 0)
-        return isSeparableFilter3x3Supported(size, border, 1, 3, borderMargin);
-
-    return false;
-}
-
-// 3x3 Scharr derivative of an 8-bit image into a 16-bit destination.
-// The work is delegated to SeparableFilter3x3 with the {3, 10, 3} weights
-// supplied on one axis and a null weight pointer on the other.
-// The configuration is validated up front via isScharr3x3Supported.
-// Without CAROTENE_NEON the function does nothing beyond that check.
-void Scharr3x3(const Size2D &size,
-               const u8 * srcBase, ptrdiff_t srcStride,
-               s16 * dstBase, ptrdiff_t dstStride,
-               s32 dx, s32 dy,
-               BORDER_MODE border, u8 borderValue, Margin borderMargin)
-{
-    internal::assertSupportedConfiguration(isScharr3x3Supported(size, border, dx, dy, borderMargin));
-#ifdef CAROTENE_NEON
-    static s16 dw[] = {3, 10, 3};
-
-    if (dy != 1)
-    {
-        // dx case: the custom weights go into the second (y) weight slot.
-        SeparableFilter3x3(size, srcBase, srcStride, dstBase, dstStride,
-                           1, 3, 0, dw,
-                           border, borderValue, borderMargin);
-        return;
-    }
-
-    // dy case: the custom weights go into the first (x) weight slot.
-    SeparableFilter3x3(size, srcBase, srcStride, dstBase, dstStride,
-                       3, 1, dw, 0,
-                       border, borderValue, borderMargin);
-#else
-    (void)srcBase;
-    (void)srcStride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)borderValue;
-#endif
-}
-
-// Computes both Scharr derivatives of an 8-bit image in one pass and stores
-// them interleaved: for every source element x of a row the destination holds
-// drow[2*x] (horizontal derivative) and drow[2*x + 1] (vertical derivative).
-//
-//   size      - image size in pixels
-//   cn        - number of interleaved channels per pixel
-//   srcBase   - source image, srcStride is passed to getRowPtr for row access
-//   dstBase   - destination, 2 * size.width * cn s16 values per row
-//
-// Borders are mirrored around the edge element: the row above row 0 is row 1,
-// and the column left of pixel 0 is pixel 1 (likewise at the far edges).
-// Without CAROTENE_NEON the function does nothing.
-void ScharrDeriv(const Size2D &size, s32 cn,
-                 const u8 * srcBase, ptrdiff_t srcStride,
-                 s16 * dstBase, ptrdiff_t dstStride)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    size_t colsn = size.width*cn;
-    // Last start index (exclusive) at which a full 8-element vector still fits.
-    size_t roiw8 = colsn > 7 ? colsn - 7 : 0;
-
-    // Two temporary rows, each with cn elements of padding on both sides for
-    // the mirrored border, rounded up to a multiple of 16 elements.
-    ptrdiff_t delta = (ptrdiff_t)(((size.width + 2)*cn + 15) & -16);//align size
-    std::vector<s16> _tempBuf((delta << 1) + 64);
-    s16 *trow0 = internal::alignPtr(&_tempBuf[cn], 16), *trow1 = internal::alignPtr(trow0 + delta, 16);
-
-    int16x8_t vc3 = vmovq_n_s16(3);
-    int16x8_t vc10 = vmovq_n_s16(10);
-    uint8x8_t v8c10 = vmov_n_u8(10);
-
-    for(size_t y = 0; y < size.height; y++ )
-    {
-        // Neighbouring rows, mirrored at the top and bottom edges.
-        const u8* srow0 = internal::getRowPtr(srcBase, srcStride, y > 0 ? y-1 : size.height > 1 ? 1 : 0);
-        const u8* srow1 = internal::getRowPtr(srcBase, srcStride, y);
-        const u8* srow2 = internal::getRowPtr(srcBase, srcStride, y < size.height-1 ? y+1 : size.height > 1 ? size.height-2 : 0);
-        s16* drow = internal::getRowPtr(dstBase, dstStride, y);
-
-        // do vertical convolution
-        //   trow0 = 3*(above + below) + 10*centre   (vertical smoothing)
-        //   trow1 = below - above                   (vertical difference)
-        size_t x = 0;
-        for( ; x < roiw8; x += 8 )
-        {
-            internal::prefetch(srow0 + x);
-            internal::prefetch(srow1 + x);
-            internal::prefetch(srow2 + x);
-            // The hand-written ARMv7 assembly is a workaround for old GCC 4.x
-            // only. The guard checks the major version as well, and excludes
-            // clang and AArch64, where this assembly must not be selected.
-#if !defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__) && __GNUC__ == 4 && __GNUC_MINOR__ < 7
-            __asm__ (
-                "vld1.8 {d0}, [%[src0]]                                \n\t"
-                "vld1.8 {d2}, [%[src2]]                                \n\t"
-                "vld1.8 {d1}, [%[src1]]                                \n\t"
-                "vaddl.u8 q2, d2, d0                                   \n\t"
-                "vmull.u8 q3, d1, %[vc10]                              \n\t"
-                "vsubl.u8 q4, d2, d0                                   \n\t"
-                "vmla.s16 q3, q2, %q[vc3]                              \n\t"
-                "vst1.16 {d8-d9}, [%[out1],:128]                       \n\t"
-                "vst1.16 {d6-d7}, [%[out0],:128]                       \n\t"
-                :
-                : [out0] "r" (trow0 + x),
-                  [out1] "r" (trow1 + x),
-                  [src0] "r" (srow0 + x),
-                  [src1] "r" (srow1 + x),
-                  [src2] "r" (srow2 + x),
-                  [vc10] "w" (v8c10), [vc3] "w" (vc3)
-                // "memory": the block writes trow0/trow1 behind the compiler's back.
-                : "d0","d1","d2","d3","d4","d5","d6","d7","d8","d9","d10","d11","d12","d13","d14","d15","memory"
-            );
-#else
-            uint8x8_t s0 = vld1_u8(srow0 + x);
-            uint8x8_t s1 = vld1_u8(srow1 + x);
-            uint8x8_t s2 = vld1_u8(srow2 + x);
-
-            int16x8_t s1x10 = vreinterpretq_s16_u16(vmull_u8(s1, v8c10));
-            int16x8_t s02 = vreinterpretq_s16_u16(vaddl_u8(s2, s0));
-            int16x8_t t1 = vreinterpretq_s16_u16(vsubl_u8(s2, s0));
-            int16x8_t t0 = vmlaq_s16(s1x10, s02, vc3);
-
-            vst1q_s16(trow1 + x, t1);
-            vst1q_s16(trow0 + x, t0);
-#endif
-        }
-        for( ; x < colsn; x++ )
-        {
-            trow0[x] = (s16)((srow0[x] + srow2[x])*3 + srow1[x]*10);
-            trow1[x] = (s16)(srow2[x] - srow0[x]);
-        }
-
-        // make border
-        // x0 / x1 are the element offsets of the second and the second-to-last
-        // pixel; they are copied into the padding left and right of the row.
-        size_t x0 = (size.width > 1 ? cn : 0), x1 = (size.width > 1 ? (size.width-2)*cn : 0);
-        for( s32 k = 0; k < cn; k++ )
-        {
-            trow0[-cn + k] = trow0[x0 + k]; trow0[colsn + k] = trow0[x1 + k];
-            trow1[-cn + k] = trow1[x0 + k]; trow1[colsn + k] = trow1[x1 + k];
-        }
-
-        // do horizontal convolution, interleave the results and store them to dst
-        //   drow[2x]   = trow0[x+cn] - trow0[x-cn]
-        //   drow[2x+1] = 3*(trow1[x-cn] + trow1[x+cn]) + 10*trow1[x]
-        x = 0;
-        for( ; x < roiw8; x += 8 )
-        {
-            // Same compiler guard as above, with the GCC 4.6 threshold.
-#if !defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__) && __GNUC__ == 4 && __GNUC_MINOR__ < 6
-            __asm__ (
-                "vld1.16 {d4-d5}, [%[s2ptr]]                           \n\t"
-                "vld1.16 {d8-d9}, [%[s4ptr]]                           \n\t"
-                "vld1.16 {d6-d7}, [%[s3ptr],:128]                      \n\t"
-                "vld1.16 {d0-d1}, [%[s0ptr]]                           \n\t"
-                "vld1.16 {d2-d3}, [%[s1ptr]]                           \n\t"
-                "vadd.i16 q7, q2, q4                                   \n\t"
-                "vmul.s16 q6, q3, %q[vc10]                             \n\t"
-                "vsub.s16 q5, q1, q0                                   \n\t"
-                "vmla.s16 q6, q7, %q[vc3]                              \n\t"
-                "vst2.16 {d10-d13}, [%[out]]                           \n\t"
-                :
-                : [out] "r" (drow + x * 2),
-                  [s0ptr] "r" (trow0 + x - cn),
-                  [s1ptr] "r" (trow0 + x + cn),
-                  [s2ptr] "r" (trow1 + x - cn),
-                  [s3ptr] "r" (trow1 + x),
-                  [s4ptr] "r" (trow1 + x + cn),
-                  [vc10] "w" (vc10), [vc3] "w" (vc3)
-                // "memory": the block writes the destination row behind the compiler's back.
-                : "d0","d1","d2","d3","d4","d5","d6","d7","d8","d9","d10","d11","d12","d13","d14","d15","memory"
-            );
-#else
-            int16x8_t s0 = vld1q_s16(trow0 + x - cn);
-            int16x8_t s1 = vld1q_s16(trow0 + x + cn);
-            int16x8_t s2 = vld1q_s16(trow1 + x - cn);
-            int16x8_t s3 = vld1q_s16(trow1 + x);
-            int16x8_t s4 = vld1q_s16(trow1 + x + cn);
-
-            int16x8_t s3x10 = vmulq_s16(s3, vc10);
-            int16x8_t s24 = vaddq_s16(s2, s4);
-
-            int16x8x2_t vr;
-            vr.val[0] = vsubq_s16(s1, s0);
-            vr.val[1] = vmlaq_s16(s3x10, s24, vc3);
-
-            vst2q_s16(drow + x*2, vr);
-#endif // GCC 4.x (< 4.6) inline-assembly workaround
-        }
-        for( ; x < colsn; x++ )
-        {
-            drow[x*2] = (s16)(trow0[x+cn] - trow0[x-cn]);
-            drow[x*2+1] = (s16)((trow1[x+cn] + trow1[x-cn])*3 + trow1[x]*10);
-        }
-    }
-#else
-    (void)size;
-    (void)cn;
-    (void)srcBase;
-    (void)srcStride;
-    (void)dstBase;
-    (void)dstStride;
-#endif
-}
-
-} // namespace CAROTENE_NS
--- a/3rdparty/carotene/src/separable_filter.cpp
+++ b/3rdparty/carotene/src/separable_filter.cpp
@ -1,109 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2014-2015, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-#include "common.hpp"
-
-#include "separable_filter.hpp"
-
-namespace CAROTENE_NS {
-
-bool isSeparableFilter3x3Supported(const Size2D &size, BORDER_MODE border, s32 dx, s32 dy, Margin borderMargin)
-{
-    return isSupportedConfiguration() &&
-        size.width >= 9 && size.height >= 1 &&
-        (size.height + borderMargin.top + borderMargin.bottom) >= 2  &&
-        (dx >= 0) && (dx < 4) && (dy >= 0) && (dy < 4) &&
-        (border == BORDER_MODE_CONSTANT   ||
-         border == BORDER_MODE_REFLECT    ||
-         border == BORDER_MODE_REFLECT101 ||
-         border == BORDER_MODE_REPLICATE   );
-}
-
-void SeparableFilter3x3(const Size2D &size,
-                        const u8 * srcBase, ptrdiff_t srcStride,
-                        s16 * dstBase, ptrdiff_t dstStride,
-                        const u8 rowFilter, const u8 colFilter, const s16 *xw, const s16 *yw,
-                        BORDER_MODE border, u8 borderValue, Margin borderMargin)
-{
-    internal::assertSupportedConfiguration(isSeparableFilter3x3Supported(size, border, rowFilter, colFilter, borderMargin));
-#ifdef CAROTENE_NEON
-    if(!((xw || rowFilter < 3) && (yw || colFilter < 3)))
-        std::abort();//Couldn't call generic filter without provided weights
-
-    typedef void (*sepFilter3x3_8u16s_func)(const Size2D&, const u8*, ptrdiff_t, s16*, ptrdiff_t,
-                                            const s16*, const s16*, BORDER_MODE, u8, Margin);
-
-    static sepFilter3x3_8u16s_func quickFilters[4][4]=
-    {
-    /*d0y*/{ /*d0x*/ internal::sepFilter3x3<internal::RowFilter3x3S16_121,    internal::ColFilter3x3S16_121>::process,
-             /*dx*/  internal::sepFilter3x3<internal::RowFilter3x3S16_m101,   internal::ColFilter3x3S16_121>::process,
-             /*d2x*/ internal::sepFilter3x3<internal::RowFilter3x3S16_1m21,   internal::ColFilter3x3S16_121>::process,
-             /*dNx*/ internal::sepFilter3x3<internal::RowFilter3x3S16Generic, internal::ColFilter3x3S16_121>::process},
-
-    /*dy */{ /*d0x*/ internal::sepFilter3x3<internal::RowFilter3x3S16_121,    internal::ColFilter3x3S16_m101>::process,
-             /*dx*/  internal::sepFilter3x3<internal::RowFilter3x3S16_m101,   internal::ColFilter3x3S16_m101>::process,
-             /*d2x*/ internal::sepFilter3x3<internal::RowFilter3x3S16_1m21,   internal::ColFilter3x3S16_m101>::process,
-             /*dNx*/ internal::sepFilter3x3<internal::RowFilter3x3S16Generic, internal::ColFilter3x3S16_m101>::process},
-
-    /*d2y*/{ /*d0x*/ internal::sepFilter3x3<internal::RowFilter3x3S16_121,    internal::ColFilter3x3S16_1m21>::process,
-             /*dx*/  internal::sepFilter3x3<internal::RowFilter3x3S16_m101,   internal::ColFilter3x3S16_1m21>::process,
-             /*d2x*/ internal::sepFilter3x3<internal::RowFilter3x3S16_1m21,   internal::ColFilter3x3S16_1m21>::process,
-             /*dNx*/ internal::sepFilter3x3<internal::RowFilter3x3S16Generic, internal::ColFilter3x3S16_1m21>::process},
-
-    /*dNy*/{ /*d0x*/ internal::sepFilter3x3<internal::RowFilter3x3S16_121,    internal::ColFilter3x3S16Generic>::process,
-             /*dx*/  internal::sepFilter3x3<internal::RowFilter3x3S16_m101,   internal::ColFilter3x3S16Generic>::process,
-             /*d2x*/ internal::sepFilter3x3<internal::RowFilter3x3S16_1m21,   internal::ColFilter3x3S16Generic>::process,
-             /*dNx*/ internal::sepFilter3x3<internal::RowFilter3x3S16Generic, internal::ColFilter3x3S16Generic>::process}
-    };
-
-    quickFilters[colFilter][rowFilter](size, srcBase, srcStride, dstBase, dstStride,
-                                       xw, yw, border, borderValue, borderMargin);
-#else
-    (void)srcBase;
-    (void)srcStride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)xw;
-    (void)yw;
-    (void)borderValue;
-#endif
-}
-
-
-} // namespace CAROTENE_NS
--- a/3rdparty/carotene/src/separable_filter.hpp
+++ b/3rdparty/carotene/src/separable_filter.hpp
--- a/3rdparty/carotene/src/sobel.cpp
+++ b/3rdparty/carotene/src/sobel.cpp
@ -1,317 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2012-2015, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-#include <vector>
-
-#include "common.hpp"
-
-namespace CAROTENE_NS {
-
-bool isSobel3x3Supported(const Size2D &size, BORDER_MODE border,
-                         s32 dx, s32 dy, Margin borderMargin)
-{
-    return dx < 3 && dx >= 0 &&
-           dy < 3 && dy >= 0 &&
-           (dx + dy) > 0 &&
-           isSeparableFilter3x3Supported(size, border, dx, dy, borderMargin);
-}
-
-void Sobel3x3(const Size2D &size,
-              const u8 * srcBase, ptrdiff_t srcStride,
-              s16 * dstBase, ptrdiff_t dstStride,
-              s32 dx, s32 dy,
-              BORDER_MODE borderType, u8 borderValue, Margin borderMargin)
-{
-    internal::assertSupportedConfiguration(isSobel3x3Supported(size, borderType, dx, dy, borderMargin));
-#ifdef CAROTENE_NEON
-    SeparableFilter3x3(size, srcBase, srcStride, dstBase, dstStride,
-                       dx, dy, 0, 0,
-                       borderType, borderValue, borderMargin);
-#else
-    (void)srcBase;
-    (void)srcStride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)borderValue;
-#endif
-}
-
-bool isSobel3x3f32Supported(const Size2D &size, BORDER_MODE border,
-                            s32 dx, s32 dy)
-{
-    return isSupportedConfiguration() &&
-           dx < 3 && dx >= 0 &&
-           dy < 3 && dy >= 0 &&
-           (dx + dy) > 0 &&
-           size.width >= 4 && size.height >= 2 &&
-           (border == BORDER_MODE_CONSTANT   ||
-            border == BORDER_MODE_REFLECT    ||
-            border == BORDER_MODE_REFLECT101 ||
-            border == BORDER_MODE_REPLICATE   );
-}
-
-void Sobel3x3(const Size2D &size,
-              const f32 * srcBase, ptrdiff_t srcStride,
-              f32 * dstBase, ptrdiff_t dstStride,
-              s32 dx, s32 dy,
-              BORDER_MODE borderType, f32 borderValue)
-{
-    internal::assertSupportedConfiguration(isSobel3x3f32Supported(size, borderType, dx, dy));
-#ifdef CAROTENE_NEON
-    std::vector<f32> _tmp;
-    f32 *tmp = 0;
-    if (borderType == BORDER_MODE_CONSTANT)
-    {
-        _tmp.assign(size.width + 2, borderValue);
-        tmp = &_tmp[1];
-    }
-
-    ptrdiff_t delta = (ptrdiff_t)((size.width + 2 + 31) & -32);//align size
-    std::vector<f32> _tempBuf((delta << 1) + 64);
-    f32 *trow0 = internal::alignPtr(&_tempBuf[1], 32), *trow1 = internal::alignPtr(trow0 + delta, 32);
-
-    for( size_t y = 0; y < size.height; y++ )
-    {
-        const f32* srow0;
-        const f32* srow1 = internal::getRowPtr(srcBase, srcStride, y);
-        const f32* srow2;
-        f32* drow = internal::getRowPtr(dstBase, dstStride, y > 0 ? y-1 : 0);
-        f32* drow1 = internal::getRowPtr(dstBase, dstStride, y);
-        if (borderType == BORDER_MODE_REFLECT101) {
-            srow0 = internal::getRowPtr(srcBase, srcStride, y > 0 ? y-1 : 1);
-            srow2 = internal::getRowPtr(srcBase, srcStride, y < size.height-1 ? y+1 : size.height-2);
-        } else  if (borderType == BORDER_MODE_CONSTANT) {
-            srow0 = y > 0 ? internal::getRowPtr(srcBase, srcStride, y-1) : tmp;
-            srow2 =  y < size.height-1 ? internal::getRowPtr(srcBase, srcStride, y+1) : tmp;
-        } else { // BORDER_MODE_REFLECT || BORDER_MODE_REPLICATE
-            srow0 = internal::getRowPtr(srcBase, srcStride, y > 0 ? y-1 : 0);
-            srow2 = internal::getRowPtr(srcBase, srcStride, y < size.height-1 ? y+1 : size.height-1);
-        }
-
-        float32x4_t tprev = vmovq_n_f32(0.f);
-        float32x4_t tcurr = vmovq_n_f32(0.f);
-        float32x4_t tnext = vmovq_n_f32(0.f);
-        float32x4_t t0, t1, t2;
-        // do vertical convolution
-        size_t x = 0, bcolsn = y + 2 < size.height ? size.width : (size.width - 4);
-        for( ; x <= bcolsn; x += 4 )
-        {
-            internal::prefetch(srow0 + x);
-            internal::prefetch(srow1 + x);
-            internal::prefetch(srow2 + x);
-
-            float32x4_t x0 = vld1q_f32(srow0 + x);
-            float32x4_t x1 = vld1q_f32(srow1 + x);
-            float32x4_t x2 = vld1q_f32(srow2 + x);
-
-            tprev = tcurr;
-            tcurr = tnext;
-            if(!dy)
-            {
-                tnext = vaddq_f32(vaddq_f32(vaddq_f32(x1, x1), x2), x0);
-            }
-            else if(dy == 2)
-            {
-                tnext = vsubq_f32(vsubq_f32(x2, x1), vsubq_f32(x1, x0));
-            }
-            else
-            {
-                tnext = vsubq_f32(x2, x0);
-            }
-
-            if(!x) {
-                tcurr = tnext;
-                // make border
-                if (borderType == BORDER_MODE_CONSTANT)
-                {
-                    tcurr = vsetq_lane_f32(borderValue,tcurr, 3);
-                }
-                else if (borderType == BORDER_MODE_REFLECT101)
-                {
-                    tcurr = vsetq_lane_f32(vgetq_lane_f32(tcurr, 1),tcurr, 3);
-                }
-                else // BORDER_MODE_REFLECT || BORDER_MODE_REPLICATE
-                {
-                    tcurr = vsetq_lane_f32(vgetq_lane_f32(tcurr, 0),tcurr, 3);
-                }
-                continue;
-            }
-
-            internal::prefetch(trow0 + x);
-            internal::prefetch(trow1 + x);
-
-            t0 = vextq_f32(tprev, tcurr, 3);
-            t1 = tcurr;
-            t2 = vextq_f32(tcurr, tnext, 1);
-            if(!dx)
-            {
-                t0 = vaddq_f32(t0, vaddq_f32(vaddq_f32(t1, t1), t2));
-            }
-            else if(dx == 2)
-            {
-                t0 = vsubq_f32(vsubq_f32(t2, t1), vsubq_f32(t1, t0));
-            }
-            else
-            {
-                t0 = vsubq_f32(t2, t0);
-            }
-
-            if(!(y%2))
-            {
-                vst1q_f32(trow0 + x - 4, t0);
-            }
-            else
-            {
-                vst1q_f32(trow1 + x - 4, t0);
-            }
-        }
-        x -= 4;
-        if(x == size.width){
-            x--;
-        }
-        f32 prevx = 0, rowx = 0, nextx = 0;
-        if(!dy)
-        {
-            prevx = x > 0 ? srow2[x-1] + 2*srow1[x-1] + srow0[x-1] :
-                    (borderType == BORDER_MODE_REFLECT101 ? srow2[1] + 2*srow1[1] + srow0[1] :
-                    (borderType == BORDER_MODE_CONSTANT   ? 4*borderValue :
-                                                            srow2[0] + 2*srow1[0] + srow0[0]) );
-            rowx  = srow2[x] + 2*srow1[x] + srow0[x];
-        }
-        else if(dy == 2)
-        {
-            prevx = x > 0 ? srow2[x-1] - 2*srow1[x-1] + srow0[x-1] :
-                    (borderType == BORDER_MODE_REFLECT101 ? srow2[1] - 2*srow1[1] + srow0[1] :
-                    (borderType == BORDER_MODE_CONSTANT   ? 0.f :
-                                                            srow2[0] - 2*srow1[0] + srow0[0]) );
-            rowx  = srow2[x] - 2*srow1[x] + srow0[x];
-        }
-        else
-        {
-            prevx = x > 0 ? srow2[x-1] - srow0[x-1] :
-                    (borderType == BORDER_MODE_REFLECT101 ? srow2[1] - srow0[1] :
-                    (borderType == BORDER_MODE_CONSTANT   ? 0.f :
-                                                            srow2[0] - srow0[0]) );
-            rowx  = srow2[x] - srow0[x];
-        }
-
-        for( ; x < size.width; x++ )
-        {
-            if(x+1 == size.width) {
-                // make border
-                if (borderType == BORDER_MODE_CONSTANT)
-                {
-                    if(!dy) {
-                        nextx = 4*borderValue;
-                    } else {
-                        nextx = 0.f;
-                    }
-                } else if (borderType == BORDER_MODE_REFLECT101)
-                {
-                    if(!dy) {
-                        nextx = srow2[x-1] + 2*srow1[x-1] + srow0[x-1];
-                    } else if(dy == 2) {
-                        nextx = srow2[x-1] - 2*srow1[x-1] + srow0[x-1];
-                    } else {
-                        nextx = srow2[x-1] - srow0[x-1];
-                    }
-                } else {
-                    if(!dy) {
-                        nextx = srow2[x] + 2*srow1[x] + srow0[x];
-                    } else if(dy == 2) {
-                        nextx = srow2[x] - 2*srow1[x] + srow0[x];
-                    } else {
-                        nextx = srow2[x] - srow0[x];
-                    }
-                }
-            } else {
-                if(!dy) {
-                    nextx = srow2[x+1] + 2*srow1[x+1] + srow0[x+1];
-                } else if(dy == 2) {
-                    nextx = srow2[x+1] - 2*srow1[x+1] + srow0[x+1];
-                } else {
-                    nextx = srow2[x+1] - srow0[x+1];
-                }
-            }
-            f32 res;
-            if(dx==1) {
-                res = nextx - prevx;
-            } else if(!dx) {
-                res = prevx + 2*rowx + nextx;
-            } else {
-                res = prevx - 2*rowx + nextx;
-            }
-            if(!(y%2)) {
-                *(trow0+x) = res;
-            } else {
-                *(trow1+x) = res;
-            }
-            prevx = rowx;
-            rowx = nextx;
-        }
-
-        if(y>0) {
-            for(size_t x1 = 0; x1 < size.width; x1++ )
-            {
-                if(y%2)
-                    *(drow + x1) = trow0[x1];
-                else
-                    *(drow + x1) = trow1[x1];
-            }
-        }
-        if(y == size.height-1) {
-            for(size_t x1 = 0; x1 < size.width; x1++ )
-            {
-                if(!(y%2))
-                    *(drow1 + x1) = trow0[x1];
-                else
-                    *(drow1 + x1) = trow1[x1];
-            }
-        }
-    }
-#else
-    (void)srcBase;
-    (void)srcStride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)borderValue;
-#endif
-}
-
-} // namespace CAROTENE_NS
--- a/3rdparty/carotene/src/sub.cpp
+++ b/3rdparty/carotene/src/sub.cpp
@ -1,621 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2014, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-#include "common.hpp"
-#include "vtransform.hpp"
-
-namespace CAROTENE_NS {
-
-#ifdef CAROTENE_NEON
-
-namespace {
-
-template <typename T, typename WT>
-struct SubWrap
-{
-    typedef T type;
-
-    void operator() (const typename internal::VecTraits<T>::vec128 & v_src0,
-                     const typename internal::VecTraits<T>::vec128 & v_src1,
-                     typename internal::VecTraits<T>::vec128 & v_dst) const
-    {
-        v_dst = internal::vsubq(v_src0, v_src1);
-    }
-
-    void operator() (const typename internal::VecTraits<T>::vec64 & v_src0,
-                     const typename internal::VecTraits<T>::vec64 & v_src1,
-                     typename internal::VecTraits<T>::vec64 & v_dst) const
-    {
-        v_dst = internal::vsub(v_src0, v_src1);
-    }
-
-    void operator() (const T * src0, const T * src1, T * dst) const
-    {
-        dst[0] = (T)((WT)src0[0] - (WT)src1[0]);
-    }
-};
-
-template <typename T, typename WT>
-struct SubSaturate
-{
-    typedef T type;
-
-    void operator() (const typename internal::VecTraits<T>::vec128 & v_src0,
-                     const typename internal::VecTraits<T>::vec128 & v_src1,
-                     typename internal::VecTraits<T>::vec128 & v_dst) const
-    {
-        v_dst = internal::vqsubq(v_src0, v_src1);
-    }
-
-    void operator() (const typename internal::VecTraits<T>::vec64 & v_src0,
-                     const typename internal::VecTraits<T>::vec64 & v_src1,
-                     typename internal::VecTraits<T>::vec64 & v_dst) const
-    {
-        v_dst = internal::vqsub(v_src0, v_src1);
-    }
-
-    void operator() (const T * src0, const T * src1, T * dst) const
-    {
-        dst[0] = internal::saturate_cast<T>((WT)src0[0] - (WT)src1[0]);
-    }
-};
-
-} // namespace
-
-#endif
-
-void sub(const Size2D &size,
-         const u8 * src0Base, ptrdiff_t src0Stride,
-         const u8 * src1Base, ptrdiff_t src1Stride,
-         u8 *dstBase, ptrdiff_t dstStride,
-         CONVERT_POLICY policy)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    if (policy == CONVERT_POLICY_SATURATE)
-    {
-        internal::vtransform(size,
-                             src0Base, src0Stride,
-                             src1Base, src1Stride,
-                             dstBase, dstStride,
-                             SubSaturate<u8, s16>());
-    }
-    else
-    {
-        internal::vtransform(size,
-                             src0Base, src0Stride,
-                             src1Base, src1Stride,
-                             dstBase, dstStride,
-                             SubWrap<u8, s16>());
-    }
-#else
-    (void)size;
-    (void)src0Base;
-    (void)src0Stride;
-    (void)src1Base;
-    (void)src1Stride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)policy;
-#endif
-}
-
-void sub(const Size2D &size,
-         const u8 * src0Base, ptrdiff_t src0Stride,
-         const u8 * src1Base, ptrdiff_t src1Stride,
-         s16 *dstBase, ptrdiff_t dstStride,
-         CONVERT_POLICY)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    size_t roiw32 = size.width >= 31 ? size.width - 31 : 0;
-    size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
-
-    for (size_t i = 0; i < size.height; ++i)
-    {
-        const u8 * src0 = internal::getRowPtr(src0Base, src0Stride, i);
-        const u8 * src1 = internal::getRowPtr(src1Base, src1Stride, i);
-        u16 * dstu16 = internal::getRowPtr((u16 *)dstBase, dstStride, i);
-        s16 * dst = internal::getRowPtr(dstBase, dstStride, i);
-        size_t j = 0;
-
-        for (; j < roiw32; j += 32)
-        {
-            internal::prefetch(src0 + j);
-            internal::prefetch(src1 + j);
-            uint8x16_t v_src00 = vld1q_u8(src0 + j), v_src01 = vld1q_u8(src0 + j + 16);
-            uint8x16_t v_src10 = vld1q_u8(src1 + j), v_src11 = vld1q_u8(src1 + j + 16);
-            vst1q_u16(dstu16 + j, vsubl_u8(vget_low_u8(v_src00), vget_low_u8(v_src10)));
-            vst1q_u16(dstu16 + j + 8, vsubl_u8(vget_high_u8(v_src00), vget_high_u8(v_src10)));
-            vst1q_u16(dstu16 + j + 16, vsubl_u8(vget_low_u8(v_src01), vget_low_u8(v_src11)));
-            vst1q_u16(dstu16 + j + 24, vsubl_u8(vget_high_u8(v_src01), vget_high_u8(v_src11)));
-        }
-        for (; j < roiw8; j += 8)
-        {
-            uint8x8_t v_src0 = vld1_u8(src0 + j);
-            uint8x8_t v_src1 = vld1_u8(src1 + j);
-            vst1q_u16(dstu16 + j, vsubl_u8(v_src0, v_src1));
-        }
-
-        for (; j < size.width; j++)
-            dst[j] = (s16)src0[j] - (s16)src1[j];
-    }
-#else
-    (void)size;
-    (void)src0Base;
-    (void)src0Stride;
-    (void)src1Base;
-    (void)src1Stride;
-    (void)dstBase;
-    (void)dstStride;
-#endif
-}
-
-void sub(const Size2D &size,
-         const u8 * src0Base, ptrdiff_t src0Stride,
-         const u8 * src1Base, ptrdiff_t src1Stride,
-         f32 *dstBase, ptrdiff_t dstStride)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    size_t roiw32 = size.width >= 31 ? size.width - 31 : 0;
-    size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
-
-    for (size_t i = 0; i < size.height; ++i)
-    {
-        const u8 * src0 = internal::getRowPtr(src0Base, src0Stride, i);
-        const u8 * src1 = internal::getRowPtr(src1Base, src1Stride, i);
-        f32 * dst = internal::getRowPtr(dstBase, dstStride, i);
-        size_t j = 0;
-
-        for (; j < roiw32; j += 32)
-        {
-            internal::prefetch(src0 + j);
-            internal::prefetch(src1 + j);
-            uint8x16_t v_src00 = vld1q_u8(src0 + j), v_src01 = vld1q_u8(src0 + j + 16);
-            uint8x16_t v_src10 = vld1q_u8(src1 + j), v_src11 = vld1q_u8(src1 + j + 16);
-            int16x8_t vsl = vreinterpretq_s16_u16(vsubl_u8( vget_low_u8(v_src00),  vget_low_u8(v_src10)));
-            int16x8_t vsh = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(v_src00), vget_high_u8(v_src10)));
-
-            vst1q_f32(dst + j +  0, vcvtq_f32_s32(vmovl_s16(  vget_low_s16(vsl) )));
-            vst1q_f32(dst + j +  4, vcvtq_f32_s32(vmovl_s16( vget_high_s16(vsl) )));
-            vst1q_f32(dst + j +  8, vcvtq_f32_s32(vmovl_s16(  vget_low_s16(vsh) )));
-            vst1q_f32(dst + j + 12, vcvtq_f32_s32(vmovl_s16( vget_high_s16(vsh) )));
-
-            vsl = vreinterpretq_s16_u16(vsubl_u8( vget_low_u8(v_src01),  vget_low_u8(v_src11)));
-            vsh = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(v_src01), vget_high_u8(v_src11)));
-
-            vst1q_f32(dst + j + 16, vcvtq_f32_s32(vmovl_s16(  vget_low_s16(vsl) )));
-            vst1q_f32(dst + j + 20, vcvtq_f32_s32(vmovl_s16( vget_high_s16(vsl) )));
-            vst1q_f32(dst + j + 24, vcvtq_f32_s32(vmovl_s16(  vget_low_s16(vsh) )));
-            vst1q_f32(dst + j + 28, vcvtq_f32_s32(vmovl_s16( vget_high_s16(vsh) )));
-        }
-        for (; j < roiw8; j += 8)
-        {
-            uint8x8_t v_src0 = vld1_u8(src0 + j);
-            uint8x8_t v_src1 = vld1_u8(src1 + j);
-
-            int16x8_t vs = vreinterpretq_s16_u16(vsubl_u8(v_src0, v_src1));
-            vst1q_f32(dst + j + 0, vcvtq_f32_s32(vmovl_s16(  vget_low_s16(vs) )));
-            vst1q_f32(dst + j + 4, vcvtq_f32_s32(vmovl_s16( vget_high_s16(vs) )));
-        }
-        for(; j < size.width; j++)
-            dst[j] = (f32)src0[j] - (f32)src1[j];
-    }
-#else
-    (void)size;
-    (void)src0Base;
-    (void)src0Stride;
-    (void)src1Base;
-    (void)src1Stride;
-    (void)dstBase;
-    (void)dstStride;
-#endif
-}
-
-void sub(const Size2D &size,
-         const u8 * src0Base, ptrdiff_t src0Stride,
-         const s16 * src1Base, ptrdiff_t src1Stride,
-         s16 *dstBase, ptrdiff_t dstStride,
-         CONVERT_POLICY policy)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    size_t roiw16 = size.width >= 15 ? size.width - 15 : 0;
-    size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
-
-    for (size_t i = 0; i < size.height; ++i)
-    {
-        const u8 * src0 = internal::getRowPtr(src0Base, src0Stride, i);
-        const s16 * src1 = internal::getRowPtr(src1Base, src1Stride, i);
-        s16 * dst = internal::getRowPtr(dstBase, dstStride, i);
-        size_t j = 0;
-
-        if (policy == CONVERT_POLICY_SATURATE)
-        {
-            for (; j < roiw16; j += 16)
-            {
-                internal::prefetch(src0 + j);
-                internal::prefetch(src1 + j);
-                uint8x16_t v_src0 = vld1q_u8(src0 + j);
-                int16x8_t v_src00 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(v_src0)));
-                int16x8_t v_src01 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(v_src0)));
-                int16x8_t v_src10 = vld1q_s16(src1 + j), v_src11 = vld1q_s16(src1 + j + 8);
-                int16x8_t v_dst0 = vqsubq_s16(v_src00, v_src10);
-                int16x8_t v_dst1 = vqsubq_s16(v_src01, v_src11);
-                vst1q_s16(dst + j, v_dst0);
-                vst1q_s16(dst + j + 8, v_dst1);
-            }
-            for (; j < roiw8; j += 8)
-            {
-                int16x8_t v_src0 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(src0 + j)));
-                int16x8_t v_src1 = vld1q_s16(src1 + j);
-                int16x8_t v_dst = vqsubq_s16(v_src0, v_src1);
-                vst1q_s16(dst + j, v_dst);
-            }
-
-            for (; j < size.width; j++)
-                dst[j] = internal::saturate_cast<s16>((s32)src0[j] - (s32)src1[j]);
-        }
-        else
-        {
-            for (; j < roiw16; j += 16)
-            {
-                internal::prefetch(src0 + j);
-                internal::prefetch(src1 + j);
-                uint8x16_t v_src0 = vld1q_u8(src0 + j);
-                int16x8_t v_src00 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(v_src0)));
-                int16x8_t v_src01 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(v_src0)));
-                int16x8_t v_src10 = vld1q_s16(src1 + j), v_src11 = vld1q_s16(src1 + j + 8);
-                int16x8_t v_dst0 = vsubq_s16(v_src00, v_src10);
-                int16x8_t v_dst1 = vsubq_s16(v_src01, v_src11);
-                vst1q_s16(dst + j, v_dst0);
-                vst1q_s16(dst + j + 8, v_dst1);
-            }
-            for (; j < roiw8; j += 8)
-            {
-                int16x8_t v_src0 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(src0 + j)));
-                int16x8_t v_src1 = vld1q_s16(src1 + j);
-                int16x8_t v_dst = vsubq_s16(v_src0, v_src1);
-                vst1q_s16(dst + j, v_dst);
-            }
-
-            for (; j < size.width; j++)
-                dst[j] = (s16)((s32)src0[j] - (s32)src1[j]);
-        }
-    }
-#else
-    (void)size;
-    (void)src0Base;
-    (void)src0Stride;
-    (void)src1Base;
-    (void)src1Stride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)policy;
-#endif
-}
-
-void sub(const Size2D &size,
-         const s16 * src0Base, ptrdiff_t src0Stride,
-         const u8 * src1Base, ptrdiff_t src1Stride,
-         s16 *dstBase, ptrdiff_t dstStride,
-         CONVERT_POLICY policy)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    size_t roiw16 = size.width >= 15 ? size.width - 15 : 0;
-    size_t roiw8 = size.width >= 7 ? size.width - 7 : 0;
-
-    for (size_t i = 0; i < size.height; ++i)
-    {
-        const s16 * src0 = internal::getRowPtr(src0Base, src0Stride, i);
-        const u8 * src1 = internal::getRowPtr(src1Base, src1Stride, i);
-        s16 * dst = internal::getRowPtr(dstBase, dstStride, i);
-        size_t j = 0;
-
-        if (policy == CONVERT_POLICY_SATURATE)
-        {
-            for (; j < roiw16; j += 16)
-            {
-                internal::prefetch(src0 + j);
-                internal::prefetch(src1 + j);
-                int16x8_t v_src00 = vld1q_s16(src0 + j), v_src01 = vld1q_s16(src0 + j + 8);
-                uint8x16_t v_src1 = vld1q_u8(src1 + j);
-                int16x8_t v_src10 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(v_src1)));
-                int16x8_t v_src11 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(v_src1)));
-                int16x8_t v_dst0 = vqsubq_s16(v_src00, v_src10);
-                int16x8_t v_dst1 = vqsubq_s16(v_src01, v_src11);
-                vst1q_s16(dst + j, v_dst0);
-                vst1q_s16(dst + j + 8, v_dst1);
-            }
-            for (; j < roiw8; j += 8)
-            {
-                int16x8_t v_src0 = vld1q_s16(src0 + j);
-                int16x8_t v_src1 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(src1 + j)));
-                int16x8_t v_dst = vqsubq_s16(v_src0, v_src1);
-                vst1q_s16(dst + j, v_dst);
-            }
-
-            for (; j < size.width; j++)
-                dst[j] = internal::saturate_cast<s16>((s32)src0[j] - (s32)src1[j]);
-        }
-        else
-        {
-            for (; j < roiw16; j += 16)
-            {
-                internal::prefetch(src0 + j);
-                internal::prefetch(src1 + j);
-                int16x8_t v_src00 = vld1q_s16(src0 + j), v_src01 = vld1q_s16(src0 + j + 8);
-                uint8x16_t v_src1 = vld1q_u8(src1 + j);
-                int16x8_t v_src10 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(v_src1)));
-                int16x8_t v_src11 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(v_src1)));
-                int16x8_t v_dst0 = vsubq_s16(v_src00, v_src10);
-                int16x8_t v_dst1 = vsubq_s16(v_src01, v_src11);
-                vst1q_s16(dst + j, v_dst0);
-                vst1q_s16(dst + j + 8, v_dst1);
-            }
-            for (; j < roiw8; j += 8)
-            {
-                int16x8_t v_src0 = vld1q_s16(src0 + j);
-                int16x8_t v_src1 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(src1 + j)));
-                int16x8_t v_dst = vsubq_s16(v_src0, v_src1);
-                vst1q_s16(dst + j, v_dst);
-            }
-
-            for (; j < size.width; j++)
-                dst[j] = (s16)((s32)src0[j] - (s32)src1[j]);
-        }
-    }
-#else
-    (void)size;
-    (void)src0Base;
-    (void)src0Stride;
-    (void)src1Base;
-    (void)src1Stride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)policy;
-#endif
-}
-
-void sub(const Size2D &size,
-         const s8 * src0Base, ptrdiff_t src0Stride,
-         const s8 * src1Base, ptrdiff_t src1Stride,
-         s8 *dstBase, ptrdiff_t dstStride,
-         CONVERT_POLICY policy)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    if (policy == CONVERT_POLICY_SATURATE)
-    {
-        internal::vtransform(size,
-                             src0Base, src0Stride,
-                             src1Base, src1Stride,
-                             dstBase, dstStride,
-                             SubSaturate<s8, s16>());
-    }
-    else
-    {
-        internal::vtransform(size,
-                             src0Base, src0Stride,
-                             src1Base, src1Stride,
-                             dstBase, dstStride,
-                             SubWrap<s8, s16>());
-    }
-#else
-    (void)size;
-    (void)src0Base;
-    (void)src0Stride;
-    (void)src1Base;
-    (void)src1Stride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)policy;
-#endif
-}
-
-void sub(const Size2D &size,
-         const s16 * src0Base, ptrdiff_t src0Stride,
-         const s16 * src1Base, ptrdiff_t src1Stride,
-         s16 *dstBase, ptrdiff_t dstStride,
-         CONVERT_POLICY policy)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    if (policy == CONVERT_POLICY_SATURATE)
-    {
-        internal::vtransform(size,
-                             src0Base, src0Stride,
-                             src1Base, src1Stride,
-                             dstBase, dstStride,
-                             SubSaturate<s16, s32>());
-    }
-    else
-    {
-        internal::vtransform(size,
-                             src0Base, src0Stride,
-                             src1Base, src1Stride,
-                             dstBase, dstStride,
-                             SubWrap<s16, s32>());
-    }
-#else
-    (void)size;
-    (void)src0Base;
-    (void)src0Stride;
-    (void)src1Base;
-    (void)src1Stride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)policy;
-#endif
-}
-
-void sub(const Size2D &size,
-         const u16 * src0Base, ptrdiff_t src0Stride,
-         const u16 * src1Base, ptrdiff_t src1Stride,
-         u16 *dstBase, ptrdiff_t dstStride,
-         CONVERT_POLICY policy)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    if (policy == CONVERT_POLICY_SATURATE)
-    {
-        internal::vtransform(size,
-                             src0Base, src0Stride,
-                             src1Base, src1Stride,
-                             dstBase, dstStride,
-                             SubSaturate<u16, s32>());
-    }
-    else
-    {
-        internal::vtransform(size,
-                             src0Base, src0Stride,
-                             src1Base, src1Stride,
-                             dstBase, dstStride,
-                             SubWrap<u16, s32>());
-    }
-#else
-    (void)size;
-    (void)src0Base;
-    (void)src0Stride;
-    (void)src1Base;
-    (void)src1Stride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)policy;
-#endif
-}
-
-void sub(const Size2D &size,
-         const s32 * src0Base, ptrdiff_t src0Stride,
-         const s32 * src1Base, ptrdiff_t src1Stride,
-         s32 *dstBase, ptrdiff_t dstStride,
-         CONVERT_POLICY policy)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    if (policy == CONVERT_POLICY_SATURATE)
-    {
-        internal::vtransform(size,
-                             src0Base, src0Stride,
-                             src1Base, src1Stride,
-                             dstBase, dstStride,
-                             SubSaturate<s32, s64>());
-    }
-    else
-    {
-        internal::vtransform(size,
-                             src0Base, src0Stride,
-                             src1Base, src1Stride,
-                             dstBase, dstStride,
-                             SubWrap<s32, s64>());
-    }
-#else
-    (void)size;
-    (void)src0Base;
-    (void)src0Stride;
-    (void)src1Base;
-    (void)src1Stride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)policy;
-#endif
-}
-
-void sub(const Size2D &size,
-         const u32 * src0Base, ptrdiff_t src0Stride,
-         const u32 * src1Base, ptrdiff_t src1Stride,
-         u32 *dstBase, ptrdiff_t dstStride,
-         CONVERT_POLICY policy)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    if (policy == CONVERT_POLICY_SATURATE)
-    {
-        internal::vtransform(size,
-                             src0Base, src0Stride,
-                             src1Base, src1Stride,
-                             dstBase, dstStride,
-                             SubSaturate<u32, s64>());
-    }
-    else
-    {
-        internal::vtransform(size,
-                             src0Base, src0Stride,
-                             src1Base, src1Stride,
-                             dstBase, dstStride,
-                             SubWrap<u32, s64>());
-    }
-#else
-    (void)size;
-    (void)src0Base;
-    (void)src0Stride;
-    (void)src1Base;
-    (void)src1Stride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)policy;
-#endif
-}
-
-void sub(const Size2D &size,
-         const f32 * src0Base, ptrdiff_t src0Stride,
-         const f32 * src1Base, ptrdiff_t src1Stride,
-         f32 *dstBase, ptrdiff_t dstStride)
-{
-    internal::assertSupportedConfiguration();
-#ifdef CAROTENE_NEON
-    internal::vtransform(size,
-                         src0Base, src0Stride,
-                         src1Base, src1Stride,
-                         dstBase, dstStride,
-                         SubWrap<f32, f32>());
-#else
-    (void)size;
-    (void)src0Base;
-    (void)src0Stride;
-    (void)src1Base;
-    (void)src1Stride;
-    (void)dstBase;
-    (void)dstStride;
-#endif
-}
-
-} // namespace CAROTENE_NS
--- a/3rdparty/carotene/src/sum.cpp
+++ b/3rdparty/carotene/src/sum.cpp
@ -1,385 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2012-2015, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-#include "common.hpp"
-
-#include "vtransform.hpp"
-
-namespace CAROTENE_NS {
-
-bool isSumSupported(u32 channels)
-{
-    return (channels && channels < 5);
-}
-
-void sum(const Size2D &_size,
-         const u8 * srcBase, ptrdiff_t srcStride,
-         u32 * sumdst, u32 channels)
-{
-    internal::assertSupportedConfiguration(isSumSupported(channels));
-#ifdef CAROTENE_NEON
-    Size2D size(_size);
-    if (srcStride == (ptrdiff_t)(size.width))
-    {
-        size.width *= size.height;
-        size.height = 1;
-    }
-    const ptrdiff_t width = size.width * channels;
-
-    for(size_t k = 0; k < size.height; ++k)
-    {
-        const u8* src = internal::getRowPtr( srcBase,  srcStride, k);
-        ptrdiff_t i = 0;
-
-        if (channels == 3)
-        {
-            uint32x4_t vs1231 = vdupq_n_u32(0);
-            uint32x4_t vs3123 = vdupq_n_u32(0);
-            uint32x4_t vs2312 = vdupq_n_u32(0);
-            for (; i <= width - 257*8*3; i += 257*8*3, src += 257*8*3)
-            {
-                uint16x8_t s1 = vmovl_u8(vld1_u8(src +  0));
-                uint16x8_t s2 = vmovl_u8(vld1_u8(src +  8));
-                uint16x8_t s3 = vmovl_u8(vld1_u8(src + 16));
-
-                for (ptrdiff_t j = 8*3; j < 257*8*3; j+= 8*3)
-                {
-                    internal::prefetch(src + j + 24);
-                    s1 = vaddw_u8(s1, vld1_u8(src + j +  0));
-                    s2 = vaddw_u8(s2, vld1_u8(src + j +  8));
-                    s3 = vaddw_u8(s3, vld1_u8(src + j + 16));
-                }
-
-                vs1231 = vqaddq_u32(vs1231, vaddl_u16(vget_low_u16(s1), vget_high_u16(s2)));
-                vs3123 = vqaddq_u32(vs3123, vaddl_u16(vget_low_u16(s2), vget_high_u16(s3)));
-                vs2312 = vqaddq_u32(vs2312, vaddl_u16(vget_low_u16(s3), vget_high_u16(s1)));
-            }
-            if (i <= width - 8*3)
-            {
-                uint16x8_t s1 = vmovl_u8(vld1_u8(src +  0));
-                uint16x8_t s2 = vmovl_u8(vld1_u8(src +  8));
-                uint16x8_t s3 = vmovl_u8(vld1_u8(src + 16));
-
-                for (i += 8*3, src += 8*3; i <= width - 8*3; i += 8*3, src += 8*3)
-                {
-                    internal::prefetch(src + 24);
-                    s1 = vaddw_u8(s1, vld1_u8(src +  0));
-                    s2 = vaddw_u8(s2, vld1_u8(src +  8));
-                    s3 = vaddw_u8(s3, vld1_u8(src + 16));
-                }
-
-                vs1231 = vqaddq_u32(vs1231, vaddl_u16(vget_low_u16(s1), vget_high_u16(s2)));
-                vs3123 = vqaddq_u32(vs3123, vaddl_u16(vget_low_u16(s2), vget_high_u16(s3)));
-                vs2312 = vqaddq_u32(vs2312, vaddl_u16(vget_low_u16(s3), vget_high_u16(s1)));
-            }
-
-            u32 sum[12];
-            vst1q_u32(sum+0, vs1231);
-            vst1q_u32(sum+4, vs2312);
-            vst1q_u32(sum+8, vs3123);
-
-            for (; i < width; i += 3, src += 3)
-            {
-                sumdst[0] += src[0];
-                sumdst[1] += src[1];
-                sumdst[2] += src[2];
-            }
-
-            sumdst[0] += sum[0] + sum[3] + sum[6] + sum[9];
-            sumdst[1] += sum[1] + sum[4] + sum[7] + sum[10];
-            sumdst[2] += sum[2] + sum[5] + sum[8] + sum[11];
-        }
-        else
-        {
-            uint32x4_t vs = vdupq_n_u32(0);
-            for (; i <= width - 257*8; i += 257*8, src += 257 * 8)
-            {
-                uint16x8_t s1 = vmovl_u8(vld1_u8(src));
-
-                for (int j = 8; j < 257 * 8; j += 8)
-                {
-                    internal::prefetch(src + j);
-                    s1 = vaddw_u8(s1, vld1_u8(src + j));
-                }
-
-                vs = vqaddq_u32(vs, vaddl_u16(vget_low_u16(s1), vget_high_u16(s1)));
-            }
-            if (i < width - 7)
-            {
-                uint16x8_t s1 = vmovl_u8(vld1_u8(src));
-
-                for(i+=8,src+=8; i < width-7; i+=8,src+=8)
-                {
-                    internal::prefetch(src);
-                    s1 = vaddw_u8(s1, vld1_u8(src));
-                }
-                vs = vqaddq_u32(vs, vaddl_u16(vget_low_u16(s1), vget_high_u16(s1)));
-            }
-
-            if (channels == 1)
-            {
-                uint32x2_t vs2 = vqadd_u32(vget_low_u32(vs), vget_high_u32(vs));
-                uint32x2_t vs1 = vreinterpret_u32_u64(vpaddl_u32(vs2));
-
-                u32 s0 = vget_lane_u32(vs1, 0);
-                for(; i < width; ++i,++src)
-                    s0 += src[0];
-                sumdst[0] += s0;
-            }
-            else if (channels == 4)
-            {
-                vst1q_u32(sumdst, vqaddq_u32(vs, vld1q_u32(sumdst)));
-
-                for(; i < width; i+=4,src+=4)
-                {
-                    sumdst[0] += src[0];
-                    sumdst[1] += src[1];
-                    sumdst[2] += src[2];
-                    sumdst[3] += src[3];
-                }
-            }
-            else//if (channels == 2)
-            {
-                uint32x2_t vs2 = vqadd_u32(vget_low_u32(vs), vget_high_u32(vs));
-                vst1_u32(sumdst, vqadd_u32(vs2, vld1_u32(sumdst)));
-
-                for(; i < width; i+=2,src+=2)
-                {
-                    sumdst[0] += src[0];
-                    sumdst[1] += src[1];
-                }
-            }
-        }//channels != 3
-    }
-#else
-    (void)_size;
-    (void)srcBase;
-    (void)srcStride;
-    (void)sumdst;
-    (void)channels;
-#endif
-}
-
-void sum(const Size2D &_size,
-         const f32 * srcBase, ptrdiff_t srcStride,
-         f64 * sumdst, u32 channels)
-{
-    internal::assertSupportedConfiguration(isSumSupported(channels));
-#ifdef CAROTENE_NEON
-    Size2D size(_size);
-    if (srcStride == (ptrdiff_t)(size.width))
-    {
-        size.width *= size.height;
-        size.height = 1;
-    }
-    const ptrdiff_t width = size.width * channels;
-
-    for(size_t k = 0; k < size.height; ++k)
-    {
-        const f32* src = internal::getRowPtr( srcBase,  srcStride, k);
-        ptrdiff_t i = 0;
-
-        if (channels == 3)
-        {
-            float32x4_t vs1231 = vdupq_n_f32(0);
-            float32x4_t vs2312 = vdupq_n_f32(0);
-            float32x4_t vs3123 = vdupq_n_f32(0);
-            for(; i <= width-12; i += 12)
-            {
-                internal::prefetch(src + i + 12);
-                vs1231 = vaddq_f32(vs1231, vld1q_f32(src + i + 0));
-                vs2312 = vaddq_f32(vs2312, vld1q_f32(src + i + 4));
-                vs3123 = vaddq_f32(vs3123, vld1q_f32(src + i + 8));
-            }
-
-            f32 s[12];
-            vst1q_f32(s + 0, vs1231);
-            vst1q_f32(s + 4, vs2312);
-            vst1q_f32(s + 8, vs3123);
-
-            sumdst[0] += s[0] + s[3] + s[6] + s[9];
-            sumdst[1] += s[1] + s[4] + s[7] + s[10];
-            sumdst[2] += s[2] + s[5] + s[8] + s[11];
-            for( ; i < width; i+=3)
-            {
-                sumdst[0] += src[i];
-                sumdst[1] += src[i+1];
-                sumdst[2] += src[i+2];
-            }
-        }
-        else
-        {
-            float32x4_t vs = vdupq_n_f32(0);
-            for(; i <= width-4; i += 4)
-            {
-                internal::prefetch(src + i);
-                vs = vaddq_f32(vs, vld1q_f32(src+i));
-            }
-
-            if (channels == 1)
-            {
-                float32x2_t vs2 = vpadd_f32(vget_low_f32(vs), vget_high_f32(vs));
-                f32 s[2];
-                vst1_f32(s, vs2);
-
-                sumdst[0] += s[0] + s[1];
-                for( ; i < width; i++)
-                    sumdst[0] += src[i];
-            }
-            else if (channels == 4)
-            {
-                f32 s[4];
-                vst1q_f32(s, vs);
-
-                sumdst[0] += s[0];
-                sumdst[1] += s[1];
-                sumdst[2] += s[2];
-                sumdst[3] += s[3];
-            }
-            else//if (channels == 2)
-            {
-                float32x2_t vs2 = vadd_f32(vget_low_f32(vs), vget_high_f32(vs));
-                f32 s[2];
-                vst1_f32(s, vs2);
-
-                sumdst[0] += s[0];
-                sumdst[1] += s[1];
-
-                if(i < width)
-                {
-                    sumdst[0] += src[i];
-                    sumdst[1] += src[i+1];
-                }
-            }
-        }//channels != 3
-    }
-#else
-    (void)_size;
-    (void)srcBase;
-    (void)srcStride;
-    (void)sumdst;
-    (void)channels;
-#endif
-}
-
-bool isSqsumSupported(u32 channels)
-{
-    return (channels && ((4/channels)*channels == 4));
-}
-
-void sqsum(const Size2D &_size,
-           const u8 * srcBase, ptrdiff_t srcStride,
-           f64 * sumdst, f64 * sqsumdst, u32 channels)
-{
-    internal::assertSupportedConfiguration(isSqsumSupported(channels));
-#ifdef CAROTENE_NEON
-    Size2D size(_size);
-    if (srcStride == (ptrdiff_t)(size.width*channels))
-    {
-        size.width *= size.height;
-        size.height = 1;
-    }
-    const size_t width = size.width * channels;
-
-    size_t blockSize0 = 1 << 23;
-    size_t roiw8 = width & ~7;
-
-    uint32x4_t v_zero = vdupq_n_u32(0u);
-
-    for (size_t i = 0; i < size.height; ++i)
-    {
-        const u8 * src = internal::getRowPtr(srcBase, srcStride, i);
-        size_t j = 0u;
-
-        while (j < roiw8)
-        {
-            size_t blockSize = std::min(roiw8 - j, blockSize0) + j;
-            uint32x4_t v_sum = v_zero;
-            uint32x4_t v_sqsum = v_zero;
-
-            for ( ; j < blockSize ; j += 8, src += 8)
-            {
-                internal::prefetch(src);
-                uint8x8_t v_src0 = vld1_u8(src);
-
-                uint16x8_t v_src = vmovl_u8(v_src0);
-                uint16x4_t v_srclo = vget_low_u16(v_src), v_srchi = vget_high_u16(v_src);
-                v_sum = vaddq_u32(v_sum, vaddl_u16(v_srclo, v_srchi));
-                v_sqsum = vmlal_u16(v_sqsum, v_srclo, v_srclo);
-                v_sqsum = vmlal_u16(v_sqsum, v_srchi, v_srchi);
-            }
-
-            u32 arsum[8];
-            vst1q_u32(arsum, v_sum);
-            vst1q_u32(arsum + 4, v_sqsum);
-
-            sumdst[0] += (f64)arsum[0];
-            sumdst[1 % channels] += (f64)arsum[1];
-            sumdst[2 % channels] += (f64)arsum[2];
-            sumdst[3 % channels] += (f64)arsum[3];
-            sqsumdst[0] += (f64)arsum[4];
-            sqsumdst[1 % channels] += (f64)arsum[5];
-            sqsumdst[2 % channels] += (f64)arsum[6];
-            sqsumdst[3 % channels] += (f64)arsum[7];
-        }
-        // collect a few last elements in the current row
-        // it's ok to process channels elements per step
-        // since we could handle 1,2 or 4 channels
-        // we always have channels-fold amount of elements remaining
-        for ( ; j < width; j+=channels, src+=channels)
-        {
-            for (u32 kk = 0; kk < channels; kk++)
-            {
-                u32 srcval = src[kk];
-                sumdst[kk] += srcval;
-                sqsumdst[kk] += srcval * srcval;
-            }
-        }
-    }
-#else
-    (void)_size;
-    (void)srcBase;
-    (void)srcStride;
-    (void)sumdst;
-    (void)sqsumdst;
-    (void)channels;
-#endif
-}
-
-} // namespace CAROTENE_NS
--- a/3rdparty/carotene/src/template_matching.cpp
+++ b/3rdparty/carotene/src/template_matching.cpp
@ -1,241 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2013-2015, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-#include "common.hpp"
-
-#include <vector>
-#include <cstring>
-
-namespace CAROTENE_NS {
-
-#define ENABLE4LINESMATCHING false  //Disabled since overall time for simultaneous 4 lines matching is greater than
-                                    //time for simultaneous 2 lines matching for the same amount of data
-
-bool isMatchTemplateSupported(const Size2D &tmplSize)
-{
-    return isSupportedConfiguration() &&
-           tmplSize.width >= 8 && // Actually the function could process even shorter templates
-                                  // but there will be no NEON optimization in this case
-           (tmplSize.width * tmplSize.height) <= 256;
-}
-
-void matchTemplate(const Size2D &srcSize,
-                   const u8 * srcBase, ptrdiff_t srcStride,
-                   const Size2D &tmplSize,
-                   const u8 * tmplBase, ptrdiff_t tmplStride,
-                   f32 * dstBase, ptrdiff_t dstStride,
-                   bool normalize)
-{
-    internal::assertSupportedConfiguration(isMatchTemplateSupported(tmplSize));
-#ifdef CAROTENE_NEON
-    const size_t tmplW = tmplSize.width;
-    const size_t tmplH = tmplSize.height;
-    const size_t dstW  = srcSize.width  - tmplSize.width  + 1;
-    const size_t dstH  = srcSize.height - tmplSize.height + 1;
-
-    //template correlation part
-    {
-#if ENABLE4LINESMATCHING
-        const size_t dstroiw4 = dstW & ~3u;
-#endif
-        const size_t dstroiw2 = dstW & ~1u;
-        const size_t tmplroiw = tmplW & ~7u;
-        const size_t dstride = dstStride >> 2;
-
-        f32 *corr = dstBase;
-        const u8  *imgrrow = srcBase;
-        for(size_t r = 0; r < dstH; ++r, corr+=dstride, imgrrow+=srcStride)
-        {
-            size_t c = 0;
-#if ENABLE4LINESMATCHING
-            for(; c < dstroiw4; c+=4)
-            {
-                u32 dot[4] = {0, 0, 0, 0};
-                uint32x4_t vdot0 = vmovq_n_u32(0);
-                uint32x4_t vdot1 = vmovq_n_u32(0);
-                uint32x4_t vdot2 = vmovq_n_u32(0);
-                uint32x4_t vdot3 = vmovq_n_u32(0);
-
-                const u8  *img = imgrrow;
-                const u8 *tmpl = tmplBase;
-                for(size_t i = 0; i < tmplH; ++i, tmpl+=tmplStride, img+=srcStride)
-                {
-                    size_t j = 0;
-                    for(; j < tmplroiw; j+=8)
-                    {
-                        uint8x8_t vtmpl = vld1_u8(tmpl + j);
-
-                        uint8x8_t vimg0 = vld1_u8(img + j + c + 0);
-                        uint8x8_t vimg1 = vld1_u8(img + j + c + 1);
-                        uint8x8_t vimg2 = vld1_u8(img + j + c + 2);
-                        uint8x8_t vimg3 = vld1_u8(img + j + c + 3);
-
-                        uint16x8_t vd0 = vmull_u8(vtmpl, vimg0);
-                        uint16x8_t vd1 = vmull_u8(vtmpl, vimg1);
-                        uint16x8_t vd2 = vmull_u8(vtmpl, vimg2);
-                        uint16x8_t vd3 = vmull_u8(vtmpl, vimg3);
-
-                        vdot0 = vpadalq_u16(vdot0, vd0);
-                        vdot1 = vpadalq_u16(vdot1, vd1);
-                        vdot2 = vpadalq_u16(vdot2, vd2);
-                        vdot3 = vpadalq_u16(vdot3, vd3);
-                    }
-                    for(; j < tmplW; ++j)
-                    {
-                        dot[0] += tmpl[j] * img[j + c + 0];
-                        dot[1] += tmpl[j] * img[j + c + 1];
-                        dot[2] += tmpl[j] * img[j + c + 2];
-                        dot[3] += tmpl[j] * img[j + c + 3];
-                    }
-                }
-                uint32x4_t vdotx   = vld1q_u32(dot);
-                uint32x2_t vdot_0  = vpadd_u32(vget_low_u32(vdot0), vget_high_u32(vdot0));
-                uint32x2_t vdot_1  = vpadd_u32(vget_low_u32(vdot1), vget_high_u32(vdot1));
-                uint32x2_t vdot_2  = vpadd_u32(vget_low_u32(vdot2), vget_high_u32(vdot2));
-                uint32x2_t vdot_3  = vpadd_u32(vget_low_u32(vdot3), vget_high_u32(vdot3));
-                uint32x2_t vdot_01 = vpadd_u32(vdot_0, vdot_1);
-                uint32x2_t vdot_23 = vpadd_u32(vdot_2, vdot_3);
-
-                vst1q_f32(corr + c, vcvtq_f32_u32(vaddq_u32(vdotx, vcombine_u32(vdot_01, vdot_23))));
-            }
-#endif
-
-            for(; c < dstroiw2; c+=2)
-            {
-                u32 dot[2] = {0, 0};
-                uint32x4_t vdot0 = vmovq_n_u32(0);
-                uint32x4_t vdot1 = vmovq_n_u32(0);
-                const u8  *img = imgrrow;
-                const u8 *tmpl = tmplBase;
-                for(size_t i = 0; i < tmplH; ++i, tmpl+=tmplStride, img+=srcStride)
-                {
-                    size_t j = 0;
-                    for(; j < tmplroiw; j+=8)
-                    {
-                        uint8x8_t vtmpl = vld1_u8(tmpl + j);
-
-                        uint8x8_t vimg0 = vld1_u8(img + j + c + 0);
-                        uint8x8_t vimg1 = vld1_u8(img + j + c + 1);
-
-                        uint16x8_t vd0 = vmull_u8(vtmpl, vimg0);
-                        uint16x8_t vd1 = vmull_u8(vtmpl, vimg1);
-
-                        vdot0 = vpadalq_u16(vdot0, vd0);
-                        vdot1 = vpadalq_u16(vdot1, vd1);
-                    }
-                    for(; j < tmplW; ++j)
-                    {
-                        dot[0] += tmpl[j] * img[j + c + 0];
-                        dot[1] += tmpl[j] * img[j + c + 1];
-                    }
-                }
-                uint32x2_t vdotx  = vld1_u32(dot);
-                uint32x2_t vdot_0 = vpadd_u32(vget_low_u32(vdot0), vget_high_u32(vdot0));
-                uint32x2_t vdot_1 = vpadd_u32(vget_low_u32(vdot1), vget_high_u32(vdot1));
-                uint32x2_t vdot_  = vpadd_u32(vdot_0, vdot_1);
-                vst1_f32(corr + c, vcvt_f32_u32(vadd_u32(vdotx, vdot_)));
-            }
-
-            for(; c < dstW; ++c)
-            {
-                u32 dot = 0;
-                uint32x4_t vdot = vmovq_n_u32(0);
-                const u8  *img = imgrrow;
-                const u8 *tmpl = tmplBase;
-                for(size_t i = 0; i < tmplH; ++i, tmpl+=tmplStride, img+=srcStride)
-                {
-                    size_t j = 0;
-                    for(; j < tmplroiw; j+=8)
-                    {
-                        uint8x8_t vtmpl = vld1_u8(tmpl + j);
-                        uint8x8_t vimg  = vld1_u8(img + j + c);
-                        uint16x8_t vd   = vmull_u8(vtmpl, vimg);
-                        vdot = vpadalq_u16(vdot, vd);
-                    }
-                    for(; j < tmplW; ++j)
-                        dot += tmpl[j] * img[j + c];
-                }
-                u32 wdot[2];
-                vst1_u32(wdot, vpadd_u32(vget_low_u32(vdot), vget_high_u32(vdot)));
-                dot += wdot[0] + wdot[1];
-                corr[c] = (f32)dot;
-            }
-        }
-    }
-
-    if(normalize)
-    {
-        f32 tn = std::sqrt((f32)normL2(tmplSize, tmplBase, tmplStride));
-
-        size_t iw = srcSize.width+1;
-        size_t ih = srcSize.height+1;
-        std::vector<f64> _sqsum(iw*ih);
-        f64 *sqsum = &_sqsum[0];
-        memset(sqsum, 0, iw*sizeof(f64));
-        for(size_t i = 1; i < ih; ++i)
-            sqsum[iw*i] = 0.;
-        sqrIntegral(srcSize, srcBase, srcStride, sqsum + iw + 1, iw*sizeof(f64));
-
-        for(size_t i = 0; i < dstH; ++i)
-        {
-            f32 *result = internal::getRowPtr(dstBase, dstStride, i);
-            for(size_t j = 0; j < dstW; ++j)
-            {
-                double s2 = sqsum[iw*i + j] +
-                            sqsum[iw*(i + tmplSize.height) + j + tmplSize.width] -
-                            sqsum[iw*(i + tmplSize.height) + j] -
-                            sqsum[iw*i + j + tmplSize.width];
-
-                result[j] /= tn * std::sqrt(s2);
-            }
-        }
-    }
-#else
-    (void)srcSize;
-    (void)srcBase;
-    (void)srcStride;
-    (void)tmplBase;
-    (void)tmplStride;
-    (void)dstBase;
-    (void)dstStride;
-    (void)normalize;
-#endif
-}
-
-} // namespace CAROTENE_NS
--- a/3rdparty/carotene/src/threshold.cpp
+++ b/3rdparty/carotene/src/threshold.cpp
--- a/3rdparty/carotene/src/vtransform.hpp
+++ b/3rdparty/carotene/src/vtransform.hpp
@ -1,689 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2014-2015, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-#ifndef CAROTENE_SRC_VTRANSFORM_HPP
-#define CAROTENE_SRC_VTRANSFORM_HPP
-
-#include "common.hpp"
-
-#include <carotene/types.hpp>
-
-#ifdef CAROTENE_NEON
-
-namespace CAROTENE_NS { namespace internal {
-
-////////////////////////////// Type Traits ///////////////////////
-
-// VecTraits<T, cn> maps an element type T and a channel count cn (1..4) to the
-// NEON types used to hold it:
-//   vec128 - the 128-bit vector type (for cn > 1, the matching xN multi-vector type)
-//   vec64  - the 64-bit vector type  (for cn > 1, the matching xN multi-vector type)
-//   unsign - the VecTraits of the unsigned element type of the same width and the
-//            SAME channel count (signed -> unsigned, f32 -> u32, unsigned -> itself)
-// The primary template is only declared, so using an unsupported <T, cn> pair is a
-// compile-time error.
-template <typename T, int cn = 1>
-struct VecTraits;
-
-template <> struct VecTraits< u8, 1> { typedef  uint8x16_t vec128; typedef   uint8x8_t vec64; typedef VecTraits<  u8, 1> unsign; };
-template <> struct VecTraits< s8, 1> { typedef   int8x16_t vec128; typedef    int8x8_t vec64; typedef VecTraits<  u8, 1> unsign; };
-template <> struct VecTraits<u16, 1> { typedef  uint16x8_t vec128; typedef  uint16x4_t vec64; typedef VecTraits< u16, 1> unsign; };
-template <> struct VecTraits<s16, 1> { typedef   int16x8_t vec128; typedef   int16x4_t vec64; typedef VecTraits< u16, 1> unsign; };
-template <> struct VecTraits<s32, 1> { typedef   int32x4_t vec128; typedef   int32x2_t vec64; typedef VecTraits< u32, 1> unsign; };
-template <> struct VecTraits<u32, 1> { typedef  uint32x4_t vec128; typedef  uint32x2_t vec64; typedef VecTraits< u32, 1> unsign; };
-template <> struct VecTraits<s64, 1> { typedef   int64x2_t vec128; typedef   int64x1_t vec64; typedef VecTraits< u64, 1> unsign; };
-template <> struct VecTraits<u64, 1> { typedef  uint64x2_t vec128; typedef  uint64x1_t vec64; typedef VecTraits< u64, 1> unsign; };
-template <> struct VecTraits<f32, 1> { typedef float32x4_t vec128; typedef float32x2_t vec64; typedef VecTraits< u32, 1> unsign; };
-
-template <> struct VecTraits< u8, 2> { typedef  uint8x16x2_t vec128; typedef   uint8x8x2_t vec64; typedef VecTraits<  u8, 2> unsign; };
-template <> struct VecTraits< s8, 2> { typedef   int8x16x2_t vec128; typedef    int8x8x2_t vec64; typedef VecTraits<  u8, 2> unsign; };
-template <> struct VecTraits<u16, 2> { typedef  uint16x8x2_t vec128; typedef  uint16x4x2_t vec64; typedef VecTraits< u16, 2> unsign; };
-template <> struct VecTraits<s16, 2> { typedef   int16x8x2_t vec128; typedef   int16x4x2_t vec64; typedef VecTraits< u16, 2> unsign; };
-template <> struct VecTraits<s32, 2> { typedef   int32x4x2_t vec128; typedef   int32x2x2_t vec64; typedef VecTraits< u32, 2> unsign; };
-template <> struct VecTraits<u32, 2> { typedef  uint32x4x2_t vec128; typedef  uint32x2x2_t vec64; typedef VecTraits< u32, 2> unsign; };
-template <> struct VecTraits<s64, 2> { typedef   int64x2x2_t vec128; typedef   int64x1x2_t vec64; typedef VecTraits< u64, 2> unsign; };
-template <> struct VecTraits<u64, 2> { typedef  uint64x2x2_t vec128; typedef  uint64x1x2_t vec64; typedef VecTraits< u64, 2> unsign; };
-template <> struct VecTraits<f32, 2> { typedef float32x4x2_t vec128; typedef float32x2x2_t vec64; typedef VecTraits< u32, 2> unsign; };
-
-// cn = 3: the 64-bit entries previously pointed at the cn = 2 unsigned traits.
-template <> struct VecTraits< u8, 3> { typedef  uint8x16x3_t vec128; typedef   uint8x8x3_t vec64; typedef VecTraits<  u8, 3> unsign; };
-template <> struct VecTraits< s8, 3> { typedef   int8x16x3_t vec128; typedef    int8x8x3_t vec64; typedef VecTraits<  u8, 3> unsign; };
-template <> struct VecTraits<u16, 3> { typedef  uint16x8x3_t vec128; typedef  uint16x4x3_t vec64; typedef VecTraits< u16, 3> unsign; };
-template <> struct VecTraits<s16, 3> { typedef   int16x8x3_t vec128; typedef   int16x4x3_t vec64; typedef VecTraits< u16, 3> unsign; };
-template <> struct VecTraits<s32, 3> { typedef   int32x4x3_t vec128; typedef   int32x2x3_t vec64; typedef VecTraits< u32, 3> unsign; };
-template <> struct VecTraits<u32, 3> { typedef  uint32x4x3_t vec128; typedef  uint32x2x3_t vec64; typedef VecTraits< u32, 3> unsign; };
-template <> struct VecTraits<s64, 3> { typedef   int64x2x3_t vec128; typedef   int64x1x3_t vec64; typedef VecTraits< u64, 3> unsign; };
-template <> struct VecTraits<u64, 3> { typedef  uint64x2x3_t vec128; typedef  uint64x1x3_t vec64; typedef VecTraits< u64, 3> unsign; };
-template <> struct VecTraits<f32, 3> { typedef float32x4x3_t vec128; typedef float32x2x3_t vec64; typedef VecTraits< u32, 3> unsign; };
-
-// cn = 4: every entry previously pointed at a cn = 3 (or cn = 2) unsigned trait.
-template <> struct VecTraits< u8, 4> { typedef  uint8x16x4_t vec128; typedef   uint8x8x4_t vec64; typedef VecTraits<  u8, 4> unsign; };
-template <> struct VecTraits< s8, 4> { typedef   int8x16x4_t vec128; typedef    int8x8x4_t vec64; typedef VecTraits<  u8, 4> unsign; };
-template <> struct VecTraits<u16, 4> { typedef  uint16x8x4_t vec128; typedef  uint16x4x4_t vec64; typedef VecTraits< u16, 4> unsign; };
-template <> struct VecTraits<s16, 4> { typedef   int16x8x4_t vec128; typedef   int16x4x4_t vec64; typedef VecTraits< u16, 4> unsign; };
-template <> struct VecTraits<s32, 4> { typedef   int32x4x4_t vec128; typedef   int32x2x4_t vec64; typedef VecTraits< u32, 4> unsign; };
-template <> struct VecTraits<u32, 4> { typedef  uint32x4x4_t vec128; typedef  uint32x2x4_t vec64; typedef VecTraits< u32, 4> unsign; };
-template <> struct VecTraits<s64, 4> { typedef   int64x2x4_t vec128; typedef   int64x1x4_t vec64; typedef VecTraits< u64, 4> unsign; };
-template <> struct VecTraits<u64, 4> { typedef  uint64x2x4_t vec128; typedef  uint64x1x4_t vec64; typedef VecTraits< u64, 4> unsign; };
-template <> struct VecTraits<f32, 4> { typedef float32x4x4_t vec128; typedef float32x2x4_t vec64; typedef VecTraits< u32, 4> unsign; };
-
-////////////////////////////// vld1q ///////////////////////
-
-// vld1q(ptr): overload set that picks the vld1q_<suffix> intrinsic from the
-// pointer's element type and returns one 128-bit vector.
-// Covers u8/s8/u16/s16/u32/s32/f32; no 64-bit element overloads are defined.
-inline  uint8x16_t vld1q(const u8  * ptr) { return  vld1q_u8(ptr); }
-inline   int8x16_t vld1q(const s8  * ptr) { return  vld1q_s8(ptr); }
-inline  uint16x8_t vld1q(const u16 * ptr) { return vld1q_u16(ptr); }
-inline   int16x8_t vld1q(const s16 * ptr) { return vld1q_s16(ptr); }
-inline  uint32x4_t vld1q(const u32 * ptr) { return vld1q_u32(ptr); }
-inline   int32x4_t vld1q(const s32 * ptr) { return vld1q_s32(ptr); }
-inline float32x4_t vld1q(const f32 * ptr) { return vld1q_f32(ptr); }
-
-////////////////////////////// vld1 ///////////////////////
-
-// vld1(ptr): 64-bit counterpart of vld1q; forwards to vld1_<suffix> selected by
-// the pointer's element type and returns one 64-bit vector.
-inline   uint8x8_t vld1(const u8  * ptr) { return  vld1_u8(ptr); }
-inline    int8x8_t vld1(const s8  * ptr) { return  vld1_s8(ptr); }
-inline  uint16x4_t vld1(const u16 * ptr) { return vld1_u16(ptr); }
-inline   int16x4_t vld1(const s16 * ptr) { return vld1_s16(ptr); }
-inline  uint32x2_t vld1(const u32 * ptr) { return vld1_u32(ptr); }
-inline   int32x2_t vld1(const s32 * ptr) { return vld1_s32(ptr); }
-inline float32x2_t vld1(const f32 * ptr) { return vld1_f32(ptr); }
-
-////////////////////////////// vld2q ///////////////////////
-
-// vld2q(ptr): forwards to vld2q_<suffix> (NEON's 2-way de-interleaving load) and
-// returns the x2 aggregate of 128-bit vectors for the pointer's element type.
-inline  uint8x16x2_t vld2q(const u8  * ptr) { return  vld2q_u8(ptr); }
-inline   int8x16x2_t vld2q(const s8  * ptr) { return  vld2q_s8(ptr); }
-inline  uint16x8x2_t vld2q(const u16 * ptr) { return vld2q_u16(ptr); }
-inline   int16x8x2_t vld2q(const s16 * ptr) { return vld2q_s16(ptr); }
-inline  uint32x4x2_t vld2q(const u32 * ptr) { return vld2q_u32(ptr); }
-inline   int32x4x2_t vld2q(const s32 * ptr) { return vld2q_s32(ptr); }
-inline float32x4x2_t vld2q(const f32 * ptr) { return vld2q_f32(ptr); }
-
-////////////////////////////// vld2 ///////////////////////
-
-// vld2(ptr): forwards to vld2_<suffix> (NEON's 2-way de-interleaving load) and
-// returns the x2 aggregate of 64-bit vectors for the pointer's element type.
-inline   uint8x8x2_t vld2(const u8  * ptr) { return  vld2_u8(ptr); }
-inline    int8x8x2_t vld2(const s8  * ptr) { return  vld2_s8(ptr); }
-inline  uint16x4x2_t vld2(const u16 * ptr) { return vld2_u16(ptr); }
-inline   int16x4x2_t vld2(const s16 * ptr) { return vld2_s16(ptr); }
-inline  uint32x2x2_t vld2(const u32 * ptr) { return vld2_u32(ptr); }
-inline   int32x2x2_t vld2(const s32 * ptr) { return vld2_s32(ptr); }
-inline float32x2x2_t vld2(const f32 * ptr) { return vld2_f32(ptr); }
-
-////////////////////////////// vld3q ///////////////////////
-
-// vld3q(ptr): forwards to vld3q_<suffix> (NEON's 3-way de-interleaving load) and
-// returns the x3 aggregate of 128-bit vectors for the pointer's element type.
-inline  uint8x16x3_t vld3q(const u8  * ptr) { return  vld3q_u8(ptr); }
-inline   int8x16x3_t vld3q(const s8  * ptr) { return  vld3q_s8(ptr); }
-inline  uint16x8x3_t vld3q(const u16 * ptr) { return vld3q_u16(ptr); }
-inline   int16x8x3_t vld3q(const s16 * ptr) { return vld3q_s16(ptr); }
-inline  uint32x4x3_t vld3q(const u32 * ptr) { return vld3q_u32(ptr); }
-inline   int32x4x3_t vld3q(const s32 * ptr) { return vld3q_s32(ptr); }
-inline float32x4x3_t vld3q(const f32 * ptr) { return vld3q_f32(ptr); }
-
-////////////////////////////// vld3 ///////////////////////
-
-// vld3(ptr): forwards to vld3_<suffix> (NEON's 3-way de-interleaving load) and
-// returns the x3 aggregate of 64-bit vectors for the pointer's element type.
-inline   uint8x8x3_t vld3(const u8  * ptr) { return  vld3_u8(ptr); }
-inline    int8x8x3_t vld3(const s8  * ptr) { return  vld3_s8(ptr); }
-inline  uint16x4x3_t vld3(const u16 * ptr) { return vld3_u16(ptr); }
-inline   int16x4x3_t vld3(const s16 * ptr) { return vld3_s16(ptr); }
-inline  uint32x2x3_t vld3(const u32 * ptr) { return vld3_u32(ptr); }
-inline   int32x2x3_t vld3(const s32 * ptr) { return vld3_s32(ptr); }
-inline float32x2x3_t vld3(const f32 * ptr) { return vld3_f32(ptr); }
-
-////////////////////////////// vld4q ///////////////////////
-
-// vld4q(ptr): forwards to vld4q_<suffix> (NEON's 4-way de-interleaving load) and
-// returns the x4 aggregate of 128-bit vectors for the pointer's element type.
-inline  uint8x16x4_t vld4q(const u8  * ptr) { return  vld4q_u8(ptr); }
-inline   int8x16x4_t vld4q(const s8  * ptr) { return  vld4q_s8(ptr); }
-inline  uint16x8x4_t vld4q(const u16 * ptr) { return vld4q_u16(ptr); }
-inline   int16x8x4_t vld4q(const s16 * ptr) { return vld4q_s16(ptr); }
-inline  uint32x4x4_t vld4q(const u32 * ptr) { return vld4q_u32(ptr); }
-inline   int32x4x4_t vld4q(const s32 * ptr) { return vld4q_s32(ptr); }
-inline float32x4x4_t vld4q(const f32 * ptr) { return vld4q_f32(ptr); }
-
-////////////////////////////// vld4 ///////////////////////
-
-// vld4(ptr): forwards to vld4_<suffix> (NEON's 4-way de-interleaving load) and
-// returns the x4 aggregate of 64-bit vectors for the pointer's element type.
-inline   uint8x8x4_t vld4(const u8  * ptr) { return  vld4_u8(ptr); }
-inline    int8x8x4_t vld4(const s8  * ptr) { return  vld4_s8(ptr); }
-inline  uint16x4x4_t vld4(const u16 * ptr) { return vld4_u16(ptr); }
-inline   int16x4x4_t vld4(const s16 * ptr) { return vld4_s16(ptr); }
-inline  uint32x2x4_t vld4(const u32 * ptr) { return vld4_u32(ptr); }
-inline   int32x2x4_t vld4(const s32 * ptr) { return vld4_s32(ptr); }
-inline float32x2x4_t vld4(const f32 * ptr) { return vld4_f32(ptr); }
-
-////////////////////////////// vst1q ///////////////////////
-
-// vst1q(ptr, v): overload set that forwards a 128-bit vector to the vst1q_<suffix>
-// store intrinsic matching its type. `return` of the void call is just a forward.
-inline void vst1q(u8  * ptr, const uint8x16_t  & v) { return vst1q_u8(ptr,  v); }
-inline void vst1q(s8  * ptr, const int8x16_t   & v) { return vst1q_s8(ptr,  v); }
-inline void vst1q(u16 * ptr, const uint16x8_t  & v) { return vst1q_u16(ptr, v); }
-inline void vst1q(s16 * ptr, const int16x8_t   & v) { return vst1q_s16(ptr, v); }
-inline void vst1q(u32 * ptr, const uint32x4_t  & v) { return vst1q_u32(ptr, v); }
-inline void vst1q(s32 * ptr, const int32x4_t   & v) { return vst1q_s32(ptr, v); }
-inline void vst1q(f32 * ptr, const float32x4_t & v) { return vst1q_f32(ptr, v); }
-
-////////////////////////////// vst1 ///////////////////////
-
-// vst1(ptr, v): 64-bit counterpart of vst1q; forwards to vst1_<suffix> selected
-// by the vector type.
-inline void vst1(u8  * ptr, const uint8x8_t   & v) { return vst1_u8(ptr,  v); }
-inline void vst1(s8  * ptr, const int8x8_t    & v) { return vst1_s8(ptr,  v); }
-inline void vst1(u16 * ptr, const uint16x4_t  & v) { return vst1_u16(ptr, v); }
-inline void vst1(s16 * ptr, const int16x4_t   & v) { return vst1_s16(ptr, v); }
-inline void vst1(u32 * ptr, const uint32x2_t  & v) { return vst1_u32(ptr, v); }
-inline void vst1(s32 * ptr, const int32x2_t   & v) { return vst1_s32(ptr, v); }
-inline void vst1(f32 * ptr, const float32x2_t & v) { return vst1_f32(ptr, v); }
-
-////////////////////////////// vst2q ///////////////////////
-
-// vst2q(ptr, v): forwards an x2 aggregate of 128-bit vectors to vst2q_<suffix>
-// (NEON's 2-way interleaving store) selected by the aggregate's type.
-inline void vst2q(u8  * ptr, const uint8x16x2_t  & v) { return vst2q_u8(ptr,  v); }
-inline void vst2q(s8  * ptr, const int8x16x2_t   & v) { return vst2q_s8(ptr,  v); }
-inline void vst2q(u16 * ptr, const uint16x8x2_t  & v) { return vst2q_u16(ptr, v); }
-inline void vst2q(s16 * ptr, const int16x8x2_t   & v) { return vst2q_s16(ptr, v); }
-inline void vst2q(u32 * ptr, const uint32x4x2_t  & v) { return vst2q_u32(ptr, v); }
-inline void vst2q(s32 * ptr, const int32x4x2_t   & v) { return vst2q_s32(ptr, v); }
-inline void vst2q(f32 * ptr, const float32x4x2_t & v) { return vst2q_f32(ptr, v); }
-
-////////////////////////////// vst2 ///////////////////////
-
-// vst2(ptr, v): forwards an x2 aggregate of 64-bit vectors to vst2_<suffix>
-// (NEON's 2-way interleaving store) selected by the aggregate's type.
-inline void vst2(u8  * ptr, const uint8x8x2_t   & v) { return vst2_u8(ptr,  v); }
-inline void vst2(s8  * ptr, const int8x8x2_t    & v) { return vst2_s8(ptr,  v); }
-inline void vst2(u16 * ptr, const uint16x4x2_t  & v) { return vst2_u16(ptr, v); }
-inline void vst2(s16 * ptr, const int16x4x2_t   & v) { return vst2_s16(ptr, v); }
-inline void vst2(u32 * ptr, const uint32x2x2_t  & v) { return vst2_u32(ptr, v); }
-inline void vst2(s32 * ptr, const int32x2x2_t   & v) { return vst2_s32(ptr, v); }
-inline void vst2(f32 * ptr, const float32x2x2_t & v) { return vst2_f32(ptr, v); }
-
-////////////////////////////// vst3q ///////////////////////
-
-// vst3q(ptr, v): forwards an x3 aggregate of 128-bit vectors to vst3q_<suffix>
-// (NEON's 3-way interleaving store) selected by the aggregate's type.
-inline void vst3q(u8  * ptr, const uint8x16x3_t  & v) { return vst3q_u8(ptr,  v); }
-inline void vst3q(s8  * ptr, const int8x16x3_t   & v) { return vst3q_s8(ptr,  v); }
-inline void vst3q(u16 * ptr, const uint16x8x3_t  & v) { return vst3q_u16(ptr, v); }
-inline void vst3q(s16 * ptr, const int16x8x3_t   & v) { return vst3q_s16(ptr, v); }
-inline void vst3q(u32 * ptr, const uint32x4x3_t  & v) { return vst3q_u32(ptr, v); }
-inline void vst3q(s32 * ptr, const int32x4x3_t   & v) { return vst3q_s32(ptr, v); }
-inline void vst3q(f32 * ptr, const float32x4x3_t & v) { return vst3q_f32(ptr, v); }
-
-////////////////////////////// vst3 ///////////////////////
-
-// vst3(ptr, v): forwards an x3 aggregate of 64-bit vectors to vst3_<suffix>
-// (NEON's 3-way interleaving store) selected by the aggregate's type.
-inline void vst3(u8  * ptr, const uint8x8x3_t   & v) { return vst3_u8(ptr,  v); }
-inline void vst3(s8  * ptr, const int8x8x3_t    & v) { return vst3_s8(ptr,  v); }
-inline void vst3(u16 * ptr, const uint16x4x3_t  & v) { return vst3_u16(ptr, v); }
-inline void vst3(s16 * ptr, const int16x4x3_t   & v) { return vst3_s16(ptr, v); }
-inline void vst3(u32 * ptr, const uint32x2x3_t  & v) { return vst3_u32(ptr, v); }
-inline void vst3(s32 * ptr, const int32x2x3_t   & v) { return vst3_s32(ptr, v); }
-inline void vst3(f32 * ptr, const float32x2x3_t & v) { return vst3_f32(ptr, v); }
-
-////////////////////////////// vst4q ///////////////////////
-
-// vst4q(ptr, v): forwards an x4 aggregate of 128-bit vectors to vst4q_<suffix>
-// (NEON's 4-way interleaving store) selected by the aggregate's type.
-inline void vst4q(u8  * ptr, const uint8x16x4_t  & v) { return vst4q_u8(ptr,  v); }
-inline void vst4q(s8  * ptr, const int8x16x4_t   & v) { return vst4q_s8(ptr,  v); }
-inline void vst4q(u16 * ptr, const uint16x8x4_t  & v) { return vst4q_u16(ptr, v); }
-inline void vst4q(s16 * ptr, const int16x8x4_t   & v) { return vst4q_s16(ptr, v); }
-inline void vst4q(u32 * ptr, const uint32x4x4_t  & v) { return vst4q_u32(ptr, v); }
-inline void vst4q(s32 * ptr, const int32x4x4_t   & v) { return vst4q_s32(ptr, v); }
-inline void vst4q(f32 * ptr, const float32x4x4_t & v) { return vst4q_f32(ptr, v); }
-
-////////////////////////////// vst4 ///////////////////////
-
-// vst4(ptr, v): forwards an x4 aggregate of 64-bit vectors to vst4_<suffix>
-// (NEON's 4-way interleaving store) selected by the aggregate's type.
-inline void vst4(u8  * ptr, const uint8x8x4_t   & v) { return vst4_u8(ptr,  v); }
-inline void vst4(s8  * ptr, const int8x8x4_t    & v) { return vst4_s8(ptr,  v); }
-inline void vst4(u16 * ptr, const uint16x4x4_t  & v) { return vst4_u16(ptr, v); }
-inline void vst4(s16 * ptr, const int16x4x4_t   & v) { return vst4_s16(ptr, v); }
-inline void vst4(u32 * ptr, const uint32x2x4_t  & v) { return vst4_u32(ptr, v); }
-inline void vst4(s32 * ptr, const int32x2x4_t   & v) { return vst4_s32(ptr, v); }
-inline void vst4(f32 * ptr, const float32x2x4_t & v) { return vst4_f32(ptr, v); }
-
-////////////////////////////// vabdq ///////////////////////
-
-// vabdq(v0, v1): forwards to vabdq_<suffix> (NEON absolute difference) on 128-bit
-// vectors; the result has the same vector type as the operands.
-inline  uint8x16_t vabdq(const uint8x16_t  & v0, const uint8x16_t  & v1) { return vabdq_u8 (v0, v1); }
-inline   int8x16_t vabdq(const int8x16_t   & v0, const int8x16_t   & v1) { return vabdq_s8 (v0, v1); }
-inline  uint16x8_t vabdq(const uint16x8_t  & v0, const uint16x8_t  & v1) { return vabdq_u16(v0, v1); }
-inline   int16x8_t vabdq(const int16x8_t   & v0, const int16x8_t   & v1) { return vabdq_s16(v0, v1); }
-inline  uint32x4_t vabdq(const uint32x4_t  & v0, const uint32x4_t  & v1) { return vabdq_u32(v0, v1); }
-inline   int32x4_t vabdq(const int32x4_t   & v0, const int32x4_t   & v1) { return vabdq_s32(v0, v1); }
-inline float32x4_t vabdq(const float32x4_t & v0, const float32x4_t & v1) { return vabdq_f32(v0, v1); }
-
-////////////////////////////// vabd ///////////////////////
-
-// vabd(v0, v1): 64-bit counterpart of vabdq; forwards to vabd_<suffix> selected
-// by the operand vector type.
-inline   uint8x8_t vabd(const uint8x8_t   & v0, const uint8x8_t   & v1) { return vabd_u8 (v0, v1); }
-inline    int8x8_t vabd(const int8x8_t    & v0, const int8x8_t    & v1) { return vabd_s8 (v0, v1); }
-inline  uint16x4_t vabd(const uint16x4_t  & v0, const uint16x4_t  & v1) { return vabd_u16(v0, v1); }
-inline   int16x4_t vabd(const int16x4_t   & v0, const int16x4_t   & v1) { return vabd_s16(v0, v1); }
-inline  uint32x2_t vabd(const uint32x2_t  & v0, const uint32x2_t  & v1) { return vabd_u32(v0, v1); }
-inline   int32x2_t vabd(const int32x2_t   & v0, const int32x2_t   & v1) { return vabd_s32(v0, v1); }
-inline float32x2_t vabd(const float32x2_t & v0, const float32x2_t & v1) { return vabd_f32(v0, v1); }
-
-////////////////////////////// vminq ///////////////////////
-
-// vminq(v0, v1): forwards to vminq_<suffix> (lane-wise minimum) on 128-bit
-// vectors, selected by the operand vector type.
-inline  uint8x16_t vminq(const uint8x16_t  & v0, const uint8x16_t  & v1) { return vminq_u8 (v0, v1); }
-inline   int8x16_t vminq(const int8x16_t   & v0, const int8x16_t   & v1) { return vminq_s8 (v0, v1); }
-inline  uint16x8_t vminq(const uint16x8_t  & v0, const uint16x8_t  & v1) { return vminq_u16(v0, v1); }
-inline   int16x8_t vminq(const int16x8_t   & v0, const int16x8_t   & v1) { return vminq_s16(v0, v1); }
-inline  uint32x4_t vminq(const uint32x4_t  & v0, const uint32x4_t  & v1) { return vminq_u32(v0, v1); }
-inline   int32x4_t vminq(const int32x4_t   & v0, const int32x4_t   & v1) { return vminq_s32(v0, v1); }
-inline float32x4_t vminq(const float32x4_t & v0, const float32x4_t & v1) { return vminq_f32(v0, v1); }
-
-////////////////////////////// vmin ///////////////////////
-
-// vmin(v0, v1): 64-bit counterpart of vminq; forwards to vmin_<suffix> selected
-// by the operand vector type.
-inline   uint8x8_t vmin(const uint8x8_t   & v0, const uint8x8_t   & v1) { return vmin_u8 (v0, v1); }
-inline    int8x8_t vmin(const int8x8_t    & v0, const int8x8_t    & v1) { return vmin_s8 (v0, v1); }
-inline  uint16x4_t vmin(const uint16x4_t  & v0, const uint16x4_t  & v1) { return vmin_u16(v0, v1); }
-inline   int16x4_t vmin(const int16x4_t   & v0, const int16x4_t   & v1) { return vmin_s16(v0, v1); }
-inline  uint32x2_t vmin(const uint32x2_t  & v0, const uint32x2_t  & v1) { return vmin_u32(v0, v1); }
-inline   int32x2_t vmin(const int32x2_t   & v0, const int32x2_t   & v1) { return vmin_s32(v0, v1); }
-inline float32x2_t vmin(const float32x2_t & v0, const float32x2_t & v1) { return vmin_f32(v0, v1); }
-
-////////////////////////////// vmaxq ///////////////////////
-
-// vmaxq(v0, v1): forwards to vmaxq_<suffix> (lane-wise maximum) on 128-bit
-// vectors, selected by the operand vector type.
-inline  uint8x16_t vmaxq(const uint8x16_t  & v0, const uint8x16_t  & v1) { return vmaxq_u8 (v0, v1); }
-inline   int8x16_t vmaxq(const int8x16_t   & v0, const int8x16_t   & v1) { return vmaxq_s8 (v0, v1); }
-inline  uint16x8_t vmaxq(const uint16x8_t  & v0, const uint16x8_t  & v1) { return vmaxq_u16(v0, v1); }
-inline   int16x8_t vmaxq(const int16x8_t   & v0, const int16x8_t   & v1) { return vmaxq_s16(v0, v1); }
-inline  uint32x4_t vmaxq(const uint32x4_t  & v0, const uint32x4_t  & v1) { return vmaxq_u32(v0, v1); }
-inline   int32x4_t vmaxq(const int32x4_t   & v0, const int32x4_t   & v1) { return vmaxq_s32(v0, v1); }
-inline float32x4_t vmaxq(const float32x4_t & v0, const float32x4_t & v1) { return vmaxq_f32(v0, v1); }
-
-////////////////////////////// vmax ///////////////////////
-
-// vmax(v0, v1): 64-bit counterpart of vmaxq; forwards to vmax_<suffix> selected
-// by the operand vector type.
-inline   uint8x8_t vmax(const uint8x8_t   & v0, const uint8x8_t   & v1) { return vmax_u8 (v0, v1); }
-inline    int8x8_t vmax(const int8x8_t    & v0, const int8x8_t    & v1) { return vmax_s8 (v0, v1); }
-inline  uint16x4_t vmax(const uint16x4_t  & v0, const uint16x4_t  & v1) { return vmax_u16(v0, v1); }
-inline   int16x4_t vmax(const int16x4_t   & v0, const int16x4_t   & v1) { return vmax_s16(v0, v1); }
-inline  uint32x2_t vmax(const uint32x2_t  & v0, const uint32x2_t  & v1) { return vmax_u32(v0, v1); }
-inline   int32x2_t vmax(const int32x2_t   & v0, const int32x2_t   & v1) { return vmax_s32(v0, v1); }
-inline float32x2_t vmax(const float32x2_t & v0, const float32x2_t & v1) { return vmax_f32(v0, v1); }
-
-////////////////////////////// vdupq_n ///////////////////////
-
-// vdupq_n(val): forwards a scalar to vdupq_n_<suffix>, which fills every lane of
-// a 128-bit vector with it. Unlike the load/store sets above, u64/s64 overloads
-// are provided here.
-inline  uint8x16_t vdupq_n(const u8  & val) { return  vdupq_n_u8(val); }
-inline   int8x16_t vdupq_n(const s8  & val) { return  vdupq_n_s8(val); }
-inline  uint16x8_t vdupq_n(const u16 & val) { return vdupq_n_u16(val); }
-inline   int16x8_t vdupq_n(const s16 & val) { return vdupq_n_s16(val); }
-inline  uint32x4_t vdupq_n(const u32 & val) { return vdupq_n_u32(val); }
-inline   int32x4_t vdupq_n(const s32 & val) { return vdupq_n_s32(val); }
-inline  uint64x2_t vdupq_n(const u64 & val) { return vdupq_n_u64(val); }
-inline   int64x2_t vdupq_n(const s64 & val) { return vdupq_n_s64(val); }
-inline float32x4_t vdupq_n(const f32 & val) { return vdupq_n_f32(val); }
-
-////////////////////////////// vdup_n ///////////////////////
-
-// vdup_n(val): 64-bit counterpart of vdupq_n; forwards to vdup_n_<suffix>
-// selected by the scalar type (u64/s64 included).
-inline   uint8x8_t vdup_n(const u8  & val) { return  vdup_n_u8(val); }
-inline    int8x8_t vdup_n(const s8  & val) { return  vdup_n_s8(val); }
-inline  uint16x4_t vdup_n(const u16 & val) { return vdup_n_u16(val); }
-inline   int16x4_t vdup_n(const s16 & val) { return vdup_n_s16(val); }
-inline  uint32x2_t vdup_n(const u32 & val) { return vdup_n_u32(val); }
-inline   int32x2_t vdup_n(const s32 & val) { return vdup_n_s32(val); }
-inline  uint64x1_t vdup_n(const u64 & val) { return vdup_n_u64(val); }
-inline   int64x1_t vdup_n(const s64 & val) { return vdup_n_s64(val); }
-inline float32x2_t vdup_n(const f32 & val) { return vdup_n_f32(val); }
-
-////////////////////////////// vget_low ///////////////////////
-
-// vget_low(v): forwards to vget_low_<suffix>, returning the low 64-bit half of a
-// 128-bit vector with the same element type.
-inline uint8x8_t   vget_low(const uint8x16_t  & v) { return vget_low_u8 (v); }
-inline int8x8_t    vget_low(const int8x16_t   & v) { return vget_low_s8 (v); }
-inline uint16x4_t  vget_low(const uint16x8_t  & v) { return vget_low_u16(v); }
-inline int16x4_t   vget_low(const int16x8_t   & v) { return vget_low_s16(v); }
-inline uint32x2_t  vget_low(const uint32x4_t  & v) { return vget_low_u32(v); }
-inline int32x2_t   vget_low(const int32x4_t   & v) { return vget_low_s32(v); }
-inline float32x2_t vget_low(const float32x4_t & v) { return vget_low_f32(v); }
-
-////////////////////////////// vget_high ///////////////////////
-
-// vget_high(v): forwards to vget_high_<suffix>, returning the high 64-bit half of
-// a 128-bit vector with the same element type.
-inline uint8x8_t   vget_high(const uint8x16_t  & v) { return vget_high_u8 (v); }
-inline int8x8_t    vget_high(const int8x16_t   & v) { return vget_high_s8 (v); }
-inline uint16x4_t  vget_high(const uint16x8_t  & v) { return vget_high_u16(v); }
-inline int16x4_t   vget_high(const int16x8_t   & v) { return vget_high_s16(v); }
-inline uint32x2_t  vget_high(const uint32x4_t  & v) { return vget_high_u32(v); }
-inline int32x2_t   vget_high(const int32x4_t   & v) { return vget_high_s32(v); }
-inline float32x2_t vget_high(const float32x4_t & v) { return vget_high_f32(v); }
-
-////////////////////////////// vcombine ///////////////////////
-
-// vcombine(v0, v1): forwards to vcombine_<suffix>, joining two 64-bit vectors
-// into one 128-bit vector of the same element type (v0 is the first argument,
-// i.e. the low half per the NEON intrinsics reference).
-inline   uint8x16_t vcombine(const uint8x8_t   & v0, const uint8x8_t   & v1) { return vcombine_u8 (v0, v1); }
-inline    int8x16_t vcombine(const int8x8_t    & v0, const int8x8_t    & v1) { return vcombine_s8 (v0, v1); }
-inline  uint16x8_t  vcombine(const uint16x4_t  & v0, const uint16x4_t  & v1) { return vcombine_u16(v0, v1); }
-inline   int16x8_t  vcombine(const int16x4_t   & v0, const int16x4_t   & v1) { return vcombine_s16(v0, v1); }
-inline  uint32x4_t  vcombine(const uint32x2_t  & v0, const uint32x2_t  & v1) { return vcombine_u32(v0, v1); }
-inline   int32x4_t  vcombine(const int32x2_t   & v0, const int32x2_t   & v1) { return vcombine_s32(v0, v1); }
-inline float32x4_t  vcombine(const float32x2_t & v0, const float32x2_t & v1) { return vcombine_f32(v0, v1); }
-
-////////////////////////////// vaddq ///////////////////////
-
-// vaddq(v0, v1): forwards to vaddq_<suffix> (lane-wise add, non-saturating) on
-// 128-bit vectors, selected by the operand vector type.
-inline  uint8x16_t vaddq(const uint8x16_t  & v0, const uint8x16_t  & v1) { return vaddq_u8 (v0, v1); }
-inline   int8x16_t vaddq(const int8x16_t   & v0, const int8x16_t   & v1) { return vaddq_s8 (v0, v1); }
-inline  uint16x8_t vaddq(const uint16x8_t  & v0, const uint16x8_t  & v1) { return vaddq_u16(v0, v1); }
-inline   int16x8_t vaddq(const int16x8_t   & v0, const int16x8_t   & v1) { return vaddq_s16(v0, v1); }
-inline  uint32x4_t vaddq(const uint32x4_t  & v0, const uint32x4_t  & v1) { return vaddq_u32(v0, v1); }
-inline   int32x4_t vaddq(const int32x4_t   & v0, const int32x4_t   & v1) { return vaddq_s32(v0, v1); }
-inline float32x4_t vaddq(const float32x4_t & v0, const float32x4_t & v1) { return vaddq_f32(v0, v1); }
-
-////////////////////////////// vadd ///////////////////////
-
-// vadd(v0, v1): 64-bit counterpart of vaddq; forwards to vadd_<suffix> selected
-// by the operand vector type.
-inline   uint8x8_t vadd(const uint8x8_t   & v0, const uint8x8_t   & v1) { return vadd_u8 (v0, v1); }
-inline    int8x8_t vadd(const int8x8_t    & v0, const int8x8_t    & v1) { return vadd_s8 (v0, v1); }
-inline  uint16x4_t vadd(const uint16x4_t  & v0, const uint16x4_t  & v1) { return vadd_u16(v0, v1); }
-inline   int16x4_t vadd(const int16x4_t   & v0, const int16x4_t   & v1) { return vadd_s16(v0, v1); }
-inline  uint32x2_t vadd(const uint32x2_t  & v0, const uint32x2_t  & v1) { return vadd_u32(v0, v1); }
-inline   int32x2_t vadd(const int32x2_t   & v0, const int32x2_t   & v1) { return vadd_s32(v0, v1); }
-inline float32x2_t vadd(const float32x2_t & v0, const float32x2_t & v1) { return vadd_f32(v0, v1); }
-
-////////////////////////////// vqaddq ///////////////////////
-
-// vqaddq(v0, v1): forwards to vqaddq_<suffix> (saturating add) on 128-bit integer
-// vectors. Only 8/16/32-bit integer overloads are defined here: there is no f32
-// overload, and (unlike vqsubq) no u64/s64 overloads.
-inline  uint8x16_t vqaddq(const uint8x16_t  & v0, const uint8x16_t  & v1) { return vqaddq_u8 (v0, v1); }
-inline   int8x16_t vqaddq(const int8x16_t   & v0, const int8x16_t   & v1) { return vqaddq_s8 (v0, v1); }
-inline  uint16x8_t vqaddq(const uint16x8_t  & v0, const uint16x8_t  & v1) { return vqaddq_u16(v0, v1); }
-inline   int16x8_t vqaddq(const int16x8_t   & v0, const int16x8_t   & v1) { return vqaddq_s16(v0, v1); }
-inline  uint32x4_t vqaddq(const uint32x4_t  & v0, const uint32x4_t  & v1) { return vqaddq_u32(v0, v1); }
-inline   int32x4_t vqaddq(const int32x4_t   & v0, const int32x4_t   & v1) { return vqaddq_s32(v0, v1); }
-
-////////////////////////////// vqadd ///////////////////////
-
-// vqadd(v0, v1): 64-bit counterpart of vqaddq; forwards to vqadd_<suffix>
-// (saturating add). Only 8/16/32-bit integer overloads are defined.
-inline   uint8x8_t vqadd(const uint8x8_t   & v0, const uint8x8_t   & v1) { return vqadd_u8 (v0, v1); }
-inline    int8x8_t vqadd(const int8x8_t    & v0, const int8x8_t    & v1) { return vqadd_s8 (v0, v1); }
-inline  uint16x4_t vqadd(const uint16x4_t  & v0, const uint16x4_t  & v1) { return vqadd_u16(v0, v1); }
-inline   int16x4_t vqadd(const int16x4_t   & v0, const int16x4_t   & v1) { return vqadd_s16(v0, v1); }
-inline  uint32x2_t vqadd(const uint32x2_t  & v0, const uint32x2_t  & v1) { return vqadd_u32(v0, v1); }
-inline   int32x2_t vqadd(const int32x2_t   & v0, const int32x2_t   & v1) { return vqadd_s32(v0, v1); }
-
-////////////////////////////// vsubq ///////////////////////
-
-// vsubq(v0, v1): forwards to vsubq_<suffix> (lane-wise v0 - v1, non-saturating)
-// on 128-bit vectors, selected by the operand vector type.
-inline  uint8x16_t vsubq(const uint8x16_t  & v0, const uint8x16_t  & v1) { return vsubq_u8 (v0, v1); }
-inline   int8x16_t vsubq(const int8x16_t   & v0, const int8x16_t   & v1) { return vsubq_s8 (v0, v1); }
-inline  uint16x8_t vsubq(const uint16x8_t  & v0, const uint16x8_t  & v1) { return vsubq_u16(v0, v1); }
-inline   int16x8_t vsubq(const int16x8_t   & v0, const int16x8_t   & v1) { return vsubq_s16(v0, v1); }
-inline  uint32x4_t vsubq(const uint32x4_t  & v0, const uint32x4_t  & v1) { return vsubq_u32(v0, v1); }
-inline   int32x4_t vsubq(const int32x4_t   & v0, const int32x4_t   & v1) { return vsubq_s32(v0, v1); }
-inline float32x4_t vsubq(const float32x4_t & v0, const float32x4_t & v1) { return vsubq_f32(v0, v1); }
-
-////////////////////////////// vsub ///////////////////////
-
-// vsub(v0, v1): 64-bit counterpart of vsubq; forwards to vsub_<suffix> selected
-// by the operand vector type.
-inline   uint8x8_t vsub(const uint8x8_t   & v0, const uint8x8_t   & v1) { return vsub_u8 (v0, v1); }
-inline    int8x8_t vsub(const int8x8_t    & v0, const int8x8_t    & v1) { return vsub_s8 (v0, v1); }
-inline  uint16x4_t vsub(const uint16x4_t  & v0, const uint16x4_t  & v1) { return vsub_u16(v0, v1); }
-inline   int16x4_t vsub(const int16x4_t   & v0, const int16x4_t   & v1) { return vsub_s16(v0, v1); }
-inline  uint32x2_t vsub(const uint32x2_t  & v0, const uint32x2_t  & v1) { return vsub_u32(v0, v1); }
-inline   int32x2_t vsub(const int32x2_t   & v0, const int32x2_t   & v1) { return vsub_s32(v0, v1); }
-inline float32x2_t vsub(const float32x2_t & v0, const float32x2_t & v1) { return vsub_f32(v0, v1); }
-
-////////////////////////////// vqsubq ///////////////////////
-
-// vqsubq(v0, v1): forwards to vqsubq_<suffix> (saturating subtract) on 128-bit
-// integer vectors. Includes u64/s64 overloads, which the vqaddq set does not have.
-inline  uint8x16_t vqsubq(const uint8x16_t  & v0, const uint8x16_t  & v1) { return vqsubq_u8 (v0, v1); }
-inline   int8x16_t vqsubq(const int8x16_t   & v0, const int8x16_t   & v1) { return vqsubq_s8 (v0, v1); }
-inline  uint16x8_t vqsubq(const uint16x8_t  & v0, const uint16x8_t  & v1) { return vqsubq_u16(v0, v1); }
-inline   int16x8_t vqsubq(const int16x8_t   & v0, const int16x8_t   & v1) { return vqsubq_s16(v0, v1); }
-inline  uint32x4_t vqsubq(const uint32x4_t  & v0, const uint32x4_t  & v1) { return vqsubq_u32(v0, v1); }
-inline   int32x4_t vqsubq(const int32x4_t   & v0, const int32x4_t   & v1) { return vqsubq_s32(v0, v1); }
-inline  uint64x2_t vqsubq(const uint64x2_t  & v0, const uint64x2_t  & v1) { return vqsubq_u64(v0, v1); }
-inline   int64x2_t vqsubq(const int64x2_t   & v0, const int64x2_t   & v1) { return vqsubq_s64(v0, v1); }
-
-////////////////////////////// vqsub ///////////////////////
-
-// vqsub(v0, v1): 64-bit counterpart of vqsubq; forwards to vqsub_<suffix>
-// (saturating subtract), including the u64/s64 single-lane variants.
-inline   uint8x8_t vqsub(const uint8x8_t   & v0, const uint8x8_t   & v1) { return vqsub_u8 (v0, v1); }
-inline    int8x8_t vqsub(const int8x8_t    & v0, const int8x8_t    & v1) { return vqsub_s8 (v0, v1); }
-inline  uint16x4_t vqsub(const uint16x4_t  & v0, const uint16x4_t  & v1) { return vqsub_u16(v0, v1); }
-inline   int16x4_t vqsub(const int16x4_t   & v0, const int16x4_t   & v1) { return vqsub_s16(v0, v1); }
-inline  uint32x2_t vqsub(const uint32x2_t  & v0, const uint32x2_t  & v1) { return vqsub_u32(v0, v1); }
-inline   int32x2_t vqsub(const int32x2_t   & v0, const int32x2_t   & v1) { return vqsub_s32(v0, v1); }
-inline  uint64x1_t vqsub(const uint64x1_t  & v0, const uint64x1_t  & v1) { return vqsub_u64(v0, v1); }
-inline   int64x1_t vqsub(const int64x1_t   & v0, const int64x1_t   & v1) { return vqsub_s64(v0, v1); }
-
-////////////////////////////// vmull ///////////////////////
-
-// vmull(v0, v1): forwards to vmull_<suffix> (widening multiply). Operands are
-// 64-bit vectors; the result is a 128-bit vector whose lanes are twice as wide
-// (8->16, 16->32, 32->64 bits). Integer types only.
-inline  uint16x8_t vmull(const uint8x8_t   & v0, const uint8x8_t   & v1) { return vmull_u8 (v0, v1); }
-inline   int16x8_t vmull(const int8x8_t    & v0, const int8x8_t    & v1) { return vmull_s8 (v0, v1); }
-inline  uint32x4_t vmull(const uint16x4_t  & v0, const uint16x4_t  & v1) { return vmull_u16(v0, v1); }
-inline   int32x4_t vmull(const int16x4_t   & v0, const int16x4_t   & v1) { return vmull_s16(v0, v1); }
-inline  uint64x2_t vmull(const uint32x2_t  & v0, const uint32x2_t  & v1) { return vmull_u32(v0, v1); }
-inline   int64x2_t vmull(const int32x2_t   & v0, const int32x2_t   & v1) { return vmull_s32(v0, v1); }
-
-////////////////////////////// vrev64q ///////////////////////
-
-inline uint8x16_t  vrev64q(const uint8x16_t  & v) { return vrev64q_u8 (v); }
-inline int8x16_t   vrev64q(const int8x16_t   & v) { return vrev64q_s8 (v); }
-inline uint16x8_t  vrev64q(const uint16x8_t  & v) { return vrev64q_u16(v); }
-inline int16x8_t   vrev64q(const int16x8_t   & v) { return vrev64q_s16(v); }
-inline uint32x4_t  vrev64q(const uint32x4_t  & v) { return vrev64q_u32(v); }
-inline int32x4_t   vrev64q(const int32x4_t   & v) { return vrev64q_s32(v); }
-inline float32x4_t vrev64q(const float32x4_t & v) { return vrev64q_f32(v); }
-
-////////////////////////////// vrev64 ///////////////////////
-
-inline uint8x8_t   vrev64(const uint8x8_t   & v) { return vrev64_u8 (v); }
-inline int8x8_t    vrev64(const int8x8_t    & v) { return vrev64_s8 (v); }
-inline uint16x4_t  vrev64(const uint16x4_t  & v) { return vrev64_u16(v); }
-inline int16x4_t   vrev64(const int16x4_t   & v) { return vrev64_s16(v); }
-inline uint32x2_t  vrev64(const uint32x2_t  & v) { return vrev64_u32(v); }
-inline int32x2_t   vrev64(const int32x2_t   & v) { return vrev64_s32(v); }
-inline float32x2_t vrev64(const float32x2_t & v) { return vrev64_f32(v); }
-
-////////////////////////////// vceqq ///////////////////////
-
-inline  uint8x16_t vceqq(const uint8x16_t  & v0, const uint8x16_t  & v1) { return vceqq_u8 (v0, v1); }
-inline  uint8x16_t vceqq(const int8x16_t   & v0, const int8x16_t   & v1) { return vceqq_s8 (v0, v1); }
-inline  uint16x8_t vceqq(const uint16x8_t  & v0, const uint16x8_t  & v1) { return vceqq_u16(v0, v1); }
-inline  uint16x8_t vceqq(const int16x8_t   & v0, const int16x8_t   & v1) { return vceqq_s16(v0, v1); }
-inline  uint32x4_t vceqq(const uint32x4_t  & v0, const uint32x4_t  & v1) { return vceqq_u32(v0, v1); }
-inline  uint32x4_t vceqq(const int32x4_t   & v0, const int32x4_t   & v1) { return vceqq_s32(v0, v1); }
-inline  uint32x4_t vceqq(const float32x4_t & v0, const float32x4_t & v1) { return vceqq_f32(v0, v1); }
-
-////////////////////////////// vceq ///////////////////////
-
-inline   uint8x8_t vceq(const uint8x8_t   & v0, const uint8x8_t   & v1) { return vceq_u8 (v0, v1); }
-inline   uint8x8_t vceq(const int8x8_t    & v0, const int8x8_t    & v1) { return vceq_s8 (v0, v1); }
-inline  uint16x4_t vceq(const uint16x4_t  & v0, const uint16x4_t  & v1) { return vceq_u16(v0, v1); }
-inline  uint16x4_t vceq(const int16x4_t   & v0, const int16x4_t   & v1) { return vceq_s16(v0, v1); }
-inline  uint32x2_t vceq(const uint32x2_t  & v0, const uint32x2_t  & v1) { return vceq_u32(v0, v1); }
-inline  uint32x2_t vceq(const int32x2_t   & v0, const int32x2_t   & v1) { return vceq_s32(v0, v1); }
-inline  uint32x2_t vceq(const float32x2_t & v0, const float32x2_t & v1) { return vceq_f32(v0, v1); }
-
-////////////////////////////// vcgtq ///////////////////////
-
-inline  uint8x16_t vcgtq(const uint8x16_t  & v0, const uint8x16_t  & v1) { return vcgtq_u8 (v0, v1); }
-inline  uint8x16_t vcgtq(const int8x16_t   & v0, const int8x16_t   & v1) { return vcgtq_s8 (v0, v1); }
-inline  uint16x8_t vcgtq(const uint16x8_t  & v0, const uint16x8_t  & v1) { return vcgtq_u16(v0, v1); }
-inline  uint16x8_t vcgtq(const int16x8_t   & v0, const int16x8_t   & v1) { return vcgtq_s16(v0, v1); }
-inline  uint32x4_t vcgtq(const uint32x4_t  & v0, const uint32x4_t  & v1) { return vcgtq_u32(v0, v1); }
-inline  uint32x4_t vcgtq(const int32x4_t   & v0, const int32x4_t   & v1) { return vcgtq_s32(v0, v1); }
-inline  uint32x4_t vcgtq(const float32x4_t & v0, const float32x4_t & v1) { return vcgtq_f32(v0, v1); }
-
-////////////////////////////// vcgt ///////////////////////
-
-inline   uint8x8_t vcgt(const uint8x8_t   & v0, const uint8x8_t   & v1) { return vcgt_u8 (v0, v1); }
-inline   uint8x8_t vcgt(const int8x8_t    & v0, const int8x8_t    & v1) { return vcgt_s8 (v0, v1); }
-inline  uint16x4_t vcgt(const uint16x4_t  & v0, const uint16x4_t  & v1) { return vcgt_u16(v0, v1); }
-inline  uint16x4_t vcgt(const int16x4_t   & v0, const int16x4_t   & v1) { return vcgt_s16(v0, v1); }
-inline  uint32x2_t vcgt(const uint32x2_t  & v0, const uint32x2_t  & v1) { return vcgt_u32(v0, v1); }
-inline  uint32x2_t vcgt(const int32x2_t   & v0, const int32x2_t   & v1) { return vcgt_s32(v0, v1); }
-inline  uint32x2_t vcgt(const float32x2_t & v0, const float32x2_t & v1) { return vcgt_f32(v0, v1); }
-
-////////////////////////////// vcgeq ///////////////////////
-
-inline  uint8x16_t vcgeq(const uint8x16_t  & v0, const uint8x16_t  & v1) { return vcgeq_u8 (v0, v1); }
-inline  uint8x16_t vcgeq(const int8x16_t   & v0, const int8x16_t   & v1) { return vcgeq_s8 (v0, v1); }
-inline  uint16x8_t vcgeq(const uint16x8_t  & v0, const uint16x8_t  & v1) { return vcgeq_u16(v0, v1); }
-inline  uint16x8_t vcgeq(const int16x8_t   & v0, const int16x8_t   & v1) { return vcgeq_s16(v0, v1); }
-inline  uint32x4_t vcgeq(const uint32x4_t  & v0, const uint32x4_t  & v1) { return vcgeq_u32(v0, v1); }
-inline  uint32x4_t vcgeq(const int32x4_t   & v0, const int32x4_t   & v1) { return vcgeq_s32(v0, v1); }
-inline  uint32x4_t vcgeq(const float32x4_t & v0, const float32x4_t & v1) { return vcgeq_f32(v0, v1); }
-
-////////////////////////////// vcge ///////////////////////
-
-inline   uint8x8_t vcge(const uint8x8_t   & v0, const uint8x8_t   & v1) { return vcge_u8 (v0, v1); }
-inline   uint8x8_t vcge(const int8x8_t    & v0, const int8x8_t    & v1) { return vcge_s8 (v0, v1); }
-inline  uint16x4_t vcge(const uint16x4_t  & v0, const uint16x4_t  & v1) { return vcge_u16(v0, v1); }
-inline  uint16x4_t vcge(const int16x4_t   & v0, const int16x4_t   & v1) { return vcge_s16(v0, v1); }
-inline  uint32x2_t vcge(const uint32x2_t  & v0, const uint32x2_t  & v1) { return vcge_u32(v0, v1); }
-inline  uint32x2_t vcge(const int32x2_t   & v0, const int32x2_t   & v1) { return vcge_s32(v0, v1); }
-inline  uint32x2_t vcge(const float32x2_t & v0, const float32x2_t & v1) { return vcge_f32(v0, v1); }
-
-////////////////////////////// vandq ///////////////////////
-
-inline  uint8x16_t vandq(const uint8x16_t  & v0, const uint8x16_t  & v1) { return vandq_u8 (v0, v1); }
-inline   int8x16_t vandq(const int8x16_t   & v0, const int8x16_t   & v1) { return vandq_s8 (v0, v1); }
-inline  uint16x8_t vandq(const uint16x8_t  & v0, const uint16x8_t  & v1) { return vandq_u16(v0, v1); }
-inline   int16x8_t vandq(const int16x8_t   & v0, const int16x8_t   & v1) { return vandq_s16(v0, v1); }
-inline  uint32x4_t vandq(const uint32x4_t  & v0, const uint32x4_t  & v1) { return vandq_u32(v0, v1); }
-inline   int32x4_t vandq(const int32x4_t   & v0, const int32x4_t   & v1) { return vandq_s32(v0, v1); }
-
-////////////////////////////// vand ///////////////////////
-
-inline   uint8x8_t vand(const uint8x8_t   & v0, const uint8x8_t   & v1) { return vand_u8 (v0, v1); }
-inline    int8x8_t vand(const int8x8_t    & v0, const int8x8_t    & v1) { return vand_s8 (v0, v1); }
-inline  uint16x4_t vand(const uint16x4_t  & v0, const uint16x4_t  & v1) { return vand_u16(v0, v1); }
-inline   int16x4_t vand(const int16x4_t   & v0, const int16x4_t   & v1) { return vand_s16(v0, v1); }
-inline  uint32x2_t vand(const uint32x2_t  & v0, const uint32x2_t  & v1) { return vand_u32(v0, v1); }
-inline   int32x2_t vand(const int32x2_t   & v0, const int32x2_t   & v1) { return vand_s32(v0, v1); }
-
-////////////////////////////// vmovn ///////////////////////
-
-inline uint8x8_t   vmovn(const uint16x8_t  & v) { return vmovn_u16(v); }
-inline int8x8_t    vmovn(const int16x8_t   & v) { return vmovn_s16(v); }
-inline uint16x4_t  vmovn(const uint32x4_t  & v) { return vmovn_u32(v); }
-inline int16x4_t   vmovn(const int32x4_t   & v) { return vmovn_s32(v); }
-inline uint32x2_t  vmovn(const uint64x2_t  & v) { return vmovn_u64(v); }
-inline int32x2_t   vmovn(const int64x2_t   & v) { return vmovn_s64(v); }
-
-////////////////////////////// vqmovn ///////////////////////
-
-inline uint8x8_t   vqmovn(const uint16x8_t  & v) { return vqmovn_u16(v); }
-inline int8x8_t    vqmovn(const int16x8_t   & v) { return vqmovn_s16(v); }
-inline uint16x4_t  vqmovn(const uint32x4_t  & v) { return vqmovn_u32(v); }
-inline int16x4_t   vqmovn(const int32x4_t   & v) { return vqmovn_s32(v); }
-inline uint32x2_t  vqmovn(const uint64x2_t  & v) { return vqmovn_u64(v); }
-inline int32x2_t   vqmovn(const int64x2_t   & v) { return vqmovn_s64(v); }
-
-////////////////////////////// vmovl ///////////////////////
-
-inline uint16x8_t  vmovl(const uint8x8_t   & v) { return  vmovl_u8(v); }
-inline int16x8_t   vmovl(const int8x8_t    & v) { return  vmovl_s8(v); }
-inline uint32x4_t  vmovl(const uint16x4_t  & v) { return vmovl_u16(v); }
-inline int32x4_t   vmovl(const int16x4_t   & v) { return vmovl_s16(v); }
-
-////////////////////////////// vmvnq ///////////////////////
-
-inline uint8x16_t  vmvnq(const uint8x16_t  & v) { return vmvnq_u8 (v); }
-inline int8x16_t   vmvnq(const int8x16_t   & v) { return vmvnq_s8 (v); }
-inline uint16x8_t  vmvnq(const uint16x8_t  & v) { return vmvnq_u16(v); }
-inline int16x8_t   vmvnq(const int16x8_t   & v) { return vmvnq_s16(v); }
-inline uint32x4_t  vmvnq(const uint32x4_t  & v) { return vmvnq_u32(v); }
-inline int32x4_t   vmvnq(const int32x4_t   & v) { return vmvnq_s32(v); }
-
-////////////////////////////// vmvn ///////////////////////
-
-inline uint8x8_t   vmvn(const uint8x8_t   & v) { return vmvn_u8 (v); }
-inline int8x8_t    vmvn(const int8x8_t    & v) { return vmvn_s8 (v); }
-inline uint16x4_t  vmvn(const uint16x4_t  & v) { return vmvn_u16(v); }
-inline int16x4_t   vmvn(const int16x4_t   & v) { return vmvn_s16(v); }
-inline uint32x2_t  vmvn(const uint32x2_t  & v) { return vmvn_u32(v); }
-inline int32x2_t   vmvn(const int32x2_t   & v) { return vmvn_s32(v); }
-
-////////////////////////////// vbicq ///////////////////////
-
-inline  uint8x16_t vbicq(const uint8x16_t  & v0, const uint8x16_t  & v1) { return vbicq_u8 (v0, v1); }
-inline   int8x16_t vbicq(const int8x16_t   & v0, const int8x16_t   & v1) { return vbicq_s8 (v0, v1); }
-inline  uint16x8_t vbicq(const uint16x8_t  & v0, const uint16x8_t  & v1) { return vbicq_u16(v0, v1); }
-inline   int16x8_t vbicq(const int16x8_t   & v0, const int16x8_t   & v1) { return vbicq_s16(v0, v1); }
-inline  uint32x4_t vbicq(const uint32x4_t  & v0, const uint32x4_t  & v1) { return vbicq_u32(v0, v1); }
-inline   int32x4_t vbicq(const int32x4_t   & v0, const int32x4_t   & v1) { return vbicq_s32(v0, v1); }
-inline  uint64x2_t vbicq(const uint64x2_t  & v0, const uint64x2_t  & v1) { return vbicq_u64(v0, v1); }
-inline   int64x2_t vbicq(const int64x2_t   & v0, const int64x2_t   & v1) { return vbicq_s64(v0, v1); }
-
-////////////////////////////// vbic ///////////////////////
-
-inline   uint8x8_t vbic(const uint8x8_t   & v0, const uint8x8_t   & v1) { return vbic_u8 (v0, v1); }
-inline    int8x8_t vbic(const int8x8_t    & v0, const int8x8_t    & v1) { return vbic_s8 (v0, v1); }
-inline  uint16x4_t vbic(const uint16x4_t  & v0, const uint16x4_t  & v1) { return vbic_u16(v0, v1); }
-inline   int16x4_t vbic(const int16x4_t   & v0, const int16x4_t   & v1) { return vbic_s16(v0, v1); }
-inline  uint32x2_t vbic(const uint32x2_t  & v0, const uint32x2_t  & v1) { return vbic_u32(v0, v1); }
-inline   int32x2_t vbic(const int32x2_t   & v0, const int32x2_t   & v1) { return vbic_s32(v0, v1); }
-inline  uint64x1_t vbic(const uint64x1_t  & v0, const uint64x1_t  & v1) { return vbic_u64(v0, v1); }
-inline   int64x1_t vbic(const int64x1_t   & v0, const int64x1_t   & v1) { return vbic_s64(v0, v1); }
-
-////////////////////////////// vtransform ///////////////////////
-
-template <typename Op>
-void vtransform(Size2D size,
-                const typename Op::type * src0Base, ptrdiff_t src0Stride,
-                const typename Op::type * src1Base, ptrdiff_t src1Stride,
-                typename Op::type * dstBase, ptrdiff_t dstStride, const Op & op)
-{
-    typedef typename Op::type type;
-    typedef typename VecTraits<type>::vec128 vec128;
-    typedef typename VecTraits<type>::vec64 vec64;
-
-    if (src0Stride == src1Stride && src0Stride == dstStride &&
-        src0Stride == (ptrdiff_t)(size.width * sizeof(type)))
-    {
-        size.width *= size.height;
-        size.height = 1;
-    }
-
-    const size_t step_base = 32 / sizeof(type);
-    size_t roiw_base = size.width >= (step_base - 1) ? size.width - step_base + 1 : 0;
-    const size_t step_tail = 8 / sizeof(type);
-    size_t roiw_tail = size.width >= (step_tail - 1) ? size.width - step_tail + 1 : 0;
-
-    for (size_t y = 0; y < size.height; ++y)
-    {
-        const type * src0 = internal::getRowPtr(src0Base, src0Stride, y);
-        const type * src1 = internal::getRowPtr(src1Base, src1Stride, y);
-        typename Op::type * dst = internal::getRowPtr(dstBase, dstStride, y);
-        size_t x = 0;
-
-        for( ; x < roiw_base; x += step_base )
-        {
-            internal::prefetch(src0 + x);
-            internal::prefetch(src1 + x);
-
-            vec128 v_src00 = vld1q(src0 + x), v_src01 = vld1q(src0 + x + 16 / sizeof(type));
-            vec128 v_src10 = vld1q(src1 + x), v_src11 = vld1q(src1 + x + 16 / sizeof(type));
-            vec128 v_dst;
-
-            op(v_src00, v_src10, v_dst);
-            vst1q(dst + x, v_dst);
-
-            op(v_src01, v_src11, v_dst);
-            vst1q(dst + x + 16 / sizeof(type), v_dst);
-        }
-        for( ; x < roiw_tail; x += step_tail )
-        {
-            vec64 v_src0 = vld1(src0 + x);
-            vec64 v_src1 = vld1(src1 + x);
-            vec64 v_dst;
-
-            op(v_src0, v_src1, v_dst);
-            vst1(dst + x, v_dst);
-        }
-
-        for (; x < size.width; ++x)
-        {
-            op(src0 + x, src1 + x, dst + x);
-        }
-    }
-}
-
-} }
-
-#endif // CAROTENE_NEON
-
-#endif
--- a/3rdparty/carotene/src/warp_affine.cpp
+++ b/3rdparty/carotene/src/warp_affine.cpp
@ -1,434 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2015, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-#include "remap.hpp"
-
-namespace CAROTENE_NS {
-
-bool isWarpAffineNearestNeighborSupported(const Size2D &ssize)
-{
-#if SIZE_MAX > UINT32_MAX
-    return !(ssize.width > 0xffffFFFF || ssize.height > 0xffffFFFF) && // Restrict image size since internal index evaluation
-                                                                       // is performed with u32
-           isSupportedConfiguration();
-#else
-    (void)ssize;
-    return isSupportedConfiguration();
-#endif
-}
-
-bool isWarpAffineLinearSupported(const Size2D &ssize)
-{
-#if SIZE_MAX > UINT32_MAX
-    return !(ssize.width > 0xffffFFFF || ssize.height > 0xffffFFFF) && // Restrict image size since internal index evaluation
-                                                                       // is performed with u32
-           isSupportedConfiguration();
-#else
-    (void)ssize;
-    return isSupportedConfiguration();
-#endif
-}
-
-void warpAffineNearestNeighbor(const Size2D &ssize, const Size2D &dsize,
-                               const u8 * srcBase, ptrdiff_t srcStride,
-                               const f32 * m,
-                               u8 * dstBase, ptrdiff_t dstStride,
-                               BORDER_MODE borderMode, u8 borderValue)
-{
-    internal::assertSupportedConfiguration(isWarpAffineNearestNeighborSupported(ssize));
-#ifdef CAROTENE_NEON
-    using namespace internal;
-
-    s32 _map[BLOCK_SIZE * BLOCK_SIZE + 16];
-    s32 * map = alignPtr(_map, 16);
-
-    int32x4_t v_width4 = vdupq_n_s32(ssize.width - 1), v_height4 = vdupq_n_s32(ssize.height - 1);
-    int32x4_t v_step4 = vdupq_n_s32(srcStride);
-    float32x4_t v_4 = vdupq_n_f32(4.0f);
-
-    float32x4_t v_m0 = vdupq_n_f32(m[0]);
-    float32x4_t v_m1 = vdupq_n_f32(m[1]);
-    float32x4_t v_m2 = vdupq_n_f32(m[2]);
-    float32x4_t v_m3 = vdupq_n_f32(m[3]);
-    float32x4_t v_m4 = vdupq_n_f32(m[4]);
-    float32x4_t v_m5 = vdupq_n_f32(m[5]);
-
-    if (borderMode == BORDER_MODE_REPLICATE)
-    {
-        int32x4_t v_zero4 = vdupq_n_s32(0);
-
-        for (size_t i = 0; i < dsize.height; i += BLOCK_SIZE)
-        {
-            size_t blockHeight = std::min<size_t>(BLOCK_SIZE, dsize.height - i);
-            for (size_t j = 0; j < dsize.width; j += BLOCK_SIZE)
-            {
-                size_t blockWidth = std::min<size_t>(BLOCK_SIZE, dsize.width - j);
-
-                // compute table
-                for (size_t y = 0; y < blockHeight; ++y)
-                {
-                    s32 * map_row = getRowPtr(&map[0], blockWidth * sizeof(s32), y);
-
-                    size_t x = 0, y_ = y + i;
-                    f32 indeces[4] = { j + 0.0f, j + 1.0f, j + 2.0f, j + 3.0f };
-                    float32x4_t v_x = vld1q_f32(indeces), v_y = vdupq_n_f32(y_);
-                    float32x4_t v_yx = vmlaq_f32(v_m4, v_m2, v_y), v_yy = vmlaq_f32(v_m5, v_m3, v_y);
-
-                    for ( ; x + 4 <= blockWidth; x += 4)
-                    {
-                        float32x4_t v_src_xf = vmlaq_f32(v_yx, v_m0, v_x);
-                        float32x4_t v_src_yf = vmlaq_f32(v_yy, v_m1, v_x);
-
-                        int32x4_t v_src_x = vmaxq_s32(v_zero4, vminq_s32(v_width4, vcvtq_s32_f32(v_src_xf)));
-                        int32x4_t v_src_y = vmaxq_s32(v_zero4, vminq_s32(v_height4, vcvtq_s32_f32(v_src_yf)));
-                        int32x4_t v_src_index = vmlaq_s32(v_src_x, v_src_y, v_step4);
-                        vst1q_s32(map_row + x, v_src_index);
-
-                        v_x = vaddq_f32(v_x, v_4);
-                    }
-
-                    f32 yx = m[2] * y_ + m[4], yy = m[3] * y_ + m[5];
-                    for (ptrdiff_t x_ = x + j; x < blockWidth; ++x, ++x_)
-                    {
-                        f32 src_x_f = m[0] * x_ + yx;
-                        f32 src_y_f = m[1] * x_ + yy;
-                        s32 src_x = floorf(src_x_f), src_y = floorf(src_y_f);
-
-                        src_x = std::max(0, std::min<s32>(ssize.width - 1, src_x));
-                        src_y = std::max(0, std::min<s32>(ssize.height - 1, src_y));
-                        map_row[x] = src_y * srcStride + src_x;
-                    }
-                }
-
-                // make remap
-                remapNearestNeighborReplicate(Size2D(blockWidth, blockHeight), srcBase, &map[0],
-                                                        getRowPtr(dstBase, dstStride, i) + j, dstStride);
-            }
-        }
-    }
-    else if (borderMode == BORDER_MODE_CONSTANT)
-    {
-        int32x4_t v_m1_4 = vdupq_n_s32(-1);
-        float32x4_t v_zero4 = vdupq_n_f32(0.0f);
-
-        for (size_t i = 0; i < dsize.height; i += BLOCK_SIZE)
-        {
-            size_t blockHeight = std::min<size_t>(BLOCK_SIZE, dsize.height - i);
-            for (size_t j = 0; j < dsize.width; j += BLOCK_SIZE)
-            {
-                size_t blockWidth = std::min<size_t>(BLOCK_SIZE, dsize.width - j);
-
-                // compute table
-                for (size_t y = 0; y < blockHeight; ++y)
-                {
-                    s32 * map_row = getRowPtr(&map[0], blockWidth * sizeof(s32), y);
-
-                    size_t x = 0, y_ = y + i;
-                    f32 indeces[4] = { j + 0.0f, j + 1.0f, j + 2.0f, j + 3.0f };
-                    float32x4_t v_x = vld1q_f32(indeces), v_y = vdupq_n_f32(y_);
-                    float32x4_t v_yx = vmlaq_f32(v_m4, v_m2, v_y), v_yy = vmlaq_f32(v_m5, v_m3, v_y);
-
-                    for ( ; x + 4 <= blockWidth; x += 4)
-                    {
-                        float32x4_t v_src_xf = vmlaq_f32(v_yx, v_m0, v_x);
-                        float32x4_t v_src_yf = vmlaq_f32(v_yy, v_m1, v_x);
-
-                        int32x4_t v_src_x = vcvtq_s32_f32(v_src_xf);
-                        int32x4_t v_src_y = vcvtq_s32_f32(v_src_yf);
-                        uint32x4_t v_mask = vandq_u32(vandq_u32(vcgeq_f32(v_src_xf, v_zero4), vcleq_s32(v_src_x, v_width4)),
-                                                      vandq_u32(vcgeq_f32(v_src_yf, v_zero4), vcleq_s32(v_src_y, v_height4)));
-                        int32x4_t v_src_index = vbslq_s32(v_mask, vmlaq_s32(v_src_x, v_src_y, v_step4), v_m1_4);
-                        vst1q_s32(map_row + x, v_src_index);
-
-                        v_x = vaddq_f32(v_x, v_4);
-                    }
-
-                    f32 yx = m[2] * y_ + m[4], yy = m[3] * y_ + m[5];
-                    for (ptrdiff_t x_ = x + j; x < blockWidth; ++x, ++x_)
-                    {
-                        f32 src_x_f = m[0] * x_ + yx;
-                        f32 src_y_f = m[1] * x_ + yy;
-                        s32 src_x = floorf(src_x_f), src_y = floorf(src_y_f);
-
-                        map_row[x] = (src_x >= 0) && (src_x < (s32)ssize.width) &&
-                                     (src_y >= 0) && (src_y < (s32)ssize.height) ? src_y * srcStride + src_x : -1;
-                    }
-                }
-
-                // make remap
-                remapNearestNeighborConst(Size2D(blockWidth, blockHeight), srcBase, &map[0],
-                                                    getRowPtr(dstBase, dstStride, i) + j, dstStride, borderValue);
-            }
-        }
-    }
-#else
-    (void)ssize;
-    (void)dsize;
-    (void)srcBase;
-    (void)srcStride;
-    (void)m;
-    (void)dstBase;
-    (void)dstStride;
-    (void)borderMode;
-    (void)borderValue;
-#endif
-}
-
-void warpAffineLinear(const Size2D &ssize, const Size2D &dsize,
-                      const u8 * srcBase, ptrdiff_t srcStride,
-                      const f32 * m,
-                      u8 * dstBase, ptrdiff_t dstStride,
-                      BORDER_MODE borderMode, u8 borderValue)
-{
-    internal::assertSupportedConfiguration(isWarpAffineLinearSupported(ssize));
-#ifdef CAROTENE_NEON
-    using namespace internal;
-
-    s32 _map[((BLOCK_SIZE * BLOCK_SIZE) << 2) + 16];
-    f32 _coeffs[((BLOCK_SIZE * BLOCK_SIZE) << 1) + 16];
-    s32 * map = alignPtr(_map, 16);
-    f32 * coeffs = alignPtr(_coeffs, 16);
-
-    int32x4_t v_width4 = vdupq_n_s32(ssize.width - 1), v_height4 = vdupq_n_s32(ssize.height - 1);
-    int32x4_t v_step4 = vdupq_n_s32(srcStride), v_1 = vdupq_n_s32(1);
-    float32x4_t v_zero4f = vdupq_n_f32(0.0f), v_one4f = vdupq_n_f32(1.0f);
-
-    float32x4_t v_m0 = vdupq_n_f32(m[0]);
-    float32x4_t v_m1 = vdupq_n_f32(m[1]);
-    float32x4_t v_m2 = vdupq_n_f32(m[2]);
-    float32x4_t v_m3 = vdupq_n_f32(m[3]);
-    float32x4_t v_m4 = vdupq_n_f32(m[4]);
-    float32x4_t v_m5 = vdupq_n_f32(m[5]);
-
-    if (borderMode == BORDER_MODE_REPLICATE)
-    {
-        int32x4_t v_zero4 = vdupq_n_s32(0);
-
-        for (size_t i = 0; i < dsize.height; i += BLOCK_SIZE)
-        {
-            size_t blockHeight = std::min<size_t>(BLOCK_SIZE, dsize.height - i);
-            for (size_t j = 0; j < dsize.width; j += BLOCK_SIZE)
-            {
-                size_t blockWidth = std::min<size_t>(BLOCK_SIZE, dsize.width - j);
-
-                // compute table
-                for (size_t y = 0; y < blockHeight; ++y)
-                {
-                    s32 * map_row = getRowPtr(map, blockWidth * sizeof(s32) * 4, y);
-                    f32 * coeff_row = getRowPtr(coeffs, blockWidth * sizeof(f32) * 2, y);
-
-                    size_t x = 0, y_ = y + i;
-                    f32 indeces[4] = { j + 0.0f, j + 1.0f, j + 2.0f, j + 3.0f };
-                    float32x4_t v_x = vld1q_f32(indeces), v_y = vdupq_n_f32(y_), v_4 = vdupq_n_f32(4.0f);
-                    float32x4_t v_yx = vmlaq_f32(v_m4, v_m2, v_y), v_yy = vmlaq_f32(v_m5, v_m3, v_y);
-
-                    for ( ; x + 4 <= blockWidth; x += 4)
-                    {
-                        float32x4_t v_src_xf = vmlaq_f32(v_yx, v_m0, v_x);
-                        float32x4_t v_src_yf = vmlaq_f32(v_yy, v_m1, v_x);
-
-                        int32x4_t v_src_x = vcvtq_s32_f32(v_src_xf);
-                        int32x4_t v_src_y = vcvtq_s32_f32(v_src_yf);
-
-                        float32x4x2_t v_coeff;
-                        v_coeff.val[0] = vsubq_f32(v_src_xf, vcvtq_f32_s32(v_src_x));
-                        v_coeff.val[1] = vsubq_f32(v_src_yf, vcvtq_f32_s32(v_src_y));
-                        uint32x4_t v_maskx = vcltq_f32(v_coeff.val[0], v_zero4f);
-                        uint32x4_t v_masky = vcltq_f32(v_coeff.val[1], v_zero4f);
-                        v_coeff.val[0] = vbslq_f32(v_maskx, vaddq_f32(v_one4f, v_coeff.val[0]), v_coeff.val[0]);
-                        v_coeff.val[1] = vbslq_f32(v_masky, vaddq_f32(v_one4f, v_coeff.val[1]), v_coeff.val[1]);
-                        v_src_x = vbslq_s32(v_maskx, vsubq_s32(v_src_x, v_1), v_src_x);
-                        v_src_y = vbslq_s32(v_masky, vsubq_s32(v_src_y, v_1), v_src_y);
-
-                        int32x4_t v_dst0_x = vmaxq_s32(v_zero4, vminq_s32(v_width4, v_src_x));
-                        int32x4_t v_dst0_y = vmaxq_s32(v_zero4, vminq_s32(v_height4, v_src_y));
-                        int32x4_t v_dst1_x = vmaxq_s32(v_zero4, vminq_s32(v_width4, vaddq_s32(v_1, v_src_x)));
-                        int32x4_t v_dst1_y = vmaxq_s32(v_zero4, vminq_s32(v_height4, vaddq_s32(v_1, v_src_y)));
-
-                        int32x4x4_t v_dst_index;
-                        v_dst_index.val[0] = vmlaq_s32(v_dst0_x, v_dst0_y, v_step4);
-                        v_dst_index.val[1] = vmlaq_s32(v_dst1_x, v_dst0_y, v_step4);
-                        v_dst_index.val[2] = vmlaq_s32(v_dst0_x, v_dst1_y, v_step4);
-                        v_dst_index.val[3] = vmlaq_s32(v_dst1_x, v_dst1_y, v_step4);
-
-                        vst2q_f32(coeff_row + (x << 1), v_coeff);
-                        vst4q_s32(map_row + (x << 2), v_dst_index);
-
-                        v_x = vaddq_f32(v_x, v_4);
-                    }
-
-                    f32 yx = m[2] * y_ + m[4], yy = m[3] * y_ + m[5];
-                    for (ptrdiff_t x_ = x + j; x < blockWidth; ++x, ++x_)
-                    {
-                        f32 src_x_f = m[0] * x_ + yx;
-                        f32 src_y_f = m[1] * x_ + yy;
-
-                        s32 src0_x = (s32)floorf(src_x_f);
-                        s32 src0_y = (s32)floorf(src_y_f);
-
-                        coeff_row[(x << 1) + 0] = src_x_f - src0_x;
-                        coeff_row[(x << 1) + 1] = src_y_f - src0_y;
-
-                        s32 src1_y = std::max(0, std::min<s32>(ssize.height - 1, src0_y + 1));
-                        src0_y = std::max(0, std::min<s32>(ssize.height - 1, src0_y));
-                        s32 src1_x = std::max(0, std::min<s32>(ssize.width - 1, src0_x + 1));
-                        src0_x = std::max(0, std::min<s32>(ssize.width - 1, src0_x));
-
-                        map_row[(x << 2) + 0] = src0_y * srcStride + src0_x;
-                        map_row[(x << 2) + 1] = src0_y * srcStride + src1_x;
-                        map_row[(x << 2) + 2] = src1_y * srcStride + src0_x;
-                        map_row[(x << 2) + 3] = src1_y * srcStride + src1_x;
-                    }
-                }
-
-                remapLinearReplicate(Size2D(blockWidth, blockHeight),
-                                     srcBase, &map[0], &coeffs[0],
-                                     getRowPtr(dstBase, dstStride, i) + j, dstStride);
-            }
-        }
-    }
-    else if (borderMode == BORDER_MODE_CONSTANT)
-    {
-        float32x4_t v_zero4 = vdupq_n_f32(0.0f);
-        int32x4_t v_m1_4 = vdupq_n_s32(-1);
-
-        for (size_t i = 0; i < dsize.height; i += BLOCK_SIZE)
-        {
-            size_t blockHeight = std::min<size_t>(BLOCK_SIZE, dsize.height - i);
-            for (size_t j = 0; j < dsize.width; j += BLOCK_SIZE)
-            {
-                size_t blockWidth = std::min<size_t>(BLOCK_SIZE, dsize.width - j);
-
-                // compute table
-                for (size_t y = 0; y < blockHeight; ++y)
-                {
-                    s32 * map_row = getRowPtr(map, blockWidth * sizeof(s32) * 4, y);
-                    f32 * coeff_row = getRowPtr(coeffs, blockWidth * sizeof(f32) * 2, y);
-
-                    size_t x = 0, y_ = y + i;
-                    f32 indeces[4] = { j + 0.0f, j + 1.0f, j + 2.0f, j + 3.0f };
-                    float32x4_t v_x = vld1q_f32(indeces), v_y = vdupq_n_f32(y_), v_4 = vdupq_n_f32(4.0f);
-                    float32x4_t v_yx = vmlaq_f32(v_m4, v_m2, v_y), v_yy = vmlaq_f32(v_m5, v_m3, v_y);
-
-                    for ( ; x + 4 <= blockWidth; x += 4)
-                    {
-                        float32x4_t v_src_xf = vmlaq_f32(v_yx, v_m0, v_x);
-                        float32x4_t v_src_yf = vmlaq_f32(v_yy, v_m1, v_x);
-
-                        int32x4_t v_src_x0 = vcvtq_s32_f32(v_src_xf);
-                        int32x4_t v_src_y0 = vcvtq_s32_f32(v_src_yf);
-
-                        float32x4x2_t v_coeff;
-                        v_coeff.val[0] = vsubq_f32(v_src_xf, vcvtq_f32_s32(v_src_x0));
-                        v_coeff.val[1] = vsubq_f32(v_src_yf, vcvtq_f32_s32(v_src_y0));
-                        uint32x4_t v_maskx = vcltq_f32(v_coeff.val[0], v_zero4f);
-                        uint32x4_t v_masky = vcltq_f32(v_coeff.val[1], v_zero4f);
-                        v_coeff.val[0] = vbslq_f32(v_maskx, vaddq_f32(v_one4f, v_coeff.val[0]), v_coeff.val[0]);
-                        v_coeff.val[1] = vbslq_f32(v_masky, vaddq_f32(v_one4f, v_coeff.val[1]), v_coeff.val[1]);
-                        v_src_x0 = vbslq_s32(v_maskx, vsubq_s32(v_src_x0, v_1), v_src_x0);
-                        v_src_y0 = vbslq_s32(v_masky, vsubq_s32(v_src_y0, v_1), v_src_y0);
-
-                        int32x4_t v_src_x1 = vaddq_s32(v_src_x0, v_1);
-                        int32x4_t v_src_y1 = vaddq_s32(v_src_y0, v_1);
-
-                        int32x4x4_t v_dst_index;
-                        v_dst_index.val[0] = vmlaq_s32(v_src_x0, v_src_y0, v_step4);
-                        v_dst_index.val[1] = vmlaq_s32(v_src_x1, v_src_y0, v_step4);
-                        v_dst_index.val[2] = vmlaq_s32(v_src_x0, v_src_y1, v_step4);
-                        v_dst_index.val[3] = vmlaq_s32(v_src_x1, v_src_y1, v_step4);
-
-                        uint32x4_t v_mask_x0 = vandq_u32(vcgeq_f32(v_src_xf, v_zero4), vcleq_s32(v_src_x0, v_width4));
-                        uint32x4_t v_mask_x1 = vandq_u32(vcgeq_f32(vaddq_f32(v_src_xf, v_one4f), v_zero4), vcleq_s32(v_src_x1, v_width4));
-                        uint32x4_t v_mask_y0 = vandq_u32(vcgeq_f32(v_src_yf, v_zero4), vcleq_s32(v_src_y0, v_height4));
-                        uint32x4_t v_mask_y1 = vandq_u32(vcgeq_f32(vaddq_f32(v_src_yf, v_one4f), v_zero4), vcleq_s32(v_src_y1, v_height4));
-
-                        v_dst_index.val[0] = vbslq_s32(vandq_u32(v_mask_x0, v_mask_y0), v_dst_index.val[0], v_m1_4);
-                        v_dst_index.val[1] = vbslq_s32(vandq_u32(v_mask_x1, v_mask_y0), v_dst_index.val[1], v_m1_4);
-                        v_dst_index.val[2] = vbslq_s32(vandq_u32(v_mask_x0, v_mask_y1), v_dst_index.val[2], v_m1_4);
-                        v_dst_index.val[3] = vbslq_s32(vandq_u32(v_mask_x1, v_mask_y1), v_dst_index.val[3], v_m1_4);
-
-                        vst2q_f32(coeff_row + (x << 1), v_coeff);
-                        vst4q_s32(map_row + (x << 2), v_dst_index);
-
-                        v_x = vaddq_f32(v_x, v_4);
-                    }
-
-                    f32 yx = m[2] * y_ + m[4], yy = m[3] * y_ + m[5];
-                    for (ptrdiff_t x_ = x + j; x < blockWidth; ++x, ++x_)
-                    {
-                        f32 src_x_f = m[0] * x_ + yx;
-                        f32 src_y_f = m[1] * x_ + yy;
-
-                        s32 src0_x = (s32)floorf(src_x_f), src1_x = src0_x + 1;
-                        s32 src0_y = (s32)floorf(src_y_f), src1_y = src0_y + 1;
-
-                        coeff_row[(x << 1) + 0] = src_x_f - src0_x;
-                        coeff_row[(x << 1) + 1] = src_y_f - src0_y;
-
-                        map_row[(x << 2) + 0] = (src0_x >= 0) && (src0_x < (s32)ssize.width) &&
-                                                (src0_y >= 0) && (src0_y < (s32)ssize.height) ? src0_y * srcStride + src0_x : -1;
-                        map_row[(x << 2) + 1] = (src1_x >= 0) && (src1_x < (s32)ssize.width) &&
-                                                (src0_y >= 0) && (src0_y < (s32)ssize.height) ? src0_y * srcStride + src1_x : -1;
-                        map_row[(x << 2) + 2] = (src0_x >= 0) && (src0_x < (s32)ssize.width) &&
-                                                (src1_y >= 0) && (src1_y < (s32)ssize.height) ? src1_y * srcStride + src0_x : -1;
-                        map_row[(x << 2) + 3] = (src1_x >= 0) && (src1_x < (s32)ssize.width) &&
-                                                (src1_y >= 0) && (src1_y < (s32)ssize.height) ? src1_y * srcStride + src1_x : -1;
-                    }
-                }
-
-                remapLinearConst(Size2D(blockWidth, blockHeight),
-                                 srcBase, &map[0], &coeffs[0],
-                                 getRowPtr(dstBase, dstStride, i) + j, dstStride, borderValue);
-            }
-        }
-    }
-#else
-    (void)ssize;
-    (void)dsize;
-    (void)srcBase;
-    (void)srcStride;
-    (void)m;
-    (void)dstBase;
-    (void)dstStride;
-    (void)borderMode;
-    (void)borderValue;
-#endif
-}
-
-} // namespace CAROTENE_NS
--- a/3rdparty/carotene/src/warp_perspective.cpp
+++ b/3rdparty/carotene/src/warp_perspective.cpp
@ -1,464 +0,0 @@
-/*
- * By downloading, copying, installing or using the software you agree to this license.
- * If you do not agree to this license, do not download, install,
- * copy or use the software.
- *
- *
- *                           License Agreement
- *                For Open Source Computer Vision Library
- *                        (3-clause BSD License)
- *
- * Copyright (C) 2015, NVIDIA Corporation, all rights reserved.
- * Third party copyrights are property of their respective owners.
- *
- * Redistribution and use in source and binary forms, with or without modification,
- * are permitted provided that the following conditions are met:
- *
- *   * Redistributions of source code must retain the above copyright notice,
- *     this list of conditions and the following disclaimer.
- *
- *   * Redistributions in binary form must reproduce the above copyright notice,
- *     this list of conditions and the following disclaimer in the documentation
- *     and/or other materials provided with the distribution.
- *
- *   * Neither the names of the copyright holders nor the names of the contributors
- *     may be used to endorse or promote products derived from this software
- *     without specific prior written permission.
- *
- * This software is provided by the copyright holders and contributors "as is" and
- * any express or implied warranties, including, but not limited to, the implied
- * warranties of merchantability and fitness for a particular purpose are disclaimed.
- * In no event shall copyright holders or contributors be liable for any direct,
- * indirect, incidental, special, exemplary, or consequential damages
- * (including, but not limited to, procurement of substitute goods or services;
- * loss of use, data, or profits; or business interruption) however caused
- * and on any theory of liability, whether in contract, strict liability,
- * or tort (including negligence or otherwise) arising in any way out of
- * the use of this software, even if advised of the possibility of such damage.
- */
-
-
-
-#include "remap.hpp"
-
-namespace CAROTENE_NS {
-
-bool isWarpPerspectiveNearestNeighborSupported(const Size2D &ssize)
-{
-#if SIZE_MAX > UINT32_MAX
-    // size_t is wider than 32 bits here: refuse images whose width or height exceeds 0xffffFFFF, because the warp evaluates source indices in 32-bit arithmetic.
-    if (ssize.width > 0xffffFFFF || ssize.height > 0xffffFFFF)
-        return false;
-#else
-    (void)ssize; // no size restriction in this configuration; the cast only silences the unused-parameter warning
-#endif
-    return isSupportedConfiguration();
-}
-
-bool isWarpPerspectiveLinearSupported(const Size2D &ssize)
-{
-#if SIZE_MAX > UINT32_MAX
-    // size_t is wider than 32 bits here: refuse images whose width or height exceeds 0xffffFFFF, because the warp evaluates source indices in 32-bit arithmetic.
-    if (ssize.width > 0xffffFFFF || ssize.height > 0xffffFFFF)
-        return false;
-#else
-    (void)ssize; // no size restriction in this configuration; the cast only silences the unused-parameter warning
-#endif
-    return isSupportedConfiguration();
-}
-// Nearest-neighbour perspective warp of a u8 image: destination pixel (x, y) reads the source at ((m[0]*x + m[3]*y + m[6]) / w, (m[1]*x + m[4]*y + m[7]) / w), where w = m[2]*x + m[5]*y + m[8].
-void warpPerspectiveNearestNeighbor(const Size2D &ssize, const Size2D &dsize,   // ssize: source size used for clamping / bounds tests; dsize: destination area that is iterated
-                                    const u8 * srcBase, ptrdiff_t srcStride,    // source pixels and the row stride that is multiplied into every source index
-                                    const f32 * m,                              // nine coefficients m[0]..m[8], read as in the formula above
-                                    u8 * dstBase, ptrdiff_t dstStride,          // destination pixels and their row stride
-                                    BORDER_MODE borderMode, u8 borderValue)     // only REPLICATE and CONSTANT are handled; borderValue is forwarded in CONSTANT mode only
-{
-    internal::assertSupportedConfiguration(isWarpPerspectiveNearestNeighborSupported(ssize));
-#ifdef CAROTENE_NEON
-    using namespace internal;
-    // Scratch table with one s32 source index per pixel of a block; the 16 extra elements leave slack for alignPtr (presumably it rounds the pointer up to a 16-byte boundary -- confirm).
-    s32 _map[BLOCK_SIZE * BLOCK_SIZE + 16];
-    s32 * map = alignPtr(_map, 16);
-    // Loop-invariant vectors: largest valid source x and y, the source row stride, and the x increment applied after each group of four pixels.
-    int32x4_t v_width4 = vdupq_n_s32(ssize.width - 1), v_height4 = vdupq_n_s32(ssize.height - 1);
-    int32x4_t v_step4 = vdupq_n_s32(srcStride);
-    float32x4_t v_4 = vdupq_n_f32(4.0f);
-    // Every matrix coefficient broadcast to all four lanes.
-    float32x4_t v_m0 = vdupq_n_f32(m[0]);
-    float32x4_t v_m1 = vdupq_n_f32(m[1]);
-    float32x4_t v_m2 = vdupq_n_f32(m[2]);
-    float32x4_t v_m3 = vdupq_n_f32(m[3]);
-    float32x4_t v_m4 = vdupq_n_f32(m[4]);
-    float32x4_t v_m5 = vdupq_n_f32(m[5]);
-    float32x4_t v_m6 = vdupq_n_f32(m[6]);
-    float32x4_t v_m7 = vdupq_n_f32(m[7]);
-    float32x4_t v_m8 = vdupq_n_f32(m[8]);
-
-    if (borderMode == BORDER_MODE_REPLICATE)
-    {
-        int32x4_t v_zero4 = vdupq_n_s32(0);
-        // The destination is processed in tiles of at most BLOCK_SIZE x BLOCK_SIZE pixels; (i, j) is the top-left corner of the current tile.
-        for (size_t i = 0; i < dsize.height; i += BLOCK_SIZE)
-        {
-            size_t blockHeight = std::min<size_t>(BLOCK_SIZE, dsize.height - i);
-            for (size_t j = 0; j < dsize.width; j += BLOCK_SIZE)
-            {
-                size_t blockWidth = std::min<size_t>(BLOCK_SIZE, dsize.width - j);
-
-                // Fill the index table for this tile, one row at a time.
-                for (size_t y = 0; y < blockHeight; ++y)
-                {
-                    s32 * map_row = getRowPtr(&map[0], blockWidth * sizeof(s32), y);
-                    // x counts within the tile, y_ is the absolute destination row; v_x starts at the absolute columns j..j+3.
-                    size_t x = 0, y_ = y + i;
-                    f32 indeces[4] = { j + 0.0f, j + 1.0f, j + 2.0f, j + 3.0f };
-                    float32x4_t v_x = vld1q_f32(indeces), v_y = vdupq_n_f32(y_);
-                    float32x4_t v_yx = vmlaq_f32(v_m6, v_m3, v_y), v_yy = vmlaq_f32(v_m7, v_m4, v_y), // row-constant parts: m3*y + m6, m4*y + m7
-                        v_yw = vmlaq_f32(v_m8, v_m5, v_y);                                            // and m5*y + m8
-                    // Vector body: four destination pixels per iteration.
-                    for ( ; x + 4 <= blockWidth; x += 4)
-                    {
-                        float32x4_t v_src_xf = vmlaq_f32(v_yx, v_m0, v_x);
-                        float32x4_t v_src_yf = vmlaq_f32(v_yy, v_m1, v_x);
-                        float32x4_t v_wf = vrecpq_f32(vmlaq_f32(v_yw, v_m2, v_x)); // presumably 1 / w; vrecpq_f32 is a helper defined outside this file -- confirm its precision
-                        v_src_xf = vmulq_f32(v_wf, v_src_xf);
-                        v_src_yf = vmulq_f32(v_wf, v_src_yf);
-                        // vcvtq_s32_f32 truncates toward zero rather than flooring; that differs from floorf only for negative coordinates, which the clamp to [0, size - 1] maps to 0 either way.
-                        int32x4_t v_src_x = vmaxq_s32(v_zero4, vminq_s32(v_width4, vcvtq_s32_f32(v_src_xf)));
-                        int32x4_t v_src_y = vmaxq_s32(v_zero4, vminq_s32(v_height4, vcvtq_s32_f32(v_src_yf)));
-                        int32x4_t v_src_index = vmlaq_s32(v_src_x, v_src_y, v_step4); // index = y * srcStride + x
-                        vst1q_s32(map_row + x, v_src_index);
-
-                        v_x = vaddq_f32(v_x, v_4);
-                    }
-                    // Scalar tail for the last blockWidth % 4 pixels; x_ is the absolute destination column matching x.
-                    f32 yx = m[3] * y_ + m[6], yy = m[4] * y_ + m[7], yw = m[5] * y_ + m[8];
-                    for (ptrdiff_t x_ = x + j; x < blockWidth; ++x, ++x_)
-                    {
-                        f32 w_f = 1.0f / (m[2] * x_ + yw);
-                        f32 src_x_f = (m[0] * x_ + yx) * w_f;
-                        f32 src_y_f = (m[1] * x_ + yy) * w_f;
-                        s32 src_x = floorf(src_x_f), src_y = floorf(src_y_f);
-
-                        src_x = std::max(0, std::min<s32>(ssize.width - 1, src_x));
-                        src_y = std::max(0, std::min<s32>(ssize.height - 1, src_y));
-                        map_row[x] = src_y * srcStride + src_x;
-                    }
-                }
-
-                // Hand the finished index table to the remap helper, which writes this tile of the destination.
-                remapNearestNeighborReplicate(Size2D(blockWidth, blockHeight), srcBase, &map[0],
-                                              getRowPtr(dstBase, dstStride, i) + j, dstStride);
-            }
-        }
-    }
-    else if (borderMode == BORDER_MODE_CONSTANT)
-    {
-        int32x4_t v_m1_4 = vdupq_n_s32(-1);       // index stored for every pixel whose source position is out of bounds
-        float32x4_t v_zero4 = vdupq_n_f32(0.0f);
-        // Same tiling as the REPLICATE branch; here out-of-range positions are marked with -1 instead of being clamped.
-        for (size_t i = 0; i < dsize.height; i += BLOCK_SIZE)
-        {
-            size_t blockHeight = std::min<size_t>(BLOCK_SIZE, dsize.height - i);
-            for (size_t j = 0; j < dsize.width; j += BLOCK_SIZE)
-            {
-                size_t blockWidth = std::min<size_t>(BLOCK_SIZE, dsize.width - j);
-
-                // Fill the index table for this tile, one row at a time.
-                for (size_t y = 0; y < blockHeight; ++y)
-                {
-                    s32 * map_row = getRowPtr(&map[0], blockWidth * sizeof(s32), y);
-                    // x counts within the tile, y_ is the absolute destination row; v_x starts at the absolute columns j..j+3.
-                    size_t x = 0, y_ = y + i;
-                    f32 indeces[4] = { j + 0.0f, j + 1.0f, j + 2.0f, j + 3.0f };
-                    float32x4_t v_x = vld1q_f32(indeces), v_y = vdupq_n_f32(y_);
-                    float32x4_t v_yx = vmlaq_f32(v_m6, v_m3, v_y), v_yy = vmlaq_f32(v_m7, v_m4, v_y), // row-constant parts: m3*y + m6, m4*y + m7
-                        v_yw = vmlaq_f32(v_m8, v_m5, v_y);                                            // and m5*y + m8
-                    // Vector body: four destination pixels per iteration.
-                    for ( ; x + 4 <= blockWidth; x += 4)
-                    {
-                        float32x4_t v_src_xf = vmlaq_f32(v_yx, v_m0, v_x);
-                        float32x4_t v_src_yf = vmlaq_f32(v_yy, v_m1, v_x);
-                        float32x4_t v_wf = vrecpq_f32(vmlaq_f32(v_yw, v_m2, v_x)); // presumably 1 / w; vrecpq_f32 is a helper defined outside this file -- confirm its precision
-                        v_src_xf = vmulq_f32(v_wf, v_src_xf);
-                        v_src_yf = vmulq_f32(v_wf, v_src_yf);
-                        // The lower-bound test is made on the float value so that coordinates in (-1, 0), which truncate to 0, are still rejected; the upper bound is tested on the integer.
-                        int32x4_t v_src_x = vcvtq_s32_f32(v_src_xf);
-                        int32x4_t v_src_y = vcvtq_s32_f32(v_src_yf);
-                        uint32x4_t v_mask = vandq_u32(vandq_u32(vcgeq_f32(v_src_xf, v_zero4), vcleq_s32(v_src_x, v_width4)),
-                                                      vandq_u32(vcgeq_f32(v_src_yf, v_zero4), vcleq_s32(v_src_y, v_height4)));
-                        int32x4_t v_src_index = vbslq_s32(v_mask, vmlaq_s32(v_src_x, v_src_y, v_step4), v_m1_4); // in-bounds lanes get y * srcStride + x, the others -1
-                        vst1q_s32(map_row + x, v_src_index);
-
-                        v_x = vaddq_f32(v_x, v_4);
-                    }
-                    // Scalar tail for the last blockWidth % 4 pixels; x_ is the absolute destination column matching x.
-                    f32 yx = m[3] * y_ + m[6], yy = m[4] * y_ + m[7], yw = m[5] * y_ + m[8];
-                    for (ptrdiff_t x_ = x + j; x < blockWidth; ++x, ++x_)
-                    {
-                        f32 w_f = 1.0f / (m[2] * x_ + yw);
-                        f32 src_x_f = (m[0] * x_ + yx) * w_f;
-                        f32 src_y_f = (m[1] * x_ + yy) * w_f;
-                        s32 src_x = floorf(src_x_f), src_y = floorf(src_y_f);
-
-                        map_row[x] = (src_x >= 0) && (src_x < (s32)ssize.width) &&
-                                     (src_y >= 0) && (src_y < (s32)ssize.height) ? src_y * srcStride + src_x : -1;
-                    }
-                }
-
-                // Hand the table to the remap helper; presumably it writes borderValue wherever the index is -1 -- confirm in remap.hpp.
-                remapNearestNeighborConst(Size2D(blockWidth, blockHeight), srcBase, &map[0],
-                                          getRowPtr(dstBase, dstStride, i) + j, dstStride, borderValue);
-            }
-        }
-    }
-#else
-    (void)ssize;       // without CAROTENE_NEON nothing is computed; the casts below only silence unused-parameter warnings
-    (void)dsize;
-    (void)srcBase;
-    (void)srcStride;
-    (void)m;
-    (void)dstBase;
-    (void)dstStride;
-    (void)borderMode;
-    (void)borderValue;
-#endif
-}
-// Bilinear perspective warp of a u8 image: same coordinate formula as the nearest-neighbour variant (w = m[2]*x + m[5]*y + m[8]), but each destination pixel gets four source indices plus the fractional x / y offsets.
-void warpPerspectiveLinear(const Size2D &ssize, const Size2D &dsize,   // ssize: source size used for clamping / bounds tests; dsize: destination area that is iterated
-                           const u8 * srcBase, ptrdiff_t srcStride,    // source pixels and the row stride that is multiplied into every source index
-                           const f32 * m,                              // nine coefficients m[0]..m[8]: src = ((m0*x + m3*y + m6) / w, (m1*x + m4*y + m7) / w)
-                           u8 * dstBase, ptrdiff_t dstStride,          // destination pixels and their row stride
-                           BORDER_MODE borderMode, u8 borderValue)     // only REPLICATE and CONSTANT are handled; borderValue is forwarded in CONSTANT mode only
-{
-    internal::assertSupportedConfiguration(isWarpPerspectiveLinearSupported(ssize));
-#ifdef CAROTENE_NEON
-    using namespace internal;
-    // Scratch tables per block: four s32 indices and two f32 fractions per pixel; the 16 extra elements leave slack for alignPtr (presumably 16-byte alignment -- confirm).
-    s32 _map[((BLOCK_SIZE * BLOCK_SIZE) << 2) + 16];
-    f32 _coeffs[((BLOCK_SIZE * BLOCK_SIZE) << 1) + 16];
-    s32 * map = alignPtr(_map, 16);
-    f32 * coeffs = alignPtr(_coeffs, 16);
-    // Loop-invariant vectors: largest valid source x and y, the source row stride, and integer / float constants.
-    int32x4_t v_width4 = vdupq_n_s32(ssize.width - 1), v_height4 = vdupq_n_s32(ssize.height - 1);
-    int32x4_t v_step4 = vdupq_n_s32(srcStride), v_1 = vdupq_n_s32(1);
-    float32x4_t v_zero4f = vdupq_n_f32(0.0f), v_one4f = vdupq_n_f32(1.0f);
-
-    float32x4_t v_4 = vdupq_n_f32(4.0f);
-    // Every matrix coefficient broadcast to all four lanes.
-    float32x4_t v_m0 = vdupq_n_f32(m[0]);
-    float32x4_t v_m1 = vdupq_n_f32(m[1]);
-    float32x4_t v_m2 = vdupq_n_f32(m[2]);
-    float32x4_t v_m3 = vdupq_n_f32(m[3]);
-    float32x4_t v_m4 = vdupq_n_f32(m[4]);
-    float32x4_t v_m5 = vdupq_n_f32(m[5]);
-    float32x4_t v_m6 = vdupq_n_f32(m[6]);
-    float32x4_t v_m7 = vdupq_n_f32(m[7]);
-    float32x4_t v_m8 = vdupq_n_f32(m[8]);
-
-    if (borderMode == BORDER_MODE_REPLICATE)
-    {
-        int32x4_t v_zero4 = vdupq_n_s32(0);
-        // The destination is processed in tiles of at most BLOCK_SIZE x BLOCK_SIZE pixels; (i, j) is the top-left corner of the current tile.
-        for (size_t i = 0; i < dsize.height; i += BLOCK_SIZE)
-        {
-            size_t blockHeight = std::min<size_t>(BLOCK_SIZE, dsize.height - i);
-            for (size_t j = 0; j < dsize.width; j += BLOCK_SIZE)
-            {
-                size_t blockWidth = std::min<size_t>(BLOCK_SIZE, dsize.width - j);
-
-                // Fill the index and coefficient tables for this tile, one row at a time.
-                for (size_t y = 0; y < blockHeight; ++y)
-                {
-                    s32 * map_row = getRowPtr(map, blockWidth * sizeof(s32) * 4, y);
-                    f32 * coeff_row = getRowPtr(coeffs, blockWidth * sizeof(f32) * 2, y);
-                    // x counts within the tile, y_ is the absolute destination row; v_x starts at the absolute columns j..j+3.
-                    size_t x = 0, y_ = y + i;
-                    f32 indeces[4] = { j + 0.0f, j + 1.0f, j + 2.0f, j + 3.0f };
-                    float32x4_t v_x = vld1q_f32(indeces), v_y = vdupq_n_f32(y_);
-                    float32x4_t v_yx = vmlaq_f32(v_m6, v_m3, v_y), v_yy = vmlaq_f32(v_m7, v_m4, v_y), // row-constant parts: m3*y + m6, m4*y + m7
-                        v_yw = vmlaq_f32(v_m8, v_m5, v_y);                                            // and m5*y + m8
-                    // Vector body: four destination pixels per iteration.
-                    for ( ; x + 4 <= blockWidth; x += 4)
-                    {
-                        float32x4_t v_src_xf = vmlaq_f32(v_yx, v_m0, v_x);
-                        float32x4_t v_src_yf = vmlaq_f32(v_yy, v_m1, v_x);
-                        float32x4_t v_wf = vrecpq_f32(vmlaq_f32(v_yw, v_m2, v_x)); // presumably 1 / w; vrecpq_f32 is a helper defined outside this file -- confirm its precision
-                        v_src_xf = vmulq_f32(v_wf, v_src_xf);
-                        v_src_yf = vmulq_f32(v_wf, v_src_yf);
-                        // Truncate toward zero first; the floor is reconstructed below from the sign of the fractional part.
-                        int32x4_t v_src_x = vcvtq_s32_f32(v_src_xf);
-                        int32x4_t v_src_y = vcvtq_s32_f32(v_src_yf);
-                        // Floor emulation: where value - trunc(value) is negative, add 1 to the fraction and subtract 1 from the integer part.
-                        float32x4x2_t v_coeff;
-                        v_coeff.val[0] = vsubq_f32(v_src_xf, vcvtq_f32_s32(v_src_x));
-                        v_coeff.val[1] = vsubq_f32(v_src_yf, vcvtq_f32_s32(v_src_y));
-                        uint32x4_t v_maskx = vcltq_f32(v_coeff.val[0], v_zero4f);
-                        uint32x4_t v_masky = vcltq_f32(v_coeff.val[1], v_zero4f);
-                        v_coeff.val[0] = vbslq_f32(v_maskx, vaddq_f32(v_one4f, v_coeff.val[0]), v_coeff.val[0]);
-                        v_coeff.val[1] = vbslq_f32(v_masky, vaddq_f32(v_one4f, v_coeff.val[1]), v_coeff.val[1]);
-                        v_src_x = vbslq_s32(v_maskx, vsubq_s32(v_src_x, v_1), v_src_x);
-                        v_src_y = vbslq_s32(v_masky, vsubq_s32(v_src_y, v_1), v_src_y);
-                        // Clamp both neighbours on each axis to [0, size - 1] (border replication).
-                        int32x4_t v_dst0_x = vmaxq_s32(v_zero4, vminq_s32(v_width4, v_src_x));
-                        int32x4_t v_dst0_y = vmaxq_s32(v_zero4, vminq_s32(v_height4, v_src_y));
-                        int32x4_t v_dst1_x = vmaxq_s32(v_zero4, vminq_s32(v_width4, vaddq_s32(v_1, v_src_x)));
-                        int32x4_t v_dst1_y = vmaxq_s32(v_zero4, vminq_s32(v_height4, vaddq_s32(v_1, v_src_y)));
-                        // Indices of the four neighbours in the order (x0,y0), (x1,y0), (x0,y1), (x1,y1), each y * srcStride + x.
-                        int32x4x4_t v_dst_index;
-                        v_dst_index.val[0] = vmlaq_s32(v_dst0_x, v_dst0_y, v_step4);
-                        v_dst_index.val[1] = vmlaq_s32(v_dst1_x, v_dst0_y, v_step4);
-                        v_dst_index.val[2] = vmlaq_s32(v_dst0_x, v_dst1_y, v_step4);
-                        v_dst_index.val[3] = vmlaq_s32(v_dst1_x, v_dst1_y, v_step4);
-                        // Interleaving stores produce the same per-pixel layout the scalar tail writes: 2 fractions and 4 indices per pixel.
-                        vst2q_f32(coeff_row + (x << 1), v_coeff);
-                        vst4q_s32(map_row + (x << 2), v_dst_index);
-
-                        v_x = vaddq_f32(v_x, v_4);
-                    }
-                    // Scalar tail for the last blockWidth % 4 pixels; x_ is the absolute destination column matching x.
-                    f32 yx = m[3] * y_ + m[6], yy = m[4] * y_ + m[7], yw = m[5] * y_ + m[8];
-                    for (ptrdiff_t x_ = x + j; x < blockWidth; ++x, ++x_)
-                    {
-                        f32 w_f = 1.0f / (m[2] * x_ + yw);
-                        f32 src_x_f = (m[0] * x_ + yx) * w_f;
-                        f32 src_y_f = (m[1] * x_ + yy) * w_f;
-
-                        s32 src0_x = (s32)floorf(src_x_f);
-                        s32 src0_y = (s32)floorf(src_y_f);
-                        // Fractions are taken from the unclamped floor, before the indices are clamped below.
-                        coeff_row[(x << 1) + 0] = src_x_f - src0_x;
-                        coeff_row[(x << 1) + 1] = src_y_f - src0_y;
-                        // The +1 neighbours are derived before src0_* is overwritten with its clamped value.
-                        s32 src1_y = std::max(0, std::min<s32>(ssize.height - 1, src0_y + 1));
-                        src0_y = std::max(0, std::min<s32>(ssize.height - 1, src0_y));
-                        s32 src1_x = std::max(0, std::min<s32>(ssize.width - 1, src0_x + 1));
-                        src0_x = std::max(0, std::min<s32>(ssize.width - 1, src0_x));
-
-                        map_row[(x << 2) + 0] = src0_y * srcStride + src0_x;
-                        map_row[(x << 2) + 1] = src0_y * srcStride + src1_x;
-                        map_row[(x << 2) + 2] = src1_y * srcStride + src0_x;
-                        map_row[(x << 2) + 3] = src1_y * srcStride + src1_x;
-                    }
-                }
-                // Hand both tables to the remap helper, which writes this tile of the destination.
-                remapLinearReplicate(Size2D(blockWidth, blockHeight),
-                                     srcBase, &map[0], &coeffs[0],
-                                     getRowPtr(dstBase, dstStride, i) + j, dstStride);
-            }
-        }
-    }
-    else if (borderMode == BORDER_MODE_CONSTANT)
-    {
-        float32x4_t v_zero4 = vdupq_n_f32(0.0f);   // float zero again (same value as v_zero4f above); used for the bounds masks in this branch
-        int32x4_t v_m1_4 = vdupq_n_s32(-1);        // index stored for every neighbour that falls outside the source
-        // Same tiling as the REPLICATE branch; here out-of-range neighbours are marked with -1 instead of being clamped.
-        for (size_t i = 0; i < dsize.height; i += BLOCK_SIZE)
-        {
-            size_t blockHeight = std::min<size_t>(BLOCK_SIZE, dsize.height - i);
-            for (size_t j = 0; j < dsize.width; j += BLOCK_SIZE)
-            {
-                size_t blockWidth = std::min<size_t>(BLOCK_SIZE, dsize.width - j);
-
-                // Fill the index and coefficient tables for this tile, one row at a time.
-                for (size_t y = 0; y < blockHeight; ++y)
-                {
-                    s32 * map_row = getRowPtr(map, blockWidth * sizeof(s32) * 4, y);
-                    f32 * coeff_row = getRowPtr(coeffs, blockWidth * sizeof(f32) * 2, y);
-                    // x counts within the tile, y_ is the absolute destination row; v_x starts at the absolute columns j..j+3.
-                    size_t x = 0, y_ = y + i;
-                    f32 indeces[4] = { j + 0.0f, j + 1.0f, j + 2.0f, j + 3.0f };
-                    float32x4_t v_x = vld1q_f32(indeces), v_y = vdupq_n_f32(y_);
-                    float32x4_t v_yx = vmlaq_f32(v_m6, v_m3, v_y), v_yy = vmlaq_f32(v_m7, v_m4, v_y), // row-constant parts: m3*y + m6, m4*y + m7
-                        v_yw = vmlaq_f32(v_m8, v_m5, v_y);                                            // and m5*y + m8
-                    // Vector body: four destination pixels per iteration.
-                    for ( ; x + 4 <= blockWidth; x += 4)
-                    {
-                        float32x4_t v_src_xf = vmlaq_f32(v_yx, v_m0, v_x);
-                        float32x4_t v_src_yf = vmlaq_f32(v_yy, v_m1, v_x);
-                        float32x4_t v_wf = vrecpq_f32(vmlaq_f32(v_yw, v_m2, v_x)); // presumably 1 / w; vrecpq_f32 is a helper defined outside this file -- confirm its precision
-                        v_src_xf = vmulq_f32(v_wf, v_src_xf);
-                        v_src_yf = vmulq_f32(v_wf, v_src_yf);
-                        // Truncate toward zero first; the floor is reconstructed below from the sign of the fractional part.
-                        int32x4_t v_src_x0 = vcvtq_s32_f32(v_src_xf);
-                        int32x4_t v_src_y0 = vcvtq_s32_f32(v_src_yf);
-                        // Floor emulation: where value - trunc(value) is negative, add 1 to the fraction and subtract 1 from the integer part.
-                        float32x4x2_t v_coeff;
-                        v_coeff.val[0] = vsubq_f32(v_src_xf, vcvtq_f32_s32(v_src_x0));
-                        v_coeff.val[1] = vsubq_f32(v_src_yf, vcvtq_f32_s32(v_src_y0));
-                        uint32x4_t v_maskx = vcltq_f32(v_coeff.val[0], v_zero4f);
-                        uint32x4_t v_masky = vcltq_f32(v_coeff.val[1], v_zero4f);
-                        v_coeff.val[0] = vbslq_f32(v_maskx, vaddq_f32(v_one4f, v_coeff.val[0]), v_coeff.val[0]);
-                        v_coeff.val[1] = vbslq_f32(v_masky, vaddq_f32(v_one4f, v_coeff.val[1]), v_coeff.val[1]);
-                        v_src_x0 = vbslq_s32(v_maskx, vsubq_s32(v_src_x0, v_1), v_src_x0);
-                        v_src_y0 = vbslq_s32(v_masky, vsubq_s32(v_src_y0, v_1), v_src_y0);
-                        // Second neighbour on each axis; nothing is clamped in this branch.
-                        int32x4_t v_src_x1 = vaddq_s32(v_src_x0, v_1);
-                        int32x4_t v_src_y1 = vaddq_s32(v_src_y0, v_1);
-                        // Unmasked indices of the four neighbours in the order (x0,y0), (x1,y0), (x0,y1), (x1,y1).
-                        int32x4x4_t v_dst_index;
-                        v_dst_index.val[0] = vmlaq_s32(v_src_x0, v_src_y0, v_step4);
-                        v_dst_index.val[1] = vmlaq_s32(v_src_x1, v_src_y0, v_step4);
-                        v_dst_index.val[2] = vmlaq_s32(v_src_x0, v_src_y1, v_step4);
-                        v_dst_index.val[3] = vmlaq_s32(v_src_x1, v_src_y1, v_step4);
-                        // Per-axis validity: the lower bound is tested on the float coordinate (shifted by +1 for the second neighbour), the upper bound on the integer against size - 1.
-                        uint32x4_t v_mask_x0 = vandq_u32(vcgeq_f32(v_src_xf, v_zero4), vcleq_s32(v_src_x0, v_width4));
-                        uint32x4_t v_mask_x1 = vandq_u32(vcgeq_f32(vaddq_f32(v_src_xf, v_one4f), v_zero4), vcleq_s32(v_src_x1, v_width4));
-                        uint32x4_t v_mask_y0 = vandq_u32(vcgeq_f32(v_src_yf, v_zero4), vcleq_s32(v_src_y0, v_height4));
-                        uint32x4_t v_mask_y1 = vandq_u32(vcgeq_f32(vaddq_f32(v_src_yf, v_one4f), v_zero4), vcleq_s32(v_src_y1, v_height4));
-                        // A neighbour keeps its index only when both of its axes are valid; otherwise it becomes -1.
-                        v_dst_index.val[0] = vbslq_s32(vandq_u32(v_mask_x0, v_mask_y0), v_dst_index.val[0], v_m1_4);
-                        v_dst_index.val[1] = vbslq_s32(vandq_u32(v_mask_x1, v_mask_y0), v_dst_index.val[1], v_m1_4);
-                        v_dst_index.val[2] = vbslq_s32(vandq_u32(v_mask_x0, v_mask_y1), v_dst_index.val[2], v_m1_4);
-                        v_dst_index.val[3] = vbslq_s32(vandq_u32(v_mask_x1, v_mask_y1), v_dst_index.val[3], v_m1_4);
-                        // Interleaving stores produce the same per-pixel layout the scalar tail writes: 2 fractions and 4 indices per pixel.
-                        vst2q_f32(coeff_row + (x << 1), v_coeff);
-                        vst4q_s32(map_row + (x << 2), v_dst_index);
-
-                        v_x = vaddq_f32(v_x, v_4);
-                    }
-                    // Scalar tail for the last blockWidth % 4 pixels; x_ is the absolute destination column matching x.
-                    f32 yx = m[3] * y_ + m[6], yy = m[4] * y_ + m[7], yw = m[5] * y_ + m[8];
-                    for (ptrdiff_t x_ = x + j; x < blockWidth; ++x, ++x_)
-                    {
-                        f32 w_f = 1.0f / (m[2] * x_ + yw);
-                        f32 src_x_f = (m[0] * x_ + yx) * w_f;
-                        f32 src_y_f = (m[1] * x_ + yy) * w_f;
-
-                        s32 src0_x = (s32)floorf(src_x_f), src1_x = src0_x + 1;
-                        s32 src0_y = (s32)floorf(src_y_f), src1_y = src0_y + 1;
-
-                        coeff_row[(x << 1) + 0] = src_x_f - src0_x;
-                        coeff_row[(x << 1) + 1] = src_y_f - src0_y;
-                        // Each of the four neighbours is bounds-checked on its own; out-of-range ones are stored as -1.
-                        map_row[(x << 2) + 0] = (src0_x >= 0) && (src0_x < (s32)ssize.width) &&
-                                                (src0_y >= 0) && (src0_y < (s32)ssize.height) ? src0_y * srcStride + src0_x : -1;
-                        map_row[(x << 2) + 1] = (src1_x >= 0) && (src1_x < (s32)ssize.width) &&
-                                                (src0_y >= 0) && (src0_y < (s32)ssize.height) ? src0_y * srcStride + src1_x : -1;
-                        map_row[(x << 2) + 2] = (src0_x >= 0) && (src0_x < (s32)ssize.width) &&
-                                                (src1_y >= 0) && (src1_y < (s32)ssize.height) ? src1_y * srcStride + src0_x : -1;
-                        map_row[(x << 2) + 3] = (src1_x >= 0) && (src1_x < (s32)ssize.width) &&
-                                                (src1_y >= 0) && (src1_y < (s32)ssize.height) ? src1_y * srcStride + src1_x : -1;
-                    }
-                }
-                // Hand both tables to the remap helper; presumably it uses borderValue wherever an index is -1 -- confirm in remap.hpp.
-                remapLinearConst(Size2D(blockWidth, blockHeight),
-                                 srcBase, &map[0], &coeffs[0],
-                                 getRowPtr(dstBase, dstStride, i) + j, dstStride, borderValue);
-            }
-        }
-    }
-#else
-    (void)ssize;       // without CAROTENE_NEON nothing is computed; the casts below only silence unused-parameter warnings
-    (void)dsize;
-    (void)srcBase;
-    (void)srcStride;
-    (void)m;
-    (void)dstBase;
-    (void)dstStride;
-    (void)borderMode;
-    (void)borderValue;
-#endif
-}
-
-} // namespace CAROTENE_NS
--- a/3rdparty/ffmpeg/.gitignore
+++ b/3rdparty/ffmpeg/.gitignore
@ -1,3 +0,0 @@
-downloads/
-*.dll
-ffmpeg_version.cmake
--- a/3rdparty/ffmpeg/ffmpeg.cmake
+++ b/3rdparty/ffmpeg/ffmpeg.cmake
@ -1,25 +0,0 @@
-# Binary branch name: ffmpeg/master_20160715
-# Binaries were created for OpenCV: 0e6aa189cb9a9642b0ae7983d301693516faad5d
-set(FFMPEG_BINARIES_COMMIT "7eef9080d3271c7547d303fa839a62e1124ff1e6")  # opencv_3rdparty revision embedded in the default download URL below
-set(FFMPEG_FILE_HASH_BIN32 "3bb2a8388af90adf6c762210e696400d")  # expected hash of opencv_ffmpeg.dll
-set(FFMPEG_FILE_HASH_BIN64 "ebcfc963f0a94f7e83d58d60eaf23849")  # expected hash of opencv_ffmpeg_64.dll
-set(FFMPEG_FILE_HASH_CMAKE "f99941d10c1e87bf16b9055e8fc91ab2")  # expected hash of ffmpeg_version.cmake
-# Candidate base URLs in order: the OPENCV_FFMPEG_URL variable, the environment variable of the same name, then the default. Left unquoted on purpose so unset overrides add no empty list entries.
-set(FFMPEG_DOWNLOAD_URL ${OPENCV_FFMPEG_URL};$ENV{OPENCV_FFMPEG_URL};https://raw.githubusercontent.com/opencv/opencv_3rdparty/${FFMPEG_BINARIES_COMMIT}/ffmpeg/)
-# Fetch the three artifacts next to this file. Directory paths are quoted so a checkout path containing ';' stays a single argument; URL stays unquoted because it is a list.
-ocv_download(PACKAGE opencv_ffmpeg.dll
-             HASH ${FFMPEG_FILE_HASH_BIN32}
-             URL ${FFMPEG_DOWNLOAD_URL}
-             DESTINATION_DIR "${CMAKE_CURRENT_LIST_DIR}")
-
-ocv_download(PACKAGE opencv_ffmpeg_64.dll
-             HASH ${FFMPEG_FILE_HASH_BIN64}
-             URL ${FFMPEG_DOWNLOAD_URL}
-             DESTINATION_DIR "${CMAKE_CURRENT_LIST_DIR}")
-
-ocv_download(PACKAGE ffmpeg_version.cmake
-             HASH ${FFMPEG_FILE_HASH_CMAKE}
-             URL ${FFMPEG_DOWNLOAD_URL}
-             DESTINATION_DIR "${CMAKE_CURRENT_LIST_DIR}")
-# Load the version file that was just downloaded; what it defines is not visible from this script.
-include("${CMAKE_CURRENT_LIST_DIR}/ffmpeg_version.cmake")
--- a/3rdparty/ffmpeg/license.txt
+++ b/3rdparty/ffmpeg/license.txt
@ -1,520 +0,0 @@
-              Copyright (C) 2001 Fabrice Bellard
-
-    FFmpeg is free software; you can redistribute it and/or
-    modify it under the terms of the GNU Lesser General Public
-    License as published by the Free Software Foundation; either
-    version 2.1 of the License, or (at your option) any later version.
-
-    FFmpeg is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-    Lesser General Public License for more details.
-
-    You should have received a copy of the GNU Lesser General Public
-    License along with FFmpeg; if not, write to the Free Software
-    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-
-==================================================================================
-
-                  GNU LESSER GENERAL PUBLIC LICENSE
-                       Version 2.1, February 1999
-
- Copyright (C) 1991, 1999 Free Software Foundation, Inc.
- 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
- Everyone is permitted to copy and distribute verbatim copies
- of this license document, but changing it is not allowed.
-
-[This is the first released version of the Lesser GPL.  It also counts
- as the successor of the GNU Library Public License, version 2, hence
- the version number 2.1.]
-
-                            Preamble
-
-  The licenses for most software are designed to take away your
-freedom to share and change it.  By contrast, the GNU General Public
-Licenses are intended to guarantee your freedom to share and change
-free software--to make sure the software is free for all its users.
-
-  This license, the Lesser General Public License, applies to some
-specially designated software packages--typically libraries--of the
-Free Software Foundation and other authors who decide to use it.  You
-can use it too, but we suggest you first think carefully about whether
-this license or the ordinary General Public License is the better
-strategy to use in any particular case, based on the explanations below.
-
-  When we speak of free software, we are referring to freedom of use,
-not price.  Our General Public Licenses are designed to make sure that
-you have the freedom to distribute copies of free software (and charge
-for this service if you wish); that you receive source code or can get
-it if you want it; that you can change the software and use pieces of
-it in new free programs; and that you are informed that you can do
-these things.
-
-  To protect your rights, we need to make restrictions that forbid
-distributors to deny you these rights or to ask you to surrender these
-rights.  These restrictions translate to certain responsibilities for
-you if you distribute copies of the library or if you modify it.
-
-  For example, if you distribute copies of the library, whether gratis
-or for a fee, you must give the recipients all the rights that we gave
-you.  You must make sure that they, too, receive or can get the source
-code.  If you link other code with the library, you must provide
-complete object files to the recipients, so that they can relink them
-with the library after making changes to the library and recompiling
-it.  And you must show them these terms so they know their rights.
-
-  We protect your rights with a two-step method: (1) we copyright the
-library, and (2) we offer you this license, which gives you legal
-permission to copy, distribute and/or modify the library.
-
-  To protect each distributor, we want to make it very clear that
-there is no warranty for the free library.  Also, if the library is
-modified by someone else and passed on, the recipients should know
-that what they have is not the original version, so that the original
-author's reputation will not be affected by problems that might be
-introduced by others.
-
-  Finally, software patents pose a constant threat to the existence of
-any free program.  We wish to make sure that a company cannot
-effectively restrict the users of a free program by obtaining a
-restrictive license from a patent holder.  Therefore, we insist that
-any patent license obtained for a version of the library must be
-consistent with the full freedom of use specified in this license.
-
-  Most GNU software, including some libraries, is covered by the
-ordinary GNU General Public License.  This license, the GNU Lesser
-General Public License, applies to certain designated libraries, and
-is quite different from the ordinary General Public License.  We use
-this license for certain libraries in order to permit linking those
-libraries into non-free programs.
-
-  When a program is linked with a library, whether statically or using
-a shared library, the combination of the two is legally speaking a
-combined work, a derivative of the original library.  The ordinary
-General Public License therefore permits such linking only if the
-entire combination fits its criteria of freedom.  The Lesser General
-Public License permits more lax criteria for linking other code with
-the library.
-
-  We call this license the "Lesser" General Public License because it
-does Less to protect the user's freedom than the ordinary General
-Public License.  It also provides other free software developers Less
-of an advantage over competing non-free programs.  These disadvantages
-are the reason we use the ordinary General Public License for many
-libraries.  However, the Lesser license provides advantages in certain
-special circumstances.
-
-  For example, on rare occasions, there may be a special need to
-encourage the widest possible use of a certain library, so that it becomes
-a de-facto standard.  To achieve this, non-free programs must be
-allowed to use the library.  A more frequent case is that a free
-library does the same job as widely used non-free libraries.  In this
-case, there is little to gain by limiting the free library to free
-software only, so we use the Lesser General Public License.
-
-  In other cases, permission to use a particular library in non-free
-programs enables a greater number of people to use a large body of
-free software.  For example, permission to use the GNU C Library in
-non-free programs enables many more people to use the whole GNU
-operating system, as well as its variant, the GNU/Linux operating
-system.
-
-  Although the Lesser General Public License is Less protective of the
-users' freedom, it does ensure that the user of a program that is
-linked with the Library has the freedom and the wherewithal to run
-that program using a modified version of the Library.
-
-  The precise terms and conditions for copying, distribution and
-modification follow.  Pay close attention to the difference between a
-"work based on the library" and a "work that uses the library".  The
-former contains code derived from the library, whereas the latter must
-be combined with the library in order to run.
-
-                  GNU LESSER GENERAL PUBLIC LICENSE
-   TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION
-
-  0. This License Agreement applies to any software library or other
-program which contains a notice placed by the copyright holder or
-other authorized party saying it may be distributed under the terms of
-this Lesser General Public License (also called "this License").
-Each licensee is addressed as "you".
-
-  A "library" means a collection of software functions and/or data
-prepared so as to be conveniently linked with application programs
-(which use some of those functions and data) to form executables.
-
-  The "Library", below, refers to any such software library or work
-which has been distributed under these terms.  A "work based on the
-Library" means either the Library or any derivative work under
-copyright law: that is to say, a work containing the Library or a
-portion of it, either verbatim or with modifications and/or translated
-straightforwardly into another language.  (Hereinafter, translation is
-included without limitation in the term "modification".)
-
-  "Source code" for a work means the preferred form of the work for
-making modifications to it.  For a library, complete source code means
-all the source code for all modules it contains, plus any associated
-interface definition files, plus the scripts used to control compilation
-and installation of the library.
-
-  Activities other than copying, distribution and modification are not
-covered by this License; they are outside its scope.  The act of
-running a program using the Library is not restricted, and output from
-such a program is covered only if its contents constitute a work based
-on the Library (independent of the use of the Library in a tool for
-writing it).  Whether that is true depends on what the Library does
-and what the program that uses the Library does.
-
-  1. You may copy and distribute verbatim copies of the Library's
-complete source code as you receive it, in any medium, provided that
-you conspicuously and appropriately publish on each copy an
-appropriate copyright notice and disclaimer of warranty; keep intact
-all the notices that refer to this License and to the absence of any
-warranty; and distribute a copy of this License along with the
-Library.
-
-  You may charge a fee for the physical act of transferring a copy,
-and you may at your option offer warranty protection in exchange for a
-fee.
-
-  2. You may modify your copy or copies of the Library or any portion
-of it, thus forming a work based on the Library, and copy and
-distribute such modifications or work under the terms of Section 1
-above, provided that you also meet all of these conditions:
-
-    a) The modified work must itself be a software library.
-
-    b) You must cause the files modified to carry prominent notices
-    stating that you changed the files and the date of any change.
-
-    c) You must cause the whole of the work to be licensed at no
-    charge to all third parties under the terms of this License.
-
-    d) If a facility in the modified Library refers to a function or a
-    table of data to be supplied by an application program that uses
-    the facility, other than as an argument passed when the facility
-    is invoked, then you must make a good faith effort to ensure that,
-    in the event an application does not supply such function or
-    table, the facility still operates, and performs whatever part of
-    its purpose remains meaningful.
-
-    (For example, a function in a library to compute square roots has
-    a purpose that is entirely well-defined independent of the
-    application.  Therefore, Subsection 2d requires that any
-    application-supplied function or table used by this function must
-    be optional: if the application does not supply it, the square
-    root function must still compute square roots.)
-
-These requirements apply to the modified work as a whole.  If
-identifiable sections of that work are not derived from the Library,
-and can be reasonably considered independent and separate works in
-themselves, then this License, and its terms, do not apply to those
-sections when you distribute them as separate works.  But when you
-distribute the same sections as part of a whole which is a work based
-on the Library, the distribution of the whole must be on the terms of
-this License, whose permissions for other licensees extend to the
-entire whole, and thus to each and every part regardless of who wrote
-it.
-
-Thus, it is not the intent of this section to claim rights or contest
-your rights to work written entirely by you; rather, the intent is to
-exercise the right to control the distribution of derivative or
-collective works based on the Library.
-
-In addition, mere aggregation of another work not based on the Library
-with the Library (or with a work based on the Library) on a volume of
-a storage or distribution medium does not bring the other work under
-the scope of this License.
-
-  3. You may opt to apply the terms of the ordinary GNU General Public
-License instead of this License to a given copy of the Library.  To do
-this, you must alter all the notices that refer to this License, so
-that they refer to the ordinary GNU General Public License, version 2,
-instead of to this License.  (If a newer version than version 2 of the
-ordinary GNU General Public License has appeared, then you can specify
-that version instead if you wish.)  Do not make any other change in
-these notices.
-
-  Once this change is made in a given copy, it is irreversible for
-that copy, so the ordinary GNU General Public License applies to all
-subsequent copies and derivative works made from that copy.
-
-  This option is useful when you wish to copy part of the code of
-the Library into a program that is not a library.
-
-  4. You may copy and distribute the Library (or a portion or
-derivative of it, under Section 2) in object code or executable form
-under the terms of Sections 1 and 2 above provided that you accompany
-it with the complete corresponding machine-readable source code, which
-must be distributed under the terms of Sections 1 and 2 above on a
-medium customarily used for software interchange.
-
-  If distribution of object code is made by offering access to copy
-from a designated place, then offering equivalent access to copy the
-source code from the same place satisfies the requirement to
-distribute the source code, even though third parties are not
-compelled to copy the source along with the object code.
-
-  5. A program that contains no derivative of any portion of the
-Library, but is designed to work with the Library by being compiled or
-linked with it, is called a "work that uses the Library".  Such a
-work, in isolation, is not a derivative work of the Library, and
-therefore falls outside the scope of this License.
-
-  However, linking a "work that uses the Library" with the Library
-creates an executable that is a derivative of the Library (because it
-contains portions of the Library), rather than a "work that uses the
-library".  The executable is therefore covered by this License.
-Section 6 states terms for distribution of such executables.
-
-  When a "work that uses the Library" uses material from a header file
-that is part of the Library, the object code for the work may be a
-derivative work of the Library even though the source code is not.
-Whether this is true is especially significant if the work can be
-linked without the Library, or if the work is itself a library.  The
-threshold for this to be true is not precisely defined by law.
-
-  If such an object file uses only numerical parameters, data
-structure layouts and accessors, and small macros and small inline
-functions (ten lines or less in length), then the use of the object
-file is unrestricted, regardless of whether it is legally a derivative
-work.  (Executables containing this object code plus portions of the
-Library will still fall under Section 6.)
-
-  Otherwise, if the work is a derivative of the Library, you may
-distribute the object code for the work under the terms of Section 6.
-Any executables containing that work also fall under Section 6,
-whether or not they are linked directly with the Library itself.
-
-  6. As an exception to the Sections above, you may also combine or
-link a "work that uses the Library" with the Library to produce a
-work containing portions of the Library, and distribute that work
-under terms of your choice, provided that the terms permit
-modification of the work for the customer's own use and reverse
-engineering for debugging such modifications.
-
-  You must give prominent notice with each copy of the work that the
-Library is used in it and that the Library and its use are covered by
-this License.  You must supply a copy of this License.  If the work
-during execution displays copyright notices, you must include the
-copyright notice for the Library among them, as well as a reference
-directing the user to the copy of this License.  Also, you must do one
-of these things:
-
-    a) Accompany the work with the complete corresponding
-    machine-readable source code for the Library including whatever
-    changes were used in the work (which must be distributed under
-    Sections 1 and 2 above); and, if the work is an executable linked
-    with the Library, with the complete machine-readable "work that
-    uses the Library", as object code and/or source code, so that the
-    user can modify the Library and then relink to produce a modified
-    executable containing the modified Library.  (It is understood
-    that the user who changes the contents of definitions files in the
-    Library will not necessarily be able to recompile the application
-    to use the modified definitions.)
-
-    b) Use a suitable shared library mechanism for linking with the
-    Library.  A suitable mechanism is one that (1) uses at run time a
-    copy of the library already present on the user's computer system,
-    rather than copying library functions into the executable, and (2)
-    will operate properly with a modified version of the library, if
-    the user installs one, as long as the modified version is
-    interface-compatible with the version that the work was made with.
-
-    c) Accompany the work with a written offer, valid for at
-    least three years, to give the same user the materials
-    specified in Subsection 6a, above, for a charge no more
-    than the cost of performing this distribution.
-
-    d) If distribution of the work is made by offering access to copy
-    from a designated place, offer equivalent access to copy the above
-    specified materials from the same place.
-
-    e) Verify that the user has already received a copy of these
-    materials or that you have already sent this user a copy.
-
-  For an executable, the required form of the "work that uses the
-Library" must include any data and utility programs needed for
-reproducing the executable from it.  However, as a special exception,
-the materials to be distributed need not include anything that is
-normally distributed (in either source or binary form) with the major
-components (compiler, kernel, and so on) of the operating system on
-which the executable runs, unless that component itself accompanies
-the executable.
-
-  It may happen that this requirement contradicts the license
-restrictions of other proprietary libraries that do not normally
-accompany the operating system.  Such a contradiction means you cannot
-use both them and the Library together in an executable that you
-distribute.
-
-  7. You may place library facilities that are a work based on the
-Library side-by-side in a single library together with other library
-facilities not covered by this License, and distribute such a combined
-library, provided that the separate distribution of the work based on
-the Library and of the other library facilities is otherwise
-permitted, and provided that you do these two things:
-
-    a) Accompany the combined library with a copy of the same work
-    based on the Library, uncombined with any other library
-    facilities.  This must be distributed under the terms of the
-    Sections above.
-
-    b) Give prominent notice with the combined library of the fact
-    that part of it is a work based on the Library, and explaining
-    where to find the accompanying uncombined form of the same work.
-
-  8. You may not copy, modify, sublicense, link with, or distribute
-the Library except as expressly provided under this License.  Any
-attempt otherwise to copy, modify, sublicense, link with, or
-distribute the Library is void, and will automatically terminate your
-rights under this License.  However, parties who have received copies,
-or rights, from you under this License will not have their licenses
-terminated so long as such parties remain in full compliance.
-
-  9. You are not required to accept this License, since you have not
-signed it.  However, nothing else grants you permission to modify or
-distribute the Library or its derivative works.  These actions are
-prohibited by law if you do not accept this License.  Therefore, by
-modifying or distributing the Library (or any work based on the
-Library), you indicate your acceptance of this License to do so, and
-all its terms and conditions for copying, distributing or modifying
-the Library or works based on it.
-
-  10. Each time you redistribute the Library (or any work based on the
-Library), the recipient automatically receives a license from the
-original licensor to copy, distribute, link with or modify the Library
-subject to these terms and conditions.  You may not impose any further
-restrictions on the recipients' exercise of the rights granted herein.
-You are not responsible for enforcing compliance by third parties with
-this License.
-
-  11. If, as a consequence of a court judgment or allegation of patent
-infringement or for any other reason (not limited to patent issues),
-conditions are imposed on you (whether by court order, agreement or
-otherwise) that contradict the conditions of this License, they do not
-excuse you from the conditions of this License.  If you cannot
-distribute so as to satisfy simultaneously your obligations under this
-License and any other pertinent obligations, then as a consequence you
-may not distribute the Library at all.  For example, if a patent
-license would not permit royalty-free redistribution of the Library by
-all those who receive copies directly or indirectly through you, then
-the only way you could satisfy both it and this License would be to
-refrain entirely from distribution of the Library.
-
-If any portion of this section is held invalid or unenforceable under any
-particular circumstance, the balance of the section is intended to apply,
-and the section as a whole is intended to apply in other circumstances.
-
-It is not the purpose of this section to induce you to infringe any
-patents or other property right claims or to contest validity of any
-such claims; this section has the sole purpose of protecting the
-integrity of the free software distribution system which is
-implemented by public license practices.  Many people have made
-generous contributions to the wide range of software distributed
-through that system in reliance on consistent application of that
-system; it is up to the author/donor to decide if he or she is willing
-to distribute software through any other system and a licensee cannot
-impose that choice.
-
-This section is intended to make thoroughly clear what is believed to
-be a consequence of the rest of this License.
-
-  12. If the distribution and/or use of the Library is restricted in
-certain countries either by patents or by copyrighted interfaces, the
-original copyright holder who places the Library under this License may add
-an explicit geographical distribution limitation excluding those countries,
-so that distribution is permitted only in or among countries not thus
-excluded.  In such case, this License incorporates the limitation as if
-written in the body of this License.
-
-  13. The Free Software Foundation may publish revised and/or new
-versions of the Lesser General Public License from time to time.
-Such new versions will be similar in spirit to the present version,
-but may differ in detail to address new problems or concerns.
-
-Each version is given a distinguishing version number.  If the Library
-specifies a version number of this License which applies to it and
-"any later version", you have the option of following the terms and
-conditions either of that version or of any later version published by
-the Free Software Foundation.  If the Library does not specify a
-license version number, you may choose any version ever published by
-the Free Software Foundation.
-
-  14. If you wish to incorporate parts of the Library into other free
-programs whose distribution conditions are incompatible with these,
-write to the author to ask for permission.  For software which is
-copyrighted by the Free Software Foundation, write to the Free
-Software Foundation; we sometimes make exceptions for this.  Our
-decision will be guided by the two goals of preserving the free status
-of all derivatives of our free software and of promoting the sharing
-and reuse of software generally.
-
-                            NO WARRANTY
-
-  15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO
-WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW.
-EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR
-OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY
-KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE
-LIBRARY IS WITH YOU.  SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME
-THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
-
-  16. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN
-WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY
-AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU
-FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR
-CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE
-LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING
-RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A
-FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF
-SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH
-DAMAGES.
-
-                     END OF TERMS AND CONDITIONS
-
-           How to Apply These Terms to Your New Libraries
-
-  If you develop a new library, and you want it to be of the greatest
-possible use to the public, we recommend making it free software that
-everyone can redistribute and change.  You can do so by permitting
-redistribution under these terms (or, alternatively, under the terms of the
-ordinary General Public License).
-
-  To apply these terms, attach the following notices to the library.  It is
-safest to attach them to the start of each source file to most effectively
-convey the exclusion of warranty; and each file should have at least the
-"copyright" line and a pointer to where the full notice is found.
-
-    <one line to give the library's name and a brief idea of what it does.>
-    Copyright (C) <year>  <name of author>
-
-    This library is free software; you can redistribute it and/or
-    modify it under the terms of the GNU Lesser General Public
-    License as published by the Free Software Foundation; either
-    version 2.1 of the License, or (at your option) any later version.
-
-    This library is distributed in the hope that it will be useful,
-    but WITHOUT ANY WARRANTY; without even the implied warranty of
-    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-    Lesser General Public License for more details.
-
-    You should have received a copy of the GNU Lesser General Public
-    License along with this library; if not, write to the Free Software
-    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
-
-Also add information on how to contact you by electronic and paper mail.
-
-You should also get your employer (if you work as a programmer) or your
-school, if any, to sign a "copyright disclaimer" for the library, if
-necessary.  Here is a sample; alter the names:
-
-  Yoyodyne, Inc., hereby disclaims all copyright interest in the
-  library `Frob' (a library for tweaking knobs) written by James Random Hacker.
-
-  <signature of Ty Coon>, 1 April 1990
-  Ty Coon, President of Vice
-
-That's all there is to it!
--- a/3rdparty/ffmpeg/readme.txt
+++ b/3rdparty/ffmpeg/readme.txt
@ -1,38 +0,0 @@
-* On Linux and other Unix flavors OpenCV uses default or user-built ffmpeg/libav libraries.
-  If user builds ffmpeg/libav from source and wants OpenCV to stay BSD library, not GPL/LGPL,
-  he/she should use --enabled-shared configure flag and make sure that no GPL components are
-  enabled (some notable examples are x264 (H264 encoder) and libac3 (Dolby AC3 audio codec)).
-  See https://www.ffmpeg.org/legal.html for details.
-
-  If you want to play very safe and do not want to use FFMPEG at all, regardless of whether it's installed on
-  your system or not, configure and build OpenCV using CMake with WITH_FFMPEG=OFF flag. OpenCV will then use
-  AVFoundation (OSX), GStreamer (Linux) or other available backends supported by opencv_videoio module.
-
-  There is also our self-contained motion jpeg codec, which you can use without any worries.
-  It handles CV_FOURCC('M', 'J', 'P', 'G') streams within an AVI container (".avi").
-
-* On Windows OpenCV uses pre-built ffmpeg binaries, built with proper flags (without GPL components) and
-  wrapped with simple, stable OpenCV-compatible API.
-  The binaries are opencv_ffmpeg.dll (version for 32-bit Windows) and
-  opencv_ffmpeg_64.dll (version for 64-bit Windows).
-
-  See build_win32.txt for the build instructions, if you want to rebuild opencv_ffmpeg*.dll from scratch.
-
-  The pre-built opencv_ffmpeg*.dll is:
-  * LGPL library, not BSD libraries.
-  * Loaded at runtime by opencv_videoio module.
-    If it succeeds, ffmpeg can be used to decode/encode videos;
-    otherwise, other API is used.
-
-  FFMPEG build contains H264 encoder based on the OpenH264 library, that should be installed separatelly.
-  OpenH264 Video Codec provided by Cisco Systems, Inc.
-  See https://github.com/cisco/openh264/releases for details and OpenH264 license.
-  Downloaded binary file can be placed into global system path (System32 or SysWOW64) or near application binaries.
-  You can also specify location of binary file via OPENH264_LIBRARY_PATH environment variable.
-
-  If LGPL/GPL software can not be supplied with your OpenCV-based product, simply exclude
-  opencv_ffmpeg*.dll from your distribution; OpenCV will stay fully functional except for the ability to
-  decode/encode videos using FFMPEG (though, it may still be able to do that using other API,
-  such as Video for Windows, Windows Media Foundation or our self-contained motion jpeg codec).
-
-  See license.txt for the FFMPEG copyright notice and the licensing terms.
--- a/3rdparty/gtest/CMakeLists.txt
+++ b/3rdparty/gtest/CMakeLists.txt
@ -0,0 +1,29 @@
+project(opencv_gtest)
+
+# Static Google Test library; the parent directory is added to the include path.
+include_directories("${CMAKE_CURRENT_SOURCE_DIR}/..")
+
+# Every .cpp and .h file found in this directory is part of the library.
+file(GLOB lib_srcs *.cpp)
+file(GLOB lib_hdrs *.h)
+
+# ----------------------------------------------------------------------------------
+#         Define the library target:
+# ----------------------------------------------------------------------------------
+
+set(the_target "opencv_gtest")
+
+add_library(${the_target} STATIC ${lib_srcs} ${lib_hdrs})
+
+# The target is built from C++ sources only, so -fPIC has to be appended to the
+# C++ flags; a flag added to CMAKE_C_FLAGS is never applied to .cpp files.
+if(UNIX)
+  if(CMAKE_COMPILER_IS_GNUCXX OR CV_ICC)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")
+  endif()
+endif()
+
+set_target_properties(${the_target}
+  PROPERTIES OUTPUT_NAME "${the_target}"
+  DEBUG_POSTFIX "${OPENCV_DEBUG_POSTFIX}"
+  ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/3rdparty/lib)
--- a/3rdparty/gtest/README
+++ b/3rdparty/gtest/README
@ -0,0 +1,417 @@
+Google C++ Testing Framework
+============================
+
+http://code.google.com/p/googletest/
+
+Overview
+--------
+
+Google's framework for writing C++ tests on a variety of platforms
+(Linux, Mac OS X, Windows, Windows CE, Symbian, etc).  Based on the
+xUnit architecture.  Supports automatic test discovery, a rich set of
+assertions, user-defined assertions, death tests, fatal and non-fatal
+failures, various options for running the tests, and XML test report
+generation.
+
+Please see the project page above for more information as well as the
+mailing list for questions, discussions, and development.  There is
+also an IRC channel on OFTC (irc.oftc.net) #gtest available.  Please
+join us!
+
+Requirements for End Users
+--------------------------
+
+Google Test is designed to have fairly minimal requirements to build
+and use with your projects, but there are some.  Currently, we support
+Linux, Windows, Mac OS X, and Cygwin.  We will also make our best
+effort to support other platforms (e.g. Solaris, AIX, and z/OS).
+However, since core members of the Google Test project have no access
+to these platforms, Google Test may have outstanding issues there.  If
+you notice any problems on your platform, please notify
+googletestframework@googlegroups.com.  Patches for fixing them are
+even more welcome!
+
+### Linux Requirements ###
+
+These are the base requirements to build and use Google Test from a source
+package (as described below):
+  * GNU-compatible Make or gmake
+  * POSIX-standard shell
+  * POSIX(-2) Regular Expressions (regex.h)
+  * A C++98-standard-compliant compiler
+
+### Windows Requirements ###
+
+  * Microsoft Visual C++ 7.1 or newer
+
+### Cygwin Requirements ###
+
+  * Cygwin 1.5.25-14 or newer
+
+### Mac OS X Requirements ###
+
+  * Mac OS X 10.4 Tiger or newer
+  * Developer Tools Installed
+
+Also, you'll need CMake 2.6.4 or higher if you want to build the
+samples using the provided CMake script, regardless of the platform.
+
+Requirements for Contributors
+-----------------------------
+
+We welcome patches.  If you plan to contribute a patch, you need to
+build Google Test and its own tests from an SVN checkout (described
+below), which has further requirements:
+
+  * Python version 2.3 or newer (for running some of the tests and
+    re-generating certain source files from templates)
+  * CMake 2.6.4 or newer
+
+Getting the Source
+------------------
+
+There are two primary ways of getting Google Test's source code: you
+can download a stable source release in your preferred archive format,
+or directly check out the source from our Subversion (SVN) repository.
+The SVN checkout requires a few extra steps and some extra software
+packages on your system, but lets you track the latest development and
+make patches much more easily, so we highly encourage it.
+
+### Source Package ###
+
+Google Test is released in versioned source packages which can be
+downloaded from the download page [1].  Several different archive
+formats are provided, but the only difference is the tools used to
+manipulate them, and the size of the resulting file.  Download
+whichever you are most comfortable with.
+
+  [1] http://code.google.com/p/googletest/downloads/list
+
+Once the package is downloaded, expand it using whichever tools you
+prefer for that type.  This will result in a new directory with the
+name "gtest-X.Y.Z" which contains all of the source code.  Here are
+some examples on Linux:
+
+  tar -xvzf gtest-X.Y.Z.tar.gz
+  tar -xvjf gtest-X.Y.Z.tar.bz2
+  unzip gtest-X.Y.Z.zip
+
+### SVN Checkout ###
+
+To check out the main branch (also known as the "trunk") of Google
+Test, run the following Subversion command:
+
+  svn checkout http://googletest.googlecode.com/svn/trunk/ gtest-svn
+
+Setting up the Build
+--------------------
+
+To build Google Test and your tests that use it, you need to tell your
+build system where to find its headers and source files.  The exact
+way to do it depends on which build system you use, and is usually
+straightforward.
+
+### Generic Build Instructions ###
+
+Suppose you put Google Test in directory ${GTEST_DIR}.  To build it,
+create a library build target (or a project as called by Visual Studio
+and Xcode) to compile
+
+  ${GTEST_DIR}/src/gtest-all.cc
+
+with
+
+  ${GTEST_DIR}/include and ${GTEST_DIR}
+
+in the header search path.  Assuming a Linux-like system and gcc,
+something like the following will do:
+
+  g++ -I${GTEST_DIR}/include -I${GTEST_DIR} -c ${GTEST_DIR}/src/gtest-all.cc
+  ar -rv libgtest.a gtest-all.o
+
+Next, you should compile your test source file with
+${GTEST_DIR}/include in the header search path, and link it with gtest
+and any other necessary libraries:
+
+  g++ -I${GTEST_DIR}/include path/to/your_test.cc libgtest.a -o your_test
+
+As an example, the make/ directory contains a Makefile that you can
+use to build Google Test on systems where GNU make is available
+(e.g. Linux, Mac OS X, and Cygwin).  It doesn't try to build Google
+Test's own tests.  Instead, it just builds the Google Test library and
+a sample test.  You can use it as a starting point for your own build
+script.
+
+If the default settings are correct for your environment, the
+following commands should succeed:
+
+  cd ${GTEST_DIR}/make
+  make
+  ./sample1_unittest
+
+If you see errors, try to tweak the contents of make/Makefile to make
+them go away.  There are instructions in make/Makefile on how to do
+it.
+
+### Using CMake ###
+
+Google Test comes with a CMake build script (CMakeLists.txt) that can
+be used on a wide range of platforms ("C" stands for cross-platform.).
+If you don't have CMake installed already, you can download it for
+free from http://www.cmake.org/.
+
+CMake works by generating native makefiles or build projects that can
+be used in the compiler environment of your choice.  The typical
+workflow starts with:
+
+  mkdir mybuild       # Create a directory to hold the build output.
+  cd mybuild
+  cmake ${GTEST_DIR}  # Generate native build scripts.
+
+If you want to build Google Test's samples, you should replace the
+last command with
+
+  cmake -Dbuild_gtest_samples=ON ${GTEST_DIR}
+
+If you are on a *nix system, you should now see a Makefile in the
+current directory.  Just type 'make' to build gtest.
+
+If you use Windows and have Visual Studio installed, a gtest.sln file
+and several .vcproj files will be created.  You can then build them
+using Visual Studio.
+
+On Mac OS X with Xcode installed, a .xcodeproj file will be generated.
+
+### Legacy Build Scripts ###
+
+Before settling on CMake, we have been providing hand-maintained build
+projects/scripts for Visual Studio, Xcode, and Autotools.  While we
+continue to provide them for convenience, they are not actively
+maintained any more.  We highly recommend that you follow the
+instructions in the previous two sections to integrate Google Test
+with your existing build system.
+
+If you still need to use the legacy build scripts, here's how:
+
+The msvc\ folder contains two solutions with Visual C++ projects.
+Open the gtest.sln or gtest-md.sln file using Visual Studio, and you
+are ready to build Google Test the same way you build any Visual
+Studio project.  Files that have names ending with -md use DLL
+versions of Microsoft runtime libraries (the /MD or the /MDd compiler
+option).  Files without that suffix use static versions of the runtime
+libraries (the /MT or the /MTd option).  Please note that one must use
+the same option to compile both gtest and the test code.  If you use
+Visual Studio 2005 or above, we recommend the -md version as /MD is
+the default for new projects in these versions of Visual Studio.
+
+On Mac OS X, open the gtest.xcodeproj in the xcode/ folder using
+Xcode.  Build the "gtest" target.  The universal binary framework will
+end up in your selected build directory (selected in the Xcode
+"Preferences..." -> "Building" pane and defaults to xcode/build).
+Alternatively, at the command line, enter:
+
+  xcodebuild
+
+This will build the "Release" configuration of gtest.framework in your
+default build location.  See the "xcodebuild" man page for more
+information about building different configurations and building in
+different locations.
+
+Tweaking Google Test
+--------------------
+
+Google Test can be used in diverse environments.  The default
+configuration may not work (or may not work well) out of the box in
+some environments.  However, you can easily tweak Google Test by
+defining control macros on the compiler command line.  Generally,
+these macros are named like GTEST_XYZ and you define them to either 1
+or 0 to enable or disable a certain feature.
+
+We list the most frequently used macros below.  For a complete list,
+see file include/gtest/internal/gtest-port.h.
+
+### Choosing a TR1 Tuple Library ###
+
+Some Google Test features require the C++ Technical Report 1 (TR1)
+tuple library, which is not yet available with all compilers.  The
+good news is that Google Test implements a subset of TR1 tuple that's
+enough for its own need, and will automatically use this when the
+compiler doesn't provide TR1 tuple.
+
+Usually you don't need to care about which tuple library Google Test
+uses.  However, if your project already uses TR1 tuple, you need to
+tell Google Test to use the same TR1 tuple library the rest of your
+project uses, or the two tuple implementations will clash.  To do
+that, add
+
+  -DGTEST_USE_OWN_TR1_TUPLE=0
+
+to the compiler flags while compiling Google Test and your tests.  If
+you want to force Google Test to use its own tuple library, just add
+
+  -DGTEST_USE_OWN_TR1_TUPLE=1
+
+to the compiler flags instead.
+
+If you don't want Google Test to use tuple at all, add
+
+  -DGTEST_HAS_TR1_TUPLE=0
+
+and all features using tuple will be disabled.
+
+### Multi-threaded Tests ###
+
+Google Test is thread-safe where the pthread library is available.
+After #include <gtest/gtest.h>, you can check the GTEST_IS_THREADSAFE
+macro to see whether this is the case (yes if the macro is #defined to
+1, no if it's undefined.).
+
+If Google Test doesn't correctly detect whether pthread is available
+in your environment, you can force it with
+
+  -DGTEST_HAS_PTHREAD=1
+
+or
+
+  -DGTEST_HAS_PTHREAD=0
+
+When Google Test uses pthread, you may need to add flags to your
+compiler and/or linker to select the pthread library, or you'll get
+link errors.  If you use the CMake script or the deprecated Autotools
+script, this is taken care of for you.  If you use your own build
+script, you'll need to read your compiler and linker's manual to
+figure out what flags to add.
+
+### As a Shared Library (DLL) ###
+
+Google Test is compact, so most users can build and link it as a
+static library for the simplicity.  You can choose to use Google Test
+as a shared library (known as a DLL on Windows) if you prefer.
+
+To compile gtest as a shared library, add
+
+  -DGTEST_CREATE_SHARED_LIBRARY=1
+
+to the compiler flags.  You'll also need to tell the linker to produce
+a shared library instead - consult your linker's manual for how to do
+it.
+
+To compile your tests that use the gtest shared library, add
+
+  -DGTEST_LINKED_AS_SHARED_LIBRARY=1
+
+to the compiler flags.
+
+### Avoiding Macro Name Clashes ###
+
+In C++, macros don't obey namespaces.  Therefore two libraries that
+both define a macro of the same name will clash if you #include both
+definitions.  In case a Google Test macro clashes with another
+library, you can force Google Test to rename its macro to avoid the
+conflict.
+
+Specifically, if both Google Test and some other code define macro
+FOO, you can add
+
+  -DGTEST_DONT_DEFINE_FOO=1
+
+to the compiler flags to tell Google Test to change the macro's name
+from FOO to GTEST_FOO.  Currently FOO can be FAIL, SUCCEED, or TEST.
+For example, with -DGTEST_DONT_DEFINE_TEST=1, you'll need to write
+
+  GTEST_TEST(SomeTest, DoesThis) { ... }
+
+instead of
+
+  TEST(SomeTest, DoesThis) { ... }
+
+in order to define a test.
+
+Upgrading from an Earlier Version
+---------------------------------
+
+We strive to keep Google Test releases backward compatible.
+Sometimes, though, we have to make some breaking changes for the
+users' long-term benefits.  This section describes what you'll need to
+do if you are upgrading from an earlier version of Google Test.
+
+### Upgrading from 1.3.0 or Earlier ###
+
+You may need to explicitly enable or disable Google Test's own TR1
+tuple library.  See the instructions in section "Choosing a TR1 Tuple
+Library".
+
+### Upgrading from 1.4.0 or Earlier ###
+
+The Autotools build script (configure + make) is no longer officially
+supported.  You are encouraged to migrate to your own build system or
+use CMake.  If you still need to use Autotools, you can find
+instructions in the README file from Google Test 1.4.0.
+
+On platforms where the pthread library is available, Google Test uses
+it in order to be thread-safe.  See the "Multi-threaded Tests" section
+for what this means to your build script.
+
+If you use Microsoft Visual C++ 7.1 with exceptions disabled, Google
+Test will no longer compile.  This should affect very few people, as a
+large portion of STL (including <string>) doesn't compile in this mode
+anyway.  We decided to stop supporting it in order to greatly simplify
+Google Test's implementation.
+
+Developing Google Test
+----------------------
+
+This section discusses how to make your own changes to Google Test.
+
+### Testing Google Test Itself ###
+
+To make sure your changes work as intended and don't break existing
+functionality, you'll want to compile and run Google Test's own tests.
+For that you can use CMake:
+
+  mkdir mybuild
+  cd mybuild
+  cmake -Dbuild_all_gtest_tests=ON ${GTEST_DIR}
+
+Make sure you have Python installed, as some of Google Test's tests
+are written in Python.  If the cmake command complains about not being
+able to find Python ("Could NOT find PythonInterp (missing:
+PYTHON_EXECUTABLE)"), try telling it explicitly where your Python
+executable can be found:
+
+  cmake -DPYTHON_EXECUTABLE=path/to/python -Dbuild_all_gtest_tests=ON \
+      ${GTEST_DIR}
+
+Next, you can build Google Test and all of its own tests.  On *nix,
+this is usually done by 'make'.  To run the tests, do
+
+  make test
+
+All tests should pass.
+
+### Regenerating Source Files ###
+
+Some of Google Test's source files are generated from templates (not
+in the C++ sense) using a script.  A template file is named FOO.pump,
+where FOO is the name of the file it will generate.  For example, the
+file include/gtest/internal/gtest-type-util.h.pump is used to generate
+gtest-type-util.h in the same directory.
+
+Normally you don't need to worry about regenerating the source files,
+unless you need to modify them.  In that case, you should modify the
+corresponding .pump files instead and run the pump.py Python script to
+regenerate them.  You can find pump.py in the scripts/ directory.
+Read the Pump manual [2] for how to use it.
+
+  [2] http://code.google.com/p/googletest/wiki/PumpManual
+
+### Contributing a Patch ###
+
+We welcome patches.  Please read the Google Test developer's guide [3]
+for how you can contribute.  In particular, make sure you have signed
+the Contributor License Agreement, or we won't be able to accept the
+patch.
+
+  [3] http://code.google.com/p/googletest/wiki/GoogleTestDevGuide
+
+Happy testing!
--- a/3rdparty/gtest/gtest.cpp
+++ b/3rdparty/gtest/gtest.cpp
--- a/3rdparty/gtest/gtest.h
+++ b/3rdparty/gtest/gtest.h
--- a/3rdparty/ilmimf/LICENSE
+++ b/3rdparty/ilmimf/LICENSE
@ -0,0 +1,34 @@
+Copyright (c) 2004, Industrial Light & Magic, a division of Lucasfilm
+Entertainment Company Ltd.  Portions contributed and copyright held by
+others as indicated.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above
+      copyright notice, this list of conditions and the following
+      disclaimer.
+
+    * Redistributions in binary form must reproduce the above
+      copyright notice, this list of conditions and the following
+      disclaimer in the documentation and/or other materials provided with
+      the distribution.
+
+    * Neither the name of Industrial Light & Magic nor the names of
+      any other contributors to this software may be used to endorse or
+      promote products derived from this software without specific prior
+      written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
--- a/3rdparty/ilmimf/README
+++ b/3rdparty/ilmimf/README
@ -0,0 +1,123 @@
+ABOUT THE OPENEXR LIBRARIES
+----------------------------
+
+Half is a class that encapsulates our 16-bit floating-point format.
+
+IlmThread is a thread abstraction library for use with IlmImf.  It
+currently supports pthreads and Windows threads.
+
+IlmImf is our "EXR" file format for storing 16-bit FP images.
+
+Imath is a math library.  IlmImf only uses a subset of it,
+but we're releasing the full library because it's easier for us to 
+maintain, and we think it'll be useful to others.
+
+Iex is an exception-handling library.
+
+See the IlmImfExamples directory for some code that demonstrates how
+to use the IlmImf library to read and write OpenEXR files.  The doc
+directory contains some high-level documentation and history about the
+OpenEXR format.
+
+If you have questions about using the OpenEXR libraries, you may want
+to join our developer mailing list.  See http://www.openexr.com for
+details.
+
+
+LICENSE
+-------
+
+The OpenEXR source code distribution is free software.  See the file
+named COPYING (included in this distribution) for details.
+
+
+WHAT'S INCLUDED
+---------------
+
+Besides the core OpenEXR libraries, the release includes several 
+utilities for reading, writing, viewing, and manipulating OpenEXR 
+images.  These include:
+
+  * exrdisplay, an image viewer.
+  * exrheader, a utility for dumping header information.
+  * exrstdattr, a utility for modifying OpenEXR standard attributes.
+  * exrmaketiled, for generating tiled and rip/mipmapped images.
+  * exrenvmap, for creating OpenEXR environment maps.
+  * exrmakepreview, for creating preview images for OpenEXR files.
+
+exrdisplay requires FLTK 1.1 or greater and OpenGL.  exrdisplay
+supports fragment shaders if you have the Nvidia Cg SDK and a graphics
+card capable of running fp30 profile fragment shaders.  See
+exrdisplay/README for details.
+
+We have also released an OpenEXR display driver for Renderman, a file
+I/O plugin for Shake, and a file I/O plugin for Adobe Photoshop (on
+both Windows and MacOS).  These are packaged separately.  Go to
+http://www.openexr.com to download them.  NOTE: the most recent
+versions of these applications now have native support for OpenEXR, so
+you should only use our open-source versions of the plugins if you
+have an older version of the application.
+
+
+BUILDING OPENEXR
+----------------
+
+Building OpenEXR requires the zlib library.  If you want to build the
+'exrdisplay' image viewer, you'll also need FLTK 1.1, but this program
+is not required to use OpenEXR's libraries in your application.
+exrdisplay can also accelerate the display of OpenEXR images if you
+have the NVIDIA Cg SDK.
+
+Your OS distribution may already include these libraries, or supply
+packages for them.  That is the preferred way to obtain them for use
+with OpenEXR.  If not, you can obtain the source code for zlib and
+FLTK from:
+
+   http://www.zlib.net
+   http://www.fltk.org
+
+and you can download the NVIDIA Cg SDK from
+http://developer.nvidia.com.
+
+If you're building OpenEXR on a Windows platform, see README.win32 for
+instructions on how to build OpenEXR.  The remainder of this file
+applies only to GNU/Linux or other UNIX-like systems.
+
+After installing the required libraries, to build OpenEXR on
+GNU/Linux or other UNIX-like systems, do this:
+
+./configure
+make
+make install
+
+unless you obtained OpenEXR directly from CVS, in which case you
+should first read README.CVS.
+
+If you have the Nvidia Cg SDK and you want to build support for
+fragment shaders into exrdisplay, specify the path to the SDK using
+the "--with-cg-prefix" flag.  There are some additional compile-time
+configuration options available; type `./configure --help` for more
+information.
+
+See README.OSX for details on building OpenEXR in MacOS X.
+
+Do `make check` to run the OpenEXR confidence tests.  They should all
+pass; if you find a test that does not pass on your system, please let
+us know.
+
+Other UNIX variants haven't been tested, but should be easy to build.
+Let us know if you're having problems porting OpenEXR to a particular
+platform.
+
+All include files needed to use the OpenEXR libraries are installed in the 
+OpenEXR subdirectory of the install prefix, e.g. /usr/local/include/OpenEXR.
+
+
+USING OPENEXR IN YOUR APPLICATIONS
+----------------------------------
+
+On systems with support for pkg-config, use `pkg-config --cflags
+OpenEXR` for the C++ flags required to compile against OpenEXR
+headers; and `pkg-config --libs OpenEXR` for the linker flags required
+to link against OpenEXR libraries.
+
--- a/3rdparty/include/cblas.h
+++ b/3rdparty/include/cblas.h
@ -0,0 +1,100 @@
+/* CLAPACK 3.0 BLAS wrapper macros and functions
+ * Feb 5, 2000
+ */
+
+#ifndef __CBLAS_H
+#define __CBLAS_H
+
+#include "f2c.h"
+
+#if defined _MSC_VER && _MSC_VER >= 1400
+#pragma warning(disable: 4244 4554)
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static __inline double r_lg10(real *x)
+{
+    return 0.43429448190325182765*log(*x);
+}
+
+static __inline double d_lg10(doublereal *x)
+{
+    return 0.43429448190325182765*log(*x);
+}
+
+static __inline double d_sign(doublereal *a, doublereal *b)
+{
+    double x = fabs(*a);
+    return *b >= 0 ? x : -x;
+}
+
+static __inline double r_sign(real *a, real *b)
+{
+    double x = fabs((double)*a);
+    return *b >= 0 ? x : -x;
+}
+
+extern const unsigned char lapack_toupper_tab[];
+#define lapack_toupper(c) ((char)lapack_toupper_tab[(unsigned char)(c)])
+
+extern const unsigned char lapack_lamch_tab[];
+extern const doublereal lapack_dlamch_tab[];
+extern const doublereal lapack_slamch_tab[];
+    
+static __inline logical lsame_(char *ca, char *cb)
+{
+    return lapack_toupper(ca[0]) == lapack_toupper(cb[0]);
+}
+
+static __inline doublereal dlamch_(char* cmach)
+{
+    return lapack_dlamch_tab[lapack_lamch_tab[(unsigned char)cmach[0]]];
+}
+    
+static __inline doublereal slamch_(char* cmach)
+{
+    return lapack_slamch_tab[lapack_lamch_tab[(unsigned char)cmach[0]]];
+}    
+    
+static __inline integer i_nint(real *x)
+{
+    return (integer)(*x >= 0 ? floor(*x + .5) : -floor(.5 - *x));
+}
+
+static __inline void exit_(integer *rc)
+{
+    exit(*rc);
+}
+
+integer pow_ii(integer *ap, integer *bp);
+double pow_ri(real *ap, integer *bp);
+double pow_di(doublereal *ap, integer *bp);
+
+static __inline double pow_dd(doublereal *ap, doublereal *bp)
+{
+    return pow(*ap, *bp);
+}
+
+logical slaisnan_(real *in1, real *in2);
+logical dlaisnan_(doublereal *din1, doublereal *din2);
+
+static __inline logical sisnan_(real *in1)
+{
+    return slaisnan_(in1, in1);
+}
+
+static __inline logical disnan_(doublereal *din1)
+{
+    return dlaisnan_(din1, din1);
+}
+
+char *F77_aloc(ftnlen, char*);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* __CBLAS_H */
--- a/3rdparty/include/clapack.h
+++ b/3rdparty/include/clapack.h
--- a/3rdparty/include/dshow/_mingw_dxhelper.h
+++ b/3rdparty/include/dshow/_mingw_dxhelper.h
@ -1,110 +0,0 @@
-/**
- * This file has no copyright assigned and is placed in the Public Domain.
- * This file is part of the w64 mingw-runtime package.
- * No warranty is given; refer to the file DISCLAIMER within this package.
- */
-
-#if defined(_MSC_VER) && !defined(_MSC_EXTENSIONS)
-#define NONAMELESSUNION		1
-#endif
-#if defined(NONAMELESSSTRUCT) && \
-   !defined(NONAMELESSUNION)
-#define NONAMELESSUNION		1
-#endif
-#if defined(NONAMELESSUNION)  && \
-   !defined(NONAMELESSSTRUCT)
-#define NONAMELESSSTRUCT	1
-#endif
-
-#ifndef __ANONYMOUS_DEFINED
-#define __ANONYMOUS_DEFINED
-#if defined(__GNUC__) || defined(__GNUG__)
-#define _ANONYMOUS_UNION	__extension__
-#define _ANONYMOUS_STRUCT	__extension__
-#else
-#define _ANONYMOUS_UNION
-#define _ANONYMOUS_STRUCT
-#endif
-#ifndef NONAMELESSUNION
-#define _UNION_NAME(x)
-#define _STRUCT_NAME(x)
-#else /* NONAMELESSUNION */
-#define _UNION_NAME(x)  x
-#define _STRUCT_NAME(x) x
-#endif
-#endif	/* __ANONYMOUS_DEFINED */
-
-#ifndef DUMMYUNIONNAME
-# ifdef NONAMELESSUNION
-#  define DUMMYUNIONNAME  u
-#  define DUMMYUNIONNAME1 u1	/* Wine uses this variant */
-#  define DUMMYUNIONNAME2 u2
-#  define DUMMYUNIONNAME3 u3
-#  define DUMMYUNIONNAME4 u4
-#  define DUMMYUNIONNAME5 u5
-#  define DUMMYUNIONNAME6 u6
-#  define DUMMYUNIONNAME7 u7
-#  define DUMMYUNIONNAME8 u8
-#  define DUMMYUNIONNAME9 u9
-# else /* NONAMELESSUNION */
-#  define DUMMYUNIONNAME
-#  define DUMMYUNIONNAME1	/* Wine uses this variant */
-#  define DUMMYUNIONNAME2
-#  define DUMMYUNIONNAME3
-#  define DUMMYUNIONNAME4
-#  define DUMMYUNIONNAME5
-#  define DUMMYUNIONNAME6
-#  define DUMMYUNIONNAME7
-#  define DUMMYUNIONNAME8
-#  define DUMMYUNIONNAME9
-# endif
-#endif	/* DUMMYUNIONNAME */
-
-#if !defined(DUMMYUNIONNAME1)	/* MinGW does not define this one */
-# ifdef NONAMELESSUNION
-#  define DUMMYUNIONNAME1 u1	/* Wine uses this variant */
-# else
-#  define DUMMYUNIONNAME1	/* Wine uses this variant */
-# endif
-#endif	/* DUMMYUNIONNAME1 */
-
-#ifndef DUMMYSTRUCTNAME
-# ifdef NONAMELESSUNION
-#  define DUMMYSTRUCTNAME  s
-#  define DUMMYSTRUCTNAME1 s1	/* Wine uses this variant */
-#  define DUMMYSTRUCTNAME2 s2
-#  define DUMMYSTRUCTNAME3 s3
-#  define DUMMYSTRUCTNAME4 s4
-#  define DUMMYSTRUCTNAME5 s5
-# else
-#  define DUMMYSTRUCTNAME
-#  define DUMMYSTRUCTNAME1	/* Wine uses this variant */
-#  define DUMMYSTRUCTNAME2
-#  define DUMMYSTRUCTNAME3
-#  define DUMMYSTRUCTNAME4
-#  define DUMMYSTRUCTNAME5
-# endif
-#endif /* DUMMYSTRUCTNAME */
-
-/* These are for compatibility with the Wine source tree */
-
-#ifndef WINELIB_NAME_AW
-# ifdef __MINGW_NAME_AW
-#   define WINELIB_NAME_AW  __MINGW_NAME_AW
-# else
-#  ifdef UNICODE
-#   define WINELIB_NAME_AW(func) func##W
-#  else
-#   define WINELIB_NAME_AW(func) func##A
-#  endif
-# endif
-#endif	/* WINELIB_NAME_AW */
-
-#ifndef DECL_WINELIB_TYPE_AW
-# ifdef __MINGW_TYPEDEF_AW
-#  define DECL_WINELIB_TYPE_AW  __MINGW_TYPEDEF_AW
-# else
-#  define DECL_WINELIB_TYPE_AW(type)  typedef WINELIB_NAME_AW(type) type;
-# endif
-#endif	/* DECL_WINELIB_TYPE_AW */
-
--- a/3rdparty/include/dshow/_mingw_unicode.h
+++ b/3rdparty/include/dshow/_mingw_unicode.h
@ -1,33 +0,0 @@
-/**
- * This file has no copyright assigned and is placed in the Public Domain.
- * This file is part of the w64 mingw-runtime package.
- * No warranty is given; refer to the file DISCLAIMER.PD within this package.
- */
-
-#if !defined(_INC_CRT_UNICODE_MACROS)
-/* _INC_CRT_UNICODE_MACROS defined based on UNICODE flag */
-
-#if defined(UNICODE)
-# define _INC_CRT_UNICODE_MACROS 1
-# define __MINGW_NAME_AW(func) func##W
-# define __MINGW_NAME_AW_EXT(func,ext) func##W##ext
-# define __MINGW_NAME_UAW(func) func##_W
-# define __MINGW_NAME_UAW_EXT(func,ext) func##_W_##ext
-# define __MINGW_STRING_AW(str) L##str	/* same as TEXT() from winnt.h */
-# define __MINGW_PROCNAMEEXT_AW "W"
-#else
-# define _INC_CRT_UNICODE_MACROS 2
-# define __MINGW_NAME_AW(func) func##A
-# define __MINGW_NAME_AW_EXT(func,ext) func##A##ext
-# define __MINGW_NAME_UAW(func) func##_A
-# define __MINGW_NAME_UAW_EXT(func,ext) func##_A_##ext
-# define __MINGW_STRING_AW(str) str	/* same as TEXT() from winnt.h */
-# define __MINGW_PROCNAMEEXT_AW "A"
-#endif
-
-#define __MINGW_TYPEDEF_AW(type)	\
-    typedef __MINGW_NAME_AW(type) type;
-#define __MINGW_TYPEDEF_UAW(type)	\
-    typedef __MINGW_NAME_UAW(type) type;
-
-#endif /* !defined(_INC_CRT_UNICODE_MACROS) */
--- a/3rdparty/include/dshow/amvideo.h
+++ b/3rdparty/include/dshow/amvideo.h
--- a/3rdparty/include/dshow/audevcod.h
+++ b/3rdparty/include/dshow/audevcod.h
@ -1,31 +0,0 @@
-#ifndef _AUDEVCOD_H
-#define _AUDEVCOD_H
-#if __GNUC__ >=3
-#pragma GCC system_header
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-typedef enum _tagSND_DEVICE_ERROR {
-    SNDDEV_ERROR_Open = 1,
-    SNDDEV_ERROR_Close = 2,
-    SNDDEV_ERROR_GetCaps = 3,
-    SNDDEV_ERROR_PrepareHeader = 4,
-    SNDDEV_ERROR_UnprepareHeader = 5,
-    SNDDEV_ERROR_Reset = 6,
-    SNDDEV_ERROR_Restart = 7,
-    SNDDEV_ERROR_GetPosition = 8,
-    SNDDEV_ERROR_Write = 9,
-    SNDDEV_ERROR_Pause = 10,
-    SNDDEV_ERROR_Stop = 11,
-    SNDDEV_ERROR_Start = 12,
-    SNDDEV_ERROR_AddBuffer = 13,
-    SNDDEV_ERROR_Query = 14
-} SNDDEV_ERR;
-
-#ifdef __cplusplus
-}
-#endif
-#endif
--- a/3rdparty/include/dshow/bdatypes.h
+++ b/3rdparty/include/dshow/bdatypes.h
@ -1,32 +0,0 @@
-#ifndef _BDATYPES_H
-#define _BDATYPES_H
-#if __GNUC__ >= 3
-#pragma GCC system_header
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*--- DirectShow Reference - DirectShow Enumerated Types */
-typedef enum {
-    MEDIA_TRANSPORT_PACKET,
-    MEDIA_ELEMENTARY_STREAM,
-    MEDIA_MPEG2_PSI,
-    MEDIA_TRANSPORT_PAYLOAD
-} MEDIA_SAMPLE_CONTENT;
-/*--- DirectShow Reference - DirectShow Structures */
-typedef struct {
-    DWORD dwOffset;
-    DWORD dwPacketLength;
-    DWORD dwStride;
-} MPEG2_TRANSPORT_STRIDE;
-typedef struct {
-    ULONG ulPID;
-    MEDIA_SAMPLE_CONTENT MediaSampleContent ;
-} PID_MAP;
-
-#ifdef __cplusplus
-}
-#endif
-#endif
--- a/3rdparty/include/dshow/control.h
+++ b/3rdparty/include/dshow/control.h
--- a/3rdparty/include/dshow/ddraw.h
+++ b/3rdparty/include/dshow/ddraw.h
--- a/3rdparty/include/dshow/dshow.h
+++ b/3rdparty/include/dshow/dshow.h
@ -1,61 +0,0 @@
-/*
- * Copyright (C) 2002 Alexandre Julliard
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
- */
-
-#ifndef __DSHOW_INCLUDED__
-#define __DSHOW_INCLUDED__
-
-#define AM_NOVTABLE
-
-#ifndef __WINESRC__
-# include <windows.h>
-# include <windowsx.h>
-#else
-# include <windef.h>
-# include <wingdi.h>
-# include <objbase.h>
-#endif
-#include <olectl.h>
-#include <dshow/ddraw.h>
-#include <mmsystem.h>
-/* FIXME: #include <strsafe.h>*/
-
-#ifndef NUMELMS
-#define NUMELMS(array) (sizeof(array)/sizeof((array)[0]))
-#endif
-
-#include <dshow/strmif.h>
-#include <dshow/amvideo.h>
-#ifdef DSHOW_USE_AMAUDIO
-/* FIXME: #include <amaudio.h>*/
-#endif
-#include <dshow/control.h>
-#include <dshow/evcode.h>
-#include <dshow/uuids.h>
-#include <dshow/errors.h>
-/* FIXME: #include <edevdefs.h> */
-#include <dshow/audevcod.h>
-/* FIXME: #include <dvdevcod.h> */
-
-#ifndef OATRUE
-#define OATRUE (-1)
-#endif
-#ifndef OAFALSE
-#define OAFALSE (0)
-#endif
-
-#endif /* __DSHOW_INCLUDED__ */
--- a/3rdparty/include/dshow/dsound.h
+++ b/3rdparty/include/dshow/dsound.h
--- a/3rdparty/include/dshow/dvdmedia.h
+++ b/3rdparty/include/dshow/dvdmedia.h
@ -1,75 +0,0 @@
-/*
- * Copyright (C) 2008 Maarten Lankhorst
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
- */
-
-#ifndef __DVDMEDIA_H__
-#define __DVDMEDIA_H__
-
-#define AMCONTROL_USED 0x00000001 /* bit 0; the three AMCONTROL_* values are distinct single bits and can be OR-ed together */
-#define AMCONTROL_PAD_TO_4x3 0x00000002 /* bit 1 */
-#define AMCONTROL_PAD_TO_16x9 0x00000004 /* bit 2; NOTE(review): presumably stored in VIDEOINFOHEADER2.dwControlFlags - confirm */
-
-enum AM_MPEG2Level { /* enumerators are numbered consecutively starting at 1 */
-    AM_MPEG2Level_Low      = 1,
-    AM_MPEG2Level_Main     = 2,
-    AM_MPEG2Level_High1440 = 3,
-    AM_MPEG2Level_High     = 4
-};
-enum AM_MPEG2Profile { /* enumerators are numbered consecutively starting at 1 */
-    AM_MPEG2Profile_Simple            = 1,
-    AM_MPEG2Profile_Main              = 2,
-    AM_MPEG2Profile_SNRScalable       = 3,
-    AM_MPEG2Profile_SpatiallyScalable = 4,
-    AM_MPEG2Profile_High              = 5
-};
-typedef enum { /* only the first value is spelled out; the rest follow consecutively (2..5) */
-    AM_RATE_ChangeRate = 1,
-    AM_RATE_FullDataRateMax,
-    AM_RATE_ReverseDecode,
-    AM_RATE_DecoderPosition,
-    AM_RATE_DecoderVersion
-} AM_PROPERTY_DVD_RATE_CHANGE;
-
-typedef struct tagVIDEOINFOHEADER2 { /* video format header; field meanings noted below are inferred from the names - TODO confirm against the DirectShow documentation */
-    RECT rcSource; /* presumably the source rectangle - confirm */
-    RECT rcTarget; /* presumably the destination rectangle - confirm */
-    DWORD dwBitRate;
-    DWORD dwBitErrorRate;
-    REFERENCE_TIME AvgTimePerFrame; /* REFERENCE_TIME is not declared in this header; it must come from an earlier include */
-    DWORD dwInterlaceFlags;
-    DWORD dwCopyProtectFlags;
-    DWORD dwPictAspectRatioX; /* X and Y together presumably express the picture aspect ratio as X:Y - confirm */
-    DWORD dwPictAspectRatioY;
-    union { /* both members are DWORDs occupying the same storage */
-        DWORD dwControlFlags; /* presumably a combination of the AMCONTROL_* bits - confirm */
-        DWORD dwReserved1;
-    } DUMMYUNIONNAME; /* DUMMYUNIONNAME is not defined in this header; it must be supplied by an earlier include */
-    DWORD dwReserved2;
-    BITMAPINFOHEADER bmiHeader; /* last member of the header */
-} VIDEOINFOHEADER2;
-
-typedef struct tagMPEG2VIDEOINFO { /* a VIDEOINFOHEADER2 followed by MPEG-2 specific fields */
-    VIDEOINFOHEADER2 hdr; /* embedded by value as the first member */
-    DWORD dwStartTimeCode;
-    DWORD cbSequenceHeader; /* presumably the byte length of dwSequenceHeader - confirm */
-    DWORD dwProfile; /* presumably an AM_MPEG2Profile value - confirm */
-    DWORD dwLevel; /* presumably an AM_MPEG2Level value - confirm */
-    DWORD dwFlags;
-    DWORD dwSequenceHeader[1]; /* declared with one element; NOTE(review): looks like a variable-length trailing array - confirm */
-} MPEG2VIDEOINFO;
-
-#endif /* __DVDMEDIA_H__ */
--- a/3rdparty/include/dshow/errors.h
+++ b/3rdparty/include/dshow/errors.h
@ -1,169 +0,0 @@
-#ifndef _ERRORS_H
-#define _ERRORS_H
-#if __GNUC__ >=3
-#pragma GCC system_header
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*--- DirectShow Reference - Constants and GUIDs - Error and Success Codes */
-#define VFW_S_NO_MORE_ITEMS 0x00040103
-#define VFW_S_DUPLICATE_NAME 0x0004022D
-#define VFW_S_STATE_INTERMEDIATE 0x00040237
-#define VFW_S_PARTIAL_RENDER 0x00040242
-#define VFW_S_SOME_DATA_IGNORED 0x00040245
-#define VFW_S_CONNECTIONS_DEFERRED 0x00040246
-#define VFW_S_RESOURCE_NOT_NEEDED 0x00040250
-#define VFW_S_MEDIA_TYPE_IGNORED 0x00040254
-#define VFW_S_VIDEO_NOT_RENDERED 0x00040257
-#define VFW_S_AUDIO_NOT_RENDERED 0x00040258
-#define VFW_S_RPZA 0x0004025A
-#define VFW_S_ESTIMATED 0x00040260
-#define VFW_S_RESERVED 0x00040263
-#define VFW_S_STREAM_OFF 0x00040267
-#define VFW_S_CANT_CUE 0x00040268
-#define VFW_S_NOPREVIEWPIN 0x0004027E
-#define VFW_S_DVD_NON_ONE_SEQUENTIAL 0x00040280
-#define VFW_S_DVD_CHANNEL_CONTENTS_NOT_AVAILABLE 0x0004028C
-#define VFW_S_DVD_NOT_ACCURATE 0x0004028D
-#define VFW_E_INVALIDMEDIATYPE 0x80040200
-#define VFW_E_INVALIDSUBTYPE 0x80040201
-#define VFW_E_NEED_OWNER 0x80040202
-#define VFW_E_ENUM_OUT_OF_SYNC 0x80040203
-#define VFW_E_ALREADY_CONNECTED 0x80040204
-#define VFW_E_FILTER_ACTIVE 0x80040205
-#define VFW_E_NO_TYPES 0x80040206
-#define VFW_E_NO_ACCEPTABLE_TYPES 0x80040207
-#define VFW_E_INVALID_DIRECTION 0x80040208
-#define VFW_E_NOT_CONNECTED 0x80040209
-#define VFW_E_NO_ALLOCATOR 0x8004020A
-#define VFW_E_RUNTIME_ERROR 0x8004020B
-#define VFW_E_BUFFER_NOTSET 0x8004020C
-#define VFW_E_BUFFER_OVERFLOW 0x8004020D
-#define VFW_E_BADALIGN 0x8004020E
-#define VFW_E_ALREADY_COMMITTED 0x8004020F
-#define VFW_E_BUFFERS_OUTSTANDING 0x80040210
-#define VFW_E_NOT_COMMITTED 0x80040211
-#define VFW_E_SIZENOTSET 0x80040212
-#define VFW_E_NO_CLOCK 0x80040213
-#define VFW_E_NO_SINK 0x80040214
-#define VFW_E_NO_INTERFACE 0x80040215
-#define VFW_E_NOT_FOUND 0x80040216
-#define VFW_E_CANNOT_CONNECT 0x80040217
-#define VFW_E_CANNOT_RENDER 0x80040218
-#define VFW_E_CHANGING_FORMAT 0x80040219
-#define VFW_E_NO_COLOR_KEY_SET 0x8004021A
-#define VFW_E_NOT_OVERLAY_CONNECTION 0x8004021B
-#define VFW_E_NOT_SAMPLE_CONNECTION 0x8004021C
-#define VFW_E_PALETTE_SET 0x8004021D
-#define VFW_E_COLOR_KEY_SET 0x8004021E
-#define VFW_E_NO_COLOR_KEY_FOUND 0x8004021F
-#define VFW_E_NO_PALETTE_AVAILABLE 0x80040220
-#define VFW_E_NO_DISPLAY_PALETTE 0x80040221
-#define VFW_E_TOO_MANY_COLORS 0x80040222
-#define VFW_E_STATE_CHANGED 0x80040223
-#define VFW_E_NOT_STOPPED 0x80040224
-#define VFW_E_NOT_PAUSED 0x80040225
-#define VFW_E_NOT_RUNNING 0x80040226
-#define VFW_E_WRONG_STATE 0x80040227
-#define VFW_E_START_TIME_AFTER_END 0x80040228
-#define VFW_E_INVALID_RECT 0x80040229
-#define VFW_E_TYPE_NOT_ACCEPTED 0x8004022A
-#define VFW_E_SAMPLE_REJECTED 0x8004022B
-#define VFW_E_SAMPLE_REJECTED_EOS 0x8004022C
-#define VFW_E_DUPLICATE_NAME 0x8004022D
-#define VFW_E_TIMEOUT 0x8004022E
-#define VFW_E_INVALID_FILE_FORMAT 0x8004022F
-#define VFW_E_ENUM_OUT_OF_RANGE 0x80040230
-#define VFW_E_CIRCULAR_GRAPH 0x80040231
-#define VFW_E_NOT_ALLOWED_TO_SAVE 0x80040232
-#define VFW_E_TIME_ALREADY_PASSED 0x80040233
-#define VFW_E_ALREADY_CANCELLED 0x80040234
-#define VFW_E_CORRUPT_GRAPH_FILE 0x80040235
-#define VFW_E_ADVISE_ALREADY_SET 0x80040236
-#define VFW_E_NO_MODEX_AVAILABLE 0x80040238
-#define VFW_E_NO_ADVISE_SET 0x80040239
-#define VFW_E_NO_FULLSCREEN 0x8004023A
-#define VFW_E_IN_FULLSCREEN_MODE 0x8004023B
-#define VFW_E_UNKNOWN_FILE_TYPE 0x80040240
-#define VFW_E_CANNOT_LOAD_SOURCE_FILTER 0x80040241
-#define VFW_E_FILE_TOO_SHORT 0x80040243
-#define VFW_E_INVALID_FILE_VERSION 0x80040244
-#define VFW_E_INVALID_CLSID 0x80040247
-#define VFW_E_INVALID_MEDIA_TYPE 0x80040248
-#define VFW_E_SAMPLE_TIME_NOT_SET 0x80040249
-#define VFW_E_MEDIA_TIME_NOT_SET 0x80040251
-#define VFW_E_NO_TIME_FORMAT_SET 0x80040252
-#define VFW_E_MONO_AUDIO_HW 0x80040253
-#define VFW_E_NO_DECOMPRESSOR 0x80040255
-#define VFW_E_NO_AUDIO_HARDWARE 0x80040256
-#define VFW_E_RPZA 0x80040259
-#define VFW_E_PROCESSOR_NOT_SUITABLE 0x8004025B
-#define VFW_E_UNSUPPORTED_AUDIO 0x8004025C
-#define VFW_E_UNSUPPORTED_VIDEO 0x8004025D
-#define VFW_E_MPEG_NOT_CONSTRAINED 0x8004025E
-#define VFW_E_NOT_IN_GRAPH 0x8004025F
-#define VFW_E_NO_TIME_FORMAT 0x80040261
-#define VFW_E_READ_ONLY 0x80040262
-#define VFW_E_BUFFER_UNDERFLOW 0x80040264
-#define VFW_E_UNSUPPORTED_STREAM 0x80040265
-#define VFW_E_NO_TRANSPORT 0x80040266
-#define VFW_E_BAD_VIDEOCD 0x80040269
-#define VFW_S_NO_STOP_TIME 0x00040270 /* success code: the severity bit (0x80000000) is clear, unlike the VFW_E_* failure codes listed around it */
-#define VFW_E_OUT_OF_VIDEO_MEMORY 0x80040271
-#define VFW_E_VP_NEGOTIATION_FAILED 0x80040272
-#define VFW_E_DDRAW_CAPS_NOT_SUITABLE 0x80040273
-#define VFW_E_NO_VP_HARDWARE 0x80040274
-#define VFW_E_NO_CAPTURE_HARDWARE 0x80040275
-#define VFW_E_DVD_OPERATION_INHIBITED 0x80040276
-#define VFW_E_DVD_INVALIDDOMAIN 0x80040277
-#define VFW_E_DVD_NO_BUTTON 0x80040278
-#define VFW_E_DVD_GRAPHNOTREADY 0x80040279
-#define VFW_E_DVD_RENDERFAIL 0x8004027A
-#define VFW_E_DVD_DECNOTENOUGH 0x8004027B
-#define VFW_E_DDRAW_VERSION_NOT_SUITABLE 0x8004027C
-#define VFW_E_COPYPROT_FAILED 0x8004027D
-#define VFW_E_TIME_EXPIRED 0x8004027F
-#define VFW_E_DVD_WRONG_SPEED 0x80040281
-#define VFW_E_DVD_MENU_DOES_NOT_EXIST 0x80040282
-#define VFW_E_DVD_CMD_CANCELLED 0x80040283
-#define VFW_E_DVD_STATE_WRONG_VERSION 0x80040284
-#define VFW_E_DVD_STATE_CORRUPT 0x80040285
-#define VFW_E_DVD_STATE_WRONG_DISC 0x80040286
-#define VFW_E_DVD_INCOMPATIBLE_REGION 0x80040287
-#define VFW_E_DVD_NO_ATTRIBUTES 0x80040288
-#define VFW_E_DVD_NO_GOUP_PGC 0x80040289
-#define VFW_E_DVD_LOW_PARENTAL_LEVEL 0x8004028A
-#define VFW_E_DVD_NOT_IN_KARAOKE_MODE 0x8004028B
-#define VFW_E_FRAME_STEP_UNSUPPORTED 0x8004028E
-#define VFW_E_DVD_STREAM_DISABLED 0x8004028F
-#define VFW_E_DVD_TITLE_UNKNOWN 0x80040290
-#define VFW_E_DVD_INVALID_DISC 0x80040291
-#define VFW_E_DVD_NO_RESUME_INFORMATION 0x80040292
-#define VFW_E_PIN_ALREADY_BLOCKED_ON_THIS_THREAD 0x80040293
-#define VFW_E_PIN_ALREADY_BLOCKED 0x80040294
-#define VFW_E_CERTIFICATION_FAILURE 0x80040295
-#define VFW_E_VMR_NOT_IN_MIXER_MODE 0x80040296
-#define VFW_E_VMR_NO_AP_SUPPLIED 0x80040297
-#define VFW_E_VMR_NO_DEINTERLACE_HW 0x80040298
-#define VFW_E_VMR_NO_PROCAMP_HW 0x80040299
-#define VFW_E_DVD_VMR9_INCOMPATIBLEDEC 0x8004029A
-#define VFW_E_NO_COPP_HW 0x8004029B
-#define VFW_E_BAD_KEY 0x800403F2
-/*--- DirectShow Reference - Constants used by the functions below */
-#define MAX_ERROR_TEXT_LEN 160 /* NOTE(review): presumably the buffer length callers should give AMGetErrorText - confirm */
-/*--- DirectShow Reference - Functions */
-DWORD WINAPI AMGetErrorTextA(HRESULT,CHAR*,DWORD); /* CHAR variant: takes an HRESULT, a CHAR buffer and a DWORD (presumably the buffer capacity - confirm) */
-DWORD WINAPI AMGetErrorTextW(HRESULT,WCHAR*,DWORD); /* WCHAR variant with the same parameter shape */
-#ifdef UNICODE /* AMGetErrorText resolves to the W variant when UNICODE is defined, otherwise to the A variant */
-#define AMGetErrorText AMGetErrorTextW
-#else
-#define AMGetErrorText AMGetErrorTextA
-#endif
-
-#ifdef __cplusplus
-}
-#endif
-#endif
--- a/3rdparty/include/dshow/evcode.h
+++ b/3rdparty/include/dshow/evcode.h
@ -1,68 +0,0 @@
-#ifndef _EVCODE_H
-#define _EVCODE_H
-#if __GNUC__ >=3
-#pragma GCC system_header
-#endif
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*--- DirectShow Reference - Constants and GUIDs - Event Notification Codes */
-#define EC_ACTIVATE 0x0013
-#define EC_BUFFERING_DATA 0x0011
-#define EC_BUILT 0x0300
-#define EC_CLOCK_CHANGED 0x000D
-#define EC_CLOCK_UNSET 0x0051
-#define EC_CODECAPI_EVENT 0x0057
-#define EC_COMPLETE 0x0001
-#define EC_DEVICE_LOST 0x001F
-#define EC_DISPLAY_CHANGED 0x0016
-#define EC_END_OF_SEGMENT 0x001C
-#define EC_ERROR_STILLPLAYING 0x0008
-#define EC_ERRORABORT 0x0003
-#define EC_EXTDEVICE_MODE_CHANGE 0x0031
-#define EC_FULLSCREEN_LOST 0x0012
-#define EC_GRAPH_CHANGED 0x0050
-#define EC_LENGTH_CHANGED 0x001E
-#define EC_NEED_RESTART 0x0014
-#define EC_NOTIFY_WINDOW 0x0019
-#define EC_OLE_EVENT 0x0018
-#define EC_OPENING_FILE 0x0010
-#define EC_PALETTE_CHANGED 0x0009
-#define EC_PAUSED 0x000E
-#define EC_PREPROCESS_COMPLETE 0x0056
-#define EC_QUALITY_CHANGE 0x000B
-#define EC_REPAINT 0x0005
-#define EC_SEGMENT_STARTED 0x001D
-#define EC_SHUTTING_DOWN 0x000C
-#define EC_SNDDEV_IN_ERROR 0x0200
-#define EC_SNDDEV_OUT_ERROR 0x0201
-#define EC_STARVATION 0x0017
-#define EC_STATE_CHANGE 0x0032
-#define EC_STEP_COMPLETE 0x0024
-#define EC_STREAM_CONTROL_STARTED 0x001B
-#define EC_STREAM_CONTROL_STOPPED 0x001A
-#define EC_STREAM_ERROR_STILLPLAYING 0x0007
-#define EC_STREAM_ERROR_STOPPED 0x0006
-#define EC_TIMECODE_AVAILABLE 0x0030
-#define EC_UNBUILT 0x0301
-#define EC_USERABORT 0x0002
-#define EC_VIDEO_SIZE_CHANGED 0x000A
-#define EC_VMR_RENDERDEVICE_SET 0x0053
-#define EC_VMR_SURFACE_FLIPPED 0x0054
-#define EC_VMR_RECONNECTION_FAILED 0x0055
-#define EC_WINDOW_DESTROYED 0x0015
-#define EC_WMT_EVENT 0x0252
-#define EC_WMT_INDEX_EVENT 0x0251
-#define EC_USER 0x8000
-/*--- DirectShow Reference - DirectShow Structures */
-typedef struct { /* anonymous struct exposed only through the AM_WMT_EVENT_DATA typedef; NOTE(review): presumably the payload of EC_WMT_EVENT - confirm */
-    HRESULT hrStatus; /* status code carried with the event */
-    void   *pData; /* untyped pointer; what it points to is not visible in this header */
-} AM_WMT_EVENT_DATA;
-
-#ifdef __cplusplus
-}
-#endif
-#endif
--- a/3rdparty/include/dshow/ksuuids.h
+++ b/3rdparty/include/dshow/ksuuids.h
@ -1,191 +0,0 @@
-/**
- * This file has no copyright assigned and is placed in the Public Domain.
- * This file is part of the w64 mingw-runtime package.
- * No warranty is given; refer to the file DISCLAIMER.PD within this package.
- */
-
-OUR_GUID_ENTRY(MEDIATYPE_MPEG2_PACK,
-        0x36523B13,0x8EE5,0x11d1,0x8C,0xA3,0x00,0x60,0xB0,0x57,0x66,0x4A)
-
-OUR_GUID_ENTRY(MEDIATYPE_MPEG2_PES,
-        0xe06d8020,0xdb46,0x11cf,0xb4,0xd1,0x00,0x80,0x5f,0x6c,0xbb,0xea)
-
-OUR_GUID_ENTRY(MEDIASUBTYPE_MPEG2_WMDRM_TRANSPORT,
-        0x18BEC4EA,0x4676,0x450e,0xB4,0x78,0x0C,0xD8,0x4C,0x54,0xB3,0x27)
-
-OUR_GUID_ENTRY(MEDIASUBTYPE_MPEG2_VIDEO,
-        0xe06d8026,0xdb46,0x11cf,0xb4,0xd1,0x00,0x80,0x5f,0x6c,0xbb,0xea)
-
-OUR_GUID_ENTRY(FORMAT_MPEG2_VIDEO, /* same GUID value as FORMAT_MPEG2Video later in this file: two names for one identifier */
-        0xe06d80e3,0xdb46,0x11cf,0xb4,0xd1,0x00,0x80,0x5f,0x6c,0xbb,0xea)
-
-OUR_GUID_ENTRY(FORMAT_VIDEOINFO2,
-        0xf72a76A0L,0xeb0a,0x11d0,0xac,0xe4,0x0,0x0,0xc0,0xcc,0x16,0xba)
-
-OUR_GUID_ENTRY(MEDIASUBTYPE_MPEG2_PROGRAM,
-        0xe06d8022,0xdb46,0x11cf,0xb4,0xd1,0x00,0x80,0x05f,0x6c,0xbb,0xea)
-
-OUR_GUID_ENTRY(MEDIASUBTYPE_MPEG2_TRANSPORT,
-        0xe06d8023,0xdb46,0x11cf,0xb4,0xd1,0x00,0x80,0x05f,0x6c,0xbb,0xea)
-
-OUR_GUID_ENTRY(MEDIASUBTYPE_MPEG2_AUDIO,
-        0xe06d802b,0xdb46,0x11cf,0xb4,0xd1,0x00,0x80,0x05f,0x6c,0xbb,0xea)
-
-OUR_GUID_ENTRY(MEDIASUBTYPE_DOLBY_AC3,
-        0xe06d802c,0xdb46,0x11cf,0xb4,0xd1,0x00,0x80,0x05f,0x6c,0xbb,0xea)
-
-OUR_GUID_ENTRY(MEDIASUBTYPE_DVD_SUBPICTURE,
-        0xe06d802d,0xdb46,0x11cf,0xb4,0xd1,0x00,0x80,0x05f,0x6c,0xbb,0xea)
-
-OUR_GUID_ENTRY(MEDIASUBTYPE_DVD_LPCM_AUDIO,
-        0xe06d8032,0xdb46,0x11cf,0xb4,0xd1,0x00,0x80,0x05f,0x6c,0xbb,0xea)
-
-OUR_GUID_ENTRY(MEDIATYPE_DVD_ENCRYPTED_PACK,
-        0xed0b916a,0x044d,0x11d1,0xaa,0x78,0x00,0xc0,0x04f,0xc3,0x1d,0x60)
-
-OUR_GUID_ENTRY(MEDIATYPE_DVD_NAVIGATION,
-        0xe06d802e,0xdb46,0x11cf,0xb4,0xd1,0x00,0x80,0x05f,0x6c,0xbb,0xea)
-
-OUR_GUID_ENTRY(MEDIASUBTYPE_DVD_NAVIGATION_PCI,
-        0xe06d802f,0xdb46,0x11cf,0xb4,0xd1,0x00,0x80,0x05f,0x6c,0xbb,0xea)
-
-OUR_GUID_ENTRY(MEDIASUBTYPE_DVD_NAVIGATION_DSI,
-        0xe06d8030,0xdb46,0x11cf,0xb4,0xd1,0x00,0x80,0x05f,0x6c,0xbb,0xea)
-
-OUR_GUID_ENTRY(MEDIASUBTYPE_DVD_NAVIGATION_PROVIDER,
-        0xe06d8031,0xdb46,0x11cf,0xb4,0xd1,0x00,0x80,0x05f,0x6c,0xbb,0xea)
-
-OUR_GUID_ENTRY(FORMAT_MPEG2Video,
-        0xe06d80e3,0xdb46,0x11cf,0xb4,0xd1,0x00,0x80,0x05f,0x6c,0xbb,0xea)
-
-OUR_GUID_ENTRY(FORMAT_DolbyAC3,
-        0xe06d80e4,0xdb46,0x11cf,0xb4,0xd1,0x00,0x80,0x05f,0x6c,0xbb,0xea)
-
-OUR_GUID_ENTRY(FORMAT_MPEG2Audio,
-        0xe06d80e5,0xdb46,0x11cf,0xb4,0xd1,0x00,0x80,0x05f,0x6c,0xbb,0xea)
-
-OUR_GUID_ENTRY(FORMAT_DVD_LPCMAudio,
-        0xe06d80e6,0xdb46,0x11cf,0xb4,0xd1,0x00,0x80,0x05f,0x6c,0xbb,0xea)
-
-OUR_GUID_ENTRY(AM_KSPROPSETID_AC3,
-        0xBFABE720,0x6E1F,0x11D0,0xBC,0xF2,0x44,0x45,0x53,0x54,0x00,0x00)
-
-OUR_GUID_ENTRY(AM_KSPROPSETID_DvdSubPic,
-        0xac390460,0x43af,0x11d0,0xbd,0x6a,0x00,0x35,0x05,0xc1,0x03,0xa9)
-
-OUR_GUID_ENTRY(AM_KSPROPSETID_CopyProt,
-        0x0E8A0A40,0x6AEF,0x11D0,0x9E,0xD0,0x00,0xA0,0x24,0xCA,0x19,0xB3)
-
-OUR_GUID_ENTRY(AM_KSPROPSETID_TSRateChange,
-        0xa503c5c0,0x1d1d,0x11d1,0xad,0x80,0x44,0x45,0x53,0x54,0x0,0x0)
-
-OUR_GUID_ENTRY(AM_KSPROPSETID_MPEG4_MediaType_Attributes,
-        0xff6c4bfa,0x7a9,0x4c7b,0xa2,0x37,0x67,0x2f,0x9d,0x68,0x6,0x5f)
-
-OUR_GUID_ENTRY(AM_KSCATEGORY_CAPTURE,
-        0x65E8773DL,0x8F56,0x11D0,0xA3,0xB9,0x00,0xA0,0xC9,0x22,0x31,0x96)
-
-OUR_GUID_ENTRY(AM_KSCATEGORY_RENDER,
-        0x65E8773EL,0x8F56,0x11D0,0xA3,0xB9,0x00,0xA0,0xC9,0x22,0x31,0x96)
-
-OUR_GUID_ENTRY(AM_KSCATEGORY_DATACOMPRESSOR,
-        0x1E84C900L,0x7E70,0x11D0,0xA5,0xD6,0x28,0xDB,0x04,0xC1,0x00,0x00)
-
-OUR_GUID_ENTRY(AM_KSCATEGORY_AUDIO,
-        0x6994AD04L,0x93EF,0x11D0,0xA3,0xCC,0x00,0xA0,0xC9,0x22,0x31,0x96)
-
-OUR_GUID_ENTRY(AM_KSCATEGORY_VIDEO,
-        0x6994AD05L,0x93EF,0x11D0,0xA3,0xCC,0x00,0xA0,0xC9,0x22,0x31,0x96)
-
-OUR_GUID_ENTRY(AM_KSCATEGORY_TVTUNER,
-        0xa799a800L,0xa46d,0x11d0,0xa1,0x8c,0x00,0xa0,0x24,0x01,0xdc,0xd4)
-
-OUR_GUID_ENTRY(AM_KSCATEGORY_CROSSBAR,
-        0xa799a801L,0xa46d,0x11d0,0xa1,0x8c,0x00,0xa0,0x24,0x01,0xdc,0xd4)
-
-OUR_GUID_ENTRY(AM_KSCATEGORY_TVAUDIO,
-        0xa799a802L,0xa46d,0x11d0,0xa1,0x8c,0x00,0xa0,0x24,0x01,0xdc,0xd4)
-
-OUR_GUID_ENTRY(AM_KSCATEGORY_VBICODEC,
-        0x07dad660L,0x22f1,0x11d1,0xa9,0xf4,0x00,0xc0,0x4f,0xbb,0xde,0x8f)
-
-OUR_GUID_ENTRY(AM_KSCATEGORY_SPLITTER,
-        0x0A4252A0L,0x7E70,0x11D0,0xA5,0xD6,0x28,0xDB,0x04,0xC1,0x00,0x00)
-
-OUR_GUID_ENTRY(IID_IKsInterfaceHandler,
-        0xD3ABC7E0L,0x9A61,0x11D0,0xA4,0x0D,0x00,0xA0,0xC9,0x22,0x31,0x96)
-
-OUR_GUID_ENTRY(IID_IKsDataTypeHandler,
-        0x5FFBAA02L,0x49A3,0x11D0,0x9F,0x36,0x00,0xAA,0x00,0xA2,0x16,0xA1)
-
-OUR_GUID_ENTRY(IID_IKsPin,
-        0xb61178d1L,0xa2d9,0x11cf,0x9e,0x53,0x00,0xaa,0x00,0xa2,0x16,0xa1)
-
-OUR_GUID_ENTRY(IID_IKsControl,
-        0x28F54685L,0x06FD,0x11D2,0xB2,0x7A,0x00,0xA0,0xC9,0x22,0x31,0x96)
-
-OUR_GUID_ENTRY(IID_IKsPinFactory,
-        0xCD5EBE6BL,0x8B6E,0x11D1,0x8A,0xE0,0x00,0xA0,0xC9,0x22,0x31,0x96)
-
-OUR_GUID_ENTRY(AM_INTERFACESETID_Standard,
-        0x1A8766A0L,0x62CE,0x11CF,0xA5,0xD6,0x28,0xDB,0x04,0xC1,0x00,0x00)
-
-#if ( (NTDDI_VERSION >= NTDDI_WINXPSP2) && (NTDDI_VERSION < NTDDI_WS03) ) || (NTDDI_VERSION >= NTDDI_WS03SP1)
-OUR_GUID_ENTRY(MEDIATYPE_MPEG2_SECTIONS,
-        0x455f176c,0x4b06,0x47ce,0x9a,0xef,0x8c,0xae,0xf7,0x3d,0xf7,0xb5)
-
-OUR_GUID_ENTRY(MEDIASUBTYPE_MPEG2_VERSIONED_TABLES,
-        0x1ed988b0,0x3ffc,0x4523,0x87,0x25,0x34,0x7b,0xee,0xc1,0xa8,0xa0)
-
-OUR_GUID_ENTRY(MEDIASUBTYPE_ATSC_SI,
-        0xb3c7397c,0xd303,0x414d,0xb3,0x3c,0x4e,0xd2,0xc9,0xd2,0x97,0x33)
-
-OUR_GUID_ENTRY(MEDIASUBTYPE_DVB_SI,
-        0xe9dd31a3,0x221d,0x4adb,0x85,0x32,0x9a,0xf3,0x9,0xc1,0xa4,0x8)
-
-OUR_GUID_ENTRY(MEDIASUBTYPE_ISDB_SI,
-        0xe89ad298,0x3601,0x4b06,0xaa,0xec,0x9d,0xde,0xed,0xcc,0x5b,0xd0)
-
-OUR_GUID_ENTRY(MEDIASUBTYPE_TIF_SI,
-        0xec232eb2,0xcb96,0x4191,0xb2,0x26,0xe,0xa1,0x29,0xf3,0x82,0x50)
-
-OUR_GUID_ENTRY(MEDIASUBTYPE_MPEG2DATA,
-        0xc892e55b,0x252d,0x42b5,0xa3,0x16,0xd9,0x97,0xe7,0xa5,0xd9,0x95)
-#endif
-/* ( (NTDDI_VERSION >= NTDDI_WINXPSP2) && (NTDDI_VERSION < NTDDI_WS03) ) ||
-                        (NTDDI_VERSION >= NTDDI_WS03SP1) */
-
-#if (NTDDI_VERSION >= NTDDI_WINXP)
-OUR_GUID_ENTRY(MEDIASUBTYPE_MPEG2_TRANSPORT_STRIDE,
-        0x138aa9a4,0x1ee2,0x4c5b,0x98,0x8e,0x19,0xab,0xfd,0xbc,0x8a,0x11)
-
-OUR_GUID_ENTRY(MEDIASUBTYPE_MPEG2_UDCR_TRANSPORT, /* same GUID value as MEDIASUBTYPE_MPEG2_WMDRM_TRANSPORT near the top of this file */
-        0x18BEC4EA,0x4676,0x450e,0xB4,0x78,0x0C,0xD8,0x4C,0x54,0xB3,0x27)
-
-OUR_GUID_ENTRY(MEDIASUBTYPE_MPEG2_PBDA_TRANSPORT_RAW,
-        0x0d7aed42,0xcb9a,0x11db,0x97,0x5,0x0,0x50,0x56,0xc0,0x0,0x8)
-
-OUR_GUID_ENTRY(MEDIASUBTYPE_MPEG2_PBDA_TRANSPORT_PROCESSED,
-        0xaf748dd4,0xd80,0x11db,0x97,0x5,0x0,0x50,0x56,0xc0,0x0,0x8)
-
-OUR_GUID_ENTRY(MEDIASUBTYPE_DTS,
-        0xe06d8033,0xdb46,0x11cf,0xb4,0xd1,0x00,0x80,0x05f,0x6c,0xbb,0xea)
-
-OUR_GUID_ENTRY(MEDIASUBTYPE_SDDS,
-        0xe06d8034,0xdb46,0x11cf,0xb4,0xd1,0x00,0x80,0x05f,0x6c,0xbb,0xea)
-
-OUR_GUID_ENTRY(AM_KSPROPSETID_DVD_RateChange,
-        0x3577eb09,0x9582,0x477f,0xb2,0x9c,0xb0,0xc4,0x52,0xa4,0xff,0x9a)
-
-OUR_GUID_ENTRY(AM_KSPROPSETID_DvdKaraoke,
-        0xae4720ae,0xaa71,0x42d8,0xb8,0x2a,0xff,0xfd,0xf5,0x8b,0x76,0xfd)
-
-OUR_GUID_ENTRY(AM_KSPROPSETID_FrameStep,
-        0xc830acbd,0xab07,0x492f,0x88,0x52,0x45,0xb6,0x98,0x7c,0x29,0x79)
-#endif /* NTDDI_VERSION >= NTDDI_WINXP */
-
-#if (NTDDI_VERSION >= NTDDI_WS03SP1)
-OUR_GUID_ENTRY(AM_KSCATEGORY_VBICODEC_MI,
-        0x9c24a977,0x951,0x451a,0x80,0x6,0xe,0x49,0xbd,0x28,0xcd,0x5f)
-#endif /* NTDDI_VERSION >= NTDDI_WS03SP1 */
-
--- a/3rdparty/include/dshow/strmif.h
+++ b/3rdparty/include/dshow/strmif.h
--- a/3rdparty/include/dshow/uuids.h
+++ b/3rdparty/include/dshow/uuids.h
@ -1,368 +0,0 @@
-/**
- * This file has no copyright assigned and is placed in the Public Domain.
- * This file is part of the w64 mingw-runtime package.
- * No warranty is given; refer to the file DISCLAIMER.PD within this package.
- */
-#ifndef OUR_GUID_ENTRY
-#define OUR_GUID_ENTRY(name,l,w1,w2,b1,b2,b3,b4,b5,b6,b7,b8) DEFINE_GUID(name,l,w1,w2,b1,b2,b3,b4,b5,b6,b7,b8);
-#endif
-
-#define MEDIATYPE_NULL GUID_NULL
-#define MEDIASUBTYPE_NULL GUID_NULL
-
-OUR_GUID_ENTRY(MEDIASUBTYPE_None,0xe436eb8e,0x524f,0x11ce,0x9f,0x53,0x00,0x20,0xaf,0x0b,0xa7,0x70)
-OUR_GUID_ENTRY(MEDIATYPE_Video,0x73646976,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIATYPE_Audio,0x73647561,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIATYPE_Text,0x73747874,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIATYPE_Midi,0x7364696D,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIATYPE_Stream,0xe436eb83,0x524f,0x11ce,0x9f,0x53,0x00,0x20,0xaf,0x0b,0xa7,0x70)
-OUR_GUID_ENTRY(MEDIATYPE_Interleaved,0x73766169,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIATYPE_File,0x656c6966,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIATYPE_ScriptCommand,0x73636d64,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIATYPE_AUXLine21Data,0x670aea80,0x3a82,0x11d0,0xb7,0x9b,0x0,0xaa,0x0,0x37,0x67,0xa7)
-OUR_GUID_ENTRY(MEDIATYPE_VBI,0xf72a76e1,0xeb0a,0x11d0,0xac,0xe4,0x00,0x00,0xc0,0xcc,0x16,0xba)
-OUR_GUID_ENTRY(MEDIATYPE_Timecode,0x482dee3,0x7817,0x11cf,0x8a,0x3,0x0,0xaa,0x0,0x6e,0xcb,0x65)
-OUR_GUID_ENTRY(MEDIATYPE_LMRT,0x74726c6d,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIATYPE_URL_STREAM,0x736c7275,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_CLPL,0x4C504C43,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_YUYV,0x56595559,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_IYUV,0x56555949,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_YVU9,0x39555659,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_Y411,0x31313459,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_Y41P,0x50313459,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_YUY2,0x32595559,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_YVYU,0x55595659,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_UYVY,0x59565955,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_Y211,0x31313259,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_CLJR,0x524a4c43,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_IF09,0x39304649,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_CPLA,0x414c5043,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_MJPG,0x47504A4D,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_TVMJ,0x4A4D5654,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_WAKE,0x454B4157,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_CFCC,0x43434643,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_IJPG,0x47504A49,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_Plum,0x6D756C50,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_DVCS,0x53435644,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_DVSD,0x44535644,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_MDVF,0x4656444D,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_RGB1,0xe436eb78,0x524f,0x11ce,0x9f,0x53,0x00,0x20,0xaf,0x0b,0xa7,0x70)
-OUR_GUID_ENTRY(MEDIASUBTYPE_RGB4,0xe436eb79,0x524f,0x11ce,0x9f,0x53,0x00,0x20,0xaf,0x0b,0xa7,0x70)
-OUR_GUID_ENTRY(MEDIASUBTYPE_RGB8,0xe436eb7a,0x524f,0x11ce,0x9f,0x53,0x00,0x20,0xaf,0x0b,0xa7,0x70)
-OUR_GUID_ENTRY(MEDIASUBTYPE_RGB565,0xe436eb7b,0x524f,0x11ce,0x9f,0x53,0x00,0x20,0xaf,0x0b,0xa7,0x70)
-OUR_GUID_ENTRY(MEDIASUBTYPE_RGB555,0xe436eb7c,0x524f,0x11ce,0x9f,0x53,0x00,0x20,0xaf,0x0b,0xa7,0x70)
-OUR_GUID_ENTRY(MEDIASUBTYPE_RGB24,0xe436eb7d,0x524f,0x11ce,0x9f,0x53,0x00,0x20,0xaf,0x0b,0xa7,0x70)
-OUR_GUID_ENTRY(MEDIASUBTYPE_RGB32,0xe436eb7e,0x524f,0x11ce,0x9f,0x53,0x00,0x20,0xaf,0x0b,0xa7,0x70)
-OUR_GUID_ENTRY(MEDIASUBTYPE_ARGB1555,0x297c55af,0xe209,0x4cb3,0xb7,0x57,0xc7,0x6d,0x6b,0x9c,0x88,0xa8)
-OUR_GUID_ENTRY(MEDIASUBTYPE_ARGB4444,0x6e6415e6,0x5c24,0x425f,0x93,0xcd,0x80,0x10,0x2b,0x3d,0x1c,0xca)
-OUR_GUID_ENTRY(MEDIASUBTYPE_ARGB32,0x773c9ac0,0x3274,0x11d0,0xb7,0x24,0x0,0xaa,0x0,0x6c,0x1a,0x1)
-OUR_GUID_ENTRY(MEDIASUBTYPE_A2R10G10B10,0x2f8bb76d,0xb644,0x4550,0xac,0xf3,0xd3,0x0c,0xaa,0x65,0xd5,0xc5)
-OUR_GUID_ENTRY(MEDIASUBTYPE_A2B10G10R10,0x576f7893,0xbdf6,0x48c4,0x87,0x5f,0xae,0x7b,0x81,0x83,0x45,0x67)
-OUR_GUID_ENTRY(MEDIASUBTYPE_AYUV,0x56555941,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_AI44,0x34344941,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_IA44,0x34344149,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_RGB32_D3D_DX7_RT,0x32335237,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_RGB16_D3D_DX7_RT,0x36315237,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_ARGB32_D3D_DX7_RT,0x38384137,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_ARGB4444_D3D_DX7_RT,0x34344137,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_ARGB1555_D3D_DX7_RT,0x35314137,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_RGB32_D3D_DX9_RT,0x32335239,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_RGB16_D3D_DX9_RT,0x36315239,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_ARGB32_D3D_DX9_RT,0x38384139,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_ARGB4444_D3D_DX9_RT,0x34344139,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_ARGB1555_D3D_DX9_RT,0x35314139,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-
-#define MEDIASUBTYPE_HASALPHA(mt) (((mt).subtype==MEDIASUBTYPE_ARGB4444) || ((mt).subtype==MEDIASUBTYPE_ARGB32) || ((mt).subtype==MEDIASUBTYPE_AYUV) || ((mt).subtype==MEDIASUBTYPE_AI44) || ((mt).subtype==MEDIASUBTYPE_IA44) || ((mt).subtype==MEDIASUBTYPE_ARGB1555) || ((mt).subtype==MEDIASUBTYPE_ARGB32_D3D_DX7_RT) || ((mt).subtype==MEDIASUBTYPE_ARGB4444_D3D_DX7_RT) || ((mt).subtype==MEDIASUBTYPE_ARGB1555_D3D_DX7_RT) || ((mt).subtype==MEDIASUBTYPE_ARGB32_D3D_DX9_RT) || ((mt).subtype==MEDIASUBTYPE_ARGB4444_D3D_DX9_RT) || ((mt).subtype==MEDIASUBTYPE_ARGB1555_D3D_DX9_RT))
-#define MEDIASUBTYPE_HASALPHA7(mt) (((mt).subtype==MEDIASUBTYPE_ARGB32_D3D_DX7_RT) || ((mt).subtype==MEDIASUBTYPE_ARGB4444_D3D_DX7_RT) || ((mt).subtype==MEDIASUBTYPE_ARGB1555_D3D_DX7_RT))
-#define MEDIASUBTYPE_D3D_DX7_RT(mt) (((mt).subtype==MEDIASUBTYPE_ARGB32_D3D_DX7_RT) || ((mt).subtype==MEDIASUBTYPE_ARGB4444_D3D_DX7_RT) || ((mt).subtype==MEDIASUBTYPE_ARGB1555_D3D_DX7_RT) || ((mt).subtype==MEDIASUBTYPE_RGB32_D3D_DX7_RT) || ((mt).subtype==MEDIASUBTYPE_RGB16_D3D_DX7_RT))
-#define MEDIASUBTYPE_HASALPHA9(mt) (((mt).subtype==MEDIASUBTYPE_ARGB32_D3D_DX9_RT) || ((mt).subtype==MEDIASUBTYPE_ARGB4444_D3D_DX9_RT) || ((mt).subtype==MEDIASUBTYPE_ARGB1555_D3D_DX9_RT))
-#define MEDIASUBTYPE_D3D_DX9_RT(mt) (((mt).subtype==MEDIASUBTYPE_ARGB32_D3D_DX9_RT) || ((mt).subtype==MEDIASUBTYPE_ARGB4444_D3D_DX9_RT) || ((mt).subtype==MEDIASUBTYPE_ARGB1555_D3D_DX9_RT) || ((mt).subtype==MEDIASUBTYPE_RGB32_D3D_DX9_RT) || ((mt).subtype==MEDIASUBTYPE_RGB16_D3D_DX9_RT))
-
-OUR_GUID_ENTRY(MEDIASUBTYPE_YV12,0x32315659,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_NV12,0x3231564E,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_IMC1,0x31434D49,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_IMC2,0x32434D49,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_IMC3,0x33434D49,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_IMC4,0x34434D49,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_S340,0x30343353,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_S342,0x32343353,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_Overlay,0xe436eb7f,0x524f,0x11ce,0x9f,0x53,0x00,0x20,0xaf,0x0b,0xa7,0x70)
-OUR_GUID_ENTRY(MEDIASUBTYPE_MPEG1Packet,0xe436eb80,0x524f,0x11ce,0x9f,0x53,0x00,0x20,0xaf,0x0b,0xa7,0x70)
-OUR_GUID_ENTRY(MEDIASUBTYPE_MPEG1Payload,0xe436eb81,0x524f,0x11ce,0x9f,0x53,0x00,0x20,0xaf,0x0b,0xa7,0x70)
-OUR_GUID_ENTRY(MEDIASUBTYPE_MPEG1AudioPayload,0x00000050,0x0000,0x0010,0x80,0x00,0x00,0xAA,0x00,0x38,0x9B,0x71)
-OUR_GUID_ENTRY(MEDIATYPE_MPEG1SystemStream,0xe436eb82,0x524f,0x11ce,0x9f,0x53,0x00,0x20,0xaf,0x0b,0xa7,0x70)
-OUR_GUID_ENTRY(MEDIASUBTYPE_MPEG1System,0xe436eb84,0x524f,0x11ce,0x9f,0x53,0x00,0x20,0xaf,0x0b,0xa7,0x70)
-OUR_GUID_ENTRY(MEDIASUBTYPE_MPEG1VideoCD,0xe436eb85,0x524f,0x11ce,0x9f,0x53,0x00,0x20,0xaf,0x0b,0xa7,0x70)
-OUR_GUID_ENTRY(MEDIASUBTYPE_MPEG1Video,0xe436eb86,0x524f,0x11ce,0x9f,0x53,0x00,0x20,0xaf,0x0b,0xa7,0x70)
-OUR_GUID_ENTRY(MEDIASUBTYPE_MPEG1Audio,0xe436eb87,0x524f,0x11ce,0x9f,0x53,0x00,0x20,0xaf,0x0b,0xa7,0x70)
-OUR_GUID_ENTRY(MEDIASUBTYPE_Avi,0xe436eb88,0x524f,0x11ce,0x9f,0x53,0x00,0x20,0xaf,0x0b,0xa7,0x70)
-OUR_GUID_ENTRY(MEDIASUBTYPE_Asf,0x3db80f90,0x9412,0x11d1,0xad,0xed,0x0,0x0,0xf8,0x75,0x4b,0x99)
-OUR_GUID_ENTRY(MEDIASUBTYPE_QTMovie,0xe436eb89,0x524f,0x11ce,0x9f,0x53,0x00,0x20,0xaf,0x0b,0xa7,0x70)
-OUR_GUID_ENTRY(MEDIASUBTYPE_QTRpza,0x617a7072,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_QTSmc,0x20636d73,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_QTRle,0x20656c72,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_QTJpeg,0x6765706a,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_PCMAudio_Obsolete,0xe436eb8a,0x524f,0x11ce,0x9f,0x53,0x00,0x20,0xaf,0x0b,0xa7,0x70)
-OUR_GUID_ENTRY(MEDIASUBTYPE_PCM,0x00000001,0x0000,0x0010,0x80,0x00,0x00,0xAA,0x00,0x38,0x9B,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_WAVE,0xe436eb8b,0x524f,0x11ce,0x9f,0x53,0x00,0x20,0xaf,0x0b,0xa7,0x70)
-OUR_GUID_ENTRY(MEDIASUBTYPE_AU,0xe436eb8c,0x524f,0x11ce,0x9f,0x53,0x00,0x20,0xaf,0x0b,0xa7,0x70)
-OUR_GUID_ENTRY(MEDIASUBTYPE_AIFF,0xe436eb8d,0x524f,0x11ce,0x9f,0x53,0x00,0x20,0xaf,0x0b,0xa7,0x70)
-OUR_GUID_ENTRY(MEDIASUBTYPE_dvsd,0x64737664,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_dvhd,0x64687664,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_dvsl,0x6c737664,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_dv25,0x35327664,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_dv50,0x30357664,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_dvh1,0x31687664,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_Line21_BytePair,0x6e8d4a22,0x310c,0x11d0,0xb7,0x9a,0x0,0xaa,0x0,0x37,0x67,0xa7)
-OUR_GUID_ENTRY(MEDIASUBTYPE_Line21_GOPPacket,0x6e8d4a23,0x310c,0x11d0,0xb7,0x9a,0x0,0xaa,0x0,0x37,0x67,0xa7)
-OUR_GUID_ENTRY(MEDIASUBTYPE_Line21_VBIRawData,0x6e8d4a24,0x310c,0x11d0,0xb7,0x9a,0x0,0xaa,0x0,0x37,0x67,0xa7)
-OUR_GUID_ENTRY(MEDIASUBTYPE_TELETEXT,0xf72a76e3,0xeb0a,0x11d0,0xac,0xe4,0x00,0x00,0xc0,0xcc,0x16,0xba)
-OUR_GUID_ENTRY(MEDIASUBTYPE_WSS,0x2791D576,0x8E7A,0x466F,0x9E,0x90,0x5D,0x3F,0x30,0x83,0x73,0x8B)
-OUR_GUID_ENTRY(MEDIASUBTYPE_VPS,0xa1b3f620,0x9792,0x4d8d,0x81,0xa4,0x86,0xaf,0x25,0x77,0x20,0x90)
-OUR_GUID_ENTRY(MEDIASUBTYPE_DRM_Audio,0x00000009,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_IEEE_FLOAT,0x00000003,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_DOLBY_AC3_SPDIF,0x00000092,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_RAW_SPORT,0x00000240,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_SPDIF_TAG_241h,0x00000241,0x0000,0x0010,0x80,0x00,0x00,0xaa,0x00,0x38,0x9b,0x71)
-OUR_GUID_ENTRY(MEDIASUBTYPE_DssVideo,0xa0af4f81,0xe163,0x11d0,0xba,0xd9,0x0,0x60,0x97,0x44,0x11,0x1a)
-OUR_GUID_ENTRY(MEDIASUBTYPE_DssAudio,0xa0af4f82,0xe163,0x11d0,0xba,0xd9,0x0,0x60,0x97,0x44,0x11,0x1a)
-OUR_GUID_ENTRY(MEDIASUBTYPE_VPVideo,0x5a9b6a40,0x1a22,0x11d1,0xba,0xd9,0x0,0x60,0x97,0x44,0x11,0x1a)
-OUR_GUID_ENTRY(MEDIASUBTYPE_VPVBI,0x5a9b6a41,0x1a22,0x11d1,0xba,0xd9,0x0,0x60,0x97,0x44,0x11,0x1a)
-OUR_GUID_ENTRY(CLSID_CaptureGraphBuilder,0xBF87B6E0,0x8C27,0x11d0,0xB3,0xF0,0x0,0xAA,0x00,0x37,0x61,0xC5)
-OUR_GUID_ENTRY(CLSID_CaptureGraphBuilder2,0xBF87B6E1,0x8C27,0x11d0,0xB3,0xF0,0x0,0xAA,0x00,0x37,0x61,0xC5)
-OUR_GUID_ENTRY(CLSID_ProtoFilterGraph,0xe436ebb0,0x524f,0x11ce,0x9f,0x53,0x00,0x20,0xaf,0x0b,0xa7,0x70)
-OUR_GUID_ENTRY(CLSID_SystemClock,0xe436ebb1,0x524f,0x11ce,0x9f,0x53,0x00,0x20,0xaf,0x0b,0xa7,0x70)
-OUR_GUID_ENTRY(CLSID_FilterMapper,0xe436ebb2,0x524f,0x11ce,0x9f,0x53,0x00,0x20,0xaf,0x0b,0xa7,0x70)
-OUR_GUID_ENTRY(CLSID_FilterGraph,0xe436ebb3,0x524f,0x11ce,0x9f,0x53,0x00,0x20,0xaf,0x0b,0xa7,0x70)
-OUR_GUID_ENTRY(CLSID_FilterGraphNoThread,0xe436ebb8,0x524f,0x11ce,0x9f,0x53,0x00,0x20,0xaf,0x0b,0xa7,0x70)
-OUR_GUID_ENTRY(CLSID_MPEG1Doc,0xe4bbd160,0x4269,0x11ce,0x83,0x8d,0x0,0xaa,0x0,0x55,0x59,0x5a)
-OUR_GUID_ENTRY(CLSID_FileSource,0x701722e0,0x8ae3,0x11ce,0xa8,0x5c,0x00,0xaa,0x00,0x2f,0xea,0xb5)
-OUR_GUID_ENTRY(CLSID_MPEG1PacketPlayer,0x26c25940,0x4ca9,0x11ce,0xa8,0x28,0x0,0xaa,0x0,0x2f,0xea,0xb5)
-OUR_GUID_ENTRY(CLSID_MPEG1Splitter,0x336475d0,0x942a,0x11ce,0xa8,0x70,0x00,0xaa,0x00,0x2f,0xea,0xb5)
-OUR_GUID_ENTRY(CLSID_CMpegVideoCodec,0xfeb50740,0x7bef,0x11ce,0x9b,0xd9,0x0,0x0,0xe2,0x2,0x59,0x9c)
-OUR_GUID_ENTRY(CLSID_CMpegAudioCodec,0x4a2286e0,0x7bef,0x11ce,0x9b,0xd9,0x0,0x0,0xe2,0x2,0x59,0x9c)
-OUR_GUID_ENTRY(CLSID_TextRender,0xe30629d3,0x27e5,0x11ce,0x87,0x5d,0x0,0x60,0x8c,0xb7,0x80,0x66)
-OUR_GUID_ENTRY(CLSID_InfTee,0xf8388a40,0xd5bb,0x11d0,0xbe,0x5a,0x0,0x80,0xc7,0x6,0x56,0x8e)
-OUR_GUID_ENTRY(CLSID_AviSplitter,0x1b544c20,0xfd0b,0x11ce,0x8c,0x63,0x0,0xaa,0x00,0x44,0xb5,0x1e)
-OUR_GUID_ENTRY(CLSID_AviReader,0x1b544c21,0xfd0b,0x11ce,0x8c,0x63,0x0,0xaa,0x00,0x44,0xb5,0x1e)
-OUR_GUID_ENTRY(CLSID_VfwCapture,0x1b544c22,0xfd0b,0x11ce,0x8c,0x63,0x0,0xaa,0x00,0x44,0xb5,0x1e)
-OUR_GUID_ENTRY(CLSID_CaptureProperties,0x1B544c22,0xFD0B,0x11ce,0x8C,0x63,0x00,0xAA,0x00,0x44,0xB5,0x1F)
-OUR_GUID_ENTRY(CLSID_FGControl,0xe436ebb4,0x524f,0x11ce,0x9f,0x53,0x00,0x20,0xaf,0x0b,0xa7,0x70)
-OUR_GUID_ENTRY(CLSID_MOVReader,0x44584800,0xf8ee,0x11ce,0xb2,0xd4,0x00,0xdd,0x1,0x10,0x1b,0x85)
-OUR_GUID_ENTRY(CLSID_QuickTimeParser,0xd51bd5a0,0x7548,0x11cf,0xa5,0x20,0x0,0x80,0xc7,0x7e,0xf5,0x8a)
-OUR_GUID_ENTRY(CLSID_QTDec,0xfdfe9681,0x74a3,0x11d0,0xaf,0xa7,0x0,0xaa,0x0,0xb6,0x7a,0x42)
-OUR_GUID_ENTRY(CLSID_AVIDoc,0xd3588ab0,0x0781,0x11ce,0xb0,0x3a,0x00,0x20,0xaf,0xb,0xa7,0x70)
-OUR_GUID_ENTRY(CLSID_VideoRenderer,0x70e102b0,0x5556,0x11ce,0x97,0xc0,0x00,0xaa,0x00,0x55,0x59,0x5a)
-OUR_GUID_ENTRY(CLSID_Colour,0x1643e180,0x90f5,0x11ce,0x97,0xd5,0x00,0xaa,0x00,0x55,0x59,0x5a)
-OUR_GUID_ENTRY(CLSID_Dither,0x1da08500,0x9edc,0x11cf,0xbc,0x10,0x00,0xaa,0x00,0xac,0x74,0xf6)
-OUR_GUID_ENTRY(CLSID_ModexRenderer,0x7167665,0x5011,0x11cf,0xbf,0x33,0x0,0xaa,0x0,0x55,0x59,0x5a)
-OUR_GUID_ENTRY(CLSID_AudioRender,0xe30629d1,0x27e5,0x11ce,0x87,0x5d,0x0,0x60,0x8c,0xb7,0x80,0x66)
-OUR_GUID_ENTRY(CLSID_AudioProperties,0x05589faf,0xc356,0x11ce,0xbf,0x01,0x0,0xaa,0x0,0x55,0x59,0x5a)
-OUR_GUID_ENTRY(CLSID_DSoundRender,0x79376820,0x07D0,0x11CF,0xA2,0x4D,0x0,0x20,0xAF,0xD7,0x97,0x67)
-OUR_GUID_ENTRY(CLSID_AudioRecord,0xe30629d2,0x27e5,0x11ce,0x87,0x5d,0x0,0x60,0x8c,0xb7,0x80,0x66)
-OUR_GUID_ENTRY(CLSID_AudioInputMixerProperties,0x2ca8ca52,0x3c3f,0x11d2,0xb7,0x3d,0x0,0xc0,0x4f,0xb6,0xbd,0x3d)
-OUR_GUID_ENTRY(CLSID_AVIDec,0xcf49d4e0,0x1115,0x11ce,0xb0,0x3a,0x0,0x20,0xaf,0xb,0xa7,0x70)
-OUR_GUID_ENTRY(CLSID_AVIDraw,0xa888df60,0x1e90,0x11cf,0xac,0x98,0x0,0xaa,0x0,0x4c,0xf,0xa9)
-OUR_GUID_ENTRY(CLSID_ACMWrapper,0x6a08cf80,0x0e18,0x11cf,0xa2,0x4d,0x0,0x20,0xaf,0xd7,0x97,0x67)
-OUR_GUID_ENTRY(CLSID_AsyncReader,0xe436ebb5,0x524f,0x11ce,0x9f,0x53,0x00,0x20,0xaf,0x0b,0xa7,0x70)
-OUR_GUID_ENTRY(CLSID_URLReader,0xe436ebb6,0x524f,0x11ce,0x9f,0x53,0x00,0x20,0xaf,0x0b,0xa7,0x70)
-OUR_GUID_ENTRY(CLSID_PersistMonikerPID,0xe436ebb7,0x524f,0x11ce,0x9f,0x53,0x00,0x20,0xaf,0x0b,0xa7,0x70)
-OUR_GUID_ENTRY(CLSID_AVICo,0xd76e2820,0x1563,0x11cf,0xac,0x98,0x0,0xaa,0x0,0x4c,0xf,0xa9)
-OUR_GUID_ENTRY(CLSID_FileWriter,0x8596e5f0,0xda5,0x11d0,0xbd,0x21,0x0,0xa0,0xc9,0x11,0xce,0x86)
-OUR_GUID_ENTRY(CLSID_AviDest,0xe2510970,0xf137,0x11ce,0x8b,0x67,0x0,0xaa,0x0,0xa3,0xf1,0xa6)
-OUR_GUID_ENTRY(CLSID_AviMuxProptyPage,0xc647b5c0,0x157c,0x11d0,0xbd,0x23,0x0,0xa0,0xc9,0x11,0xce,0x86)
-OUR_GUID_ENTRY(CLSID_AviMuxProptyPage1,0xa9ae910,0x85c0,0x11d0,0xbd,0x42,0x0,0xa0,0xc9,0x11,0xce,0x86)
-OUR_GUID_ENTRY(CLSID_AVIMIDIRender,0x07b65360,0xc445,0x11ce,0xaf,0xde,0x00,0xaa,0x00,0x6c,0x14,0xf4)
-OUR_GUID_ENTRY(CLSID_WMAsfReader,0x187463a0,0x5bb7,0x11d3,0xac,0xbe,0x0,0x80,0xc7,0x5e,0x24,0x6e)
-OUR_GUID_ENTRY(CLSID_WMAsfWriter,0x7c23220e,0x55bb,0x11d3,0x8b,0x16,0x0,0xc0,0x4f,0xb6,0xbd,0x3d)
-OUR_GUID_ENTRY(CLSID_MPEG2Demultiplexer,0xafb6c280,0x2c41,0x11d3,0x8a,0x60,0x00,0x00,0xf8,0x1e,0x0e,0x4a)
-OUR_GUID_ENTRY(CLSID_MMSPLITTER,0x3ae86b20,0x7be8,0x11d1,0xab,0xe6,0x00,0xa0,0xc9,0x05,0xf3,0x75)
-OUR_GUID_ENTRY(CLSID_StreamBufferSink,0x2db47ae5,0xcf39,0x43c2,0xb4,0xd6,0xc,0xd8,0xd9,0x9,0x46,0xf4)
-OUR_GUID_ENTRY(CLSID_StreamBufferSource,0xc9f5fe02,0xf851,0x4eb5,0x99,0xee,0xad,0x60,0x2a,0xf1,0xe6,0x19)
-OUR_GUID_ENTRY(CLSID_StreamBufferConfig,0xfa8a68b2,0xc864,0x4ba2,0xad,0x53,0xd3,0x87,0x6a,0x87,0x49,0x4b)
-OUR_GUID_ENTRY(CLSID_Mpeg2VideoStreamAnalyzer,0x6cfad761,0x735d,0x4aa5,0x8a,0xfc,0xaf,0x91,0xa7,0xd6,0x1e,0xba)
-OUR_GUID_ENTRY(CLSID_StreamBufferRecordingAttributes,0xccaa63ac,0x1057,0x4778,0xae,0x92,0x12,0x6,0xab,0x9a,0xce,0xe6)
-OUR_GUID_ENTRY(CLSID_StreamBufferComposeRecording,0xd682c4ba,0xa90a,0x42fe,0xb9,0xe1,0x3,0x10,0x98,0x49,0xc4,0x23)
-OUR_GUID_ENTRY(CLSID_DVVideoCodec,0xb1b77c00,0xc3e4,0x11cf,0xaf,0x79,0x0,0xaa,0x0,0xb6,0x7a,0x42)
-OUR_GUID_ENTRY(CLSID_DVVideoEnc,0x13aa3650,0xbb6f,0x11d0,0xaf,0xb9,0x0,0xaa,0x0,0xb6,0x7a,0x42)
-OUR_GUID_ENTRY(CLSID_DVSplitter,0x4eb31670,0x9fc6,0x11cf,0xaf,0x6e,0x0,0xaa,0x0,0xb6,0x7a,0x42)
-OUR_GUID_ENTRY(CLSID_DVMux,0x129d7e40,0xc10d,0x11d0,0xaf,0xb9,0x0,0xaa,0x0,0xb6,0x7a,0x42)
-OUR_GUID_ENTRY(CLSID_SeekingPassThru,0x60af76c,0x68dd,0x11d0,0x8f,0xc1,0x0,0xc0,0x4f,0xd9,0x18,0x9d)
-OUR_GUID_ENTRY(CLSID_Line21Decoder,0x6e8d4a20,0x310c,0x11d0,0xb7,0x9a,0x0,0xaa,0x0,0x37,0x67,0xa7)
-OUR_GUID_ENTRY(CLSID_Line21Decoder2,0xe4206432,0x01a1,0x4bee,0xb3,0xe1,0x37,0x02,0xc8,0xed,0xc5,0x74)
-OUR_GUID_ENTRY(CLSID_OverlayMixer,0xcd8743a1,0x3736,0x11d0,0x9e,0x69,0x0,0xc0,0x4f,0xd7,0xc1,0x5b)
-OUR_GUID_ENTRY(CLSID_VBISurfaces,0x814b9800,0x1c88,0x11d1,0xba,0xd9,0x0,0x60,0x97,0x44,0x11,0x1a)
-OUR_GUID_ENTRY(CLSID_WSTDecoder,0x70bc06e0,0x5666,0x11d3,0xa1,0x84,0x0,0x10,0x5a,0xef,0x9f,0x33)
-OUR_GUID_ENTRY(CLSID_MjpegDec,0x301056d0,0x6dff,0x11d2,0x9e,0xeb,0x0,0x60,0x8,0x3,0x9e,0x37)
-OUR_GUID_ENTRY(CLSID_MJPGEnc,0xb80ab0a0,0x7416,0x11d2,0x9e,0xeb,0x0,0x60,0x8,0x3,0x9e,0x37)
-OUR_GUID_ENTRY(CLSID_SystemDeviceEnum,0x62BE5D10,0x60EB,0x11d0,0xBD,0x3B,0x00,0xA0,0xC9,0x11,0xCE,0x86)
-OUR_GUID_ENTRY(CLSID_CDeviceMoniker,0x4315D437,0x5B8C,0x11d0,0xBD,0x3B,0x00,0xA0,0xC9,0x11,0xCE,0x86)
-OUR_GUID_ENTRY(CLSID_VideoInputDeviceCategory,0x860BB310,0x5D01,0x11d0,0xBD,0x3B,0x00,0xA0,0xC9,0x11,0xCE,0x86)
-OUR_GUID_ENTRY(CLSID_CVidCapClassManager,0x860BB310,0x5D01,0x11d0,0xBD,0x3B,0x00,0xA0,0xC9,0x11,0xCE,0x86)
-OUR_GUID_ENTRY(CLSID_LegacyAmFilterCategory,0x083863F1,0x70DE,0x11d0,0xBD,0x40,0x00,0xA0,0xC9,0x11,0xCE,0x86)
-OUR_GUID_ENTRY(CLSID_CQzFilterClassManager,0x083863F1,0x70DE,0x11d0,0xBD,0x40,0x00,0xA0,0xC9,0x11,0xCE,0x86)
-OUR_GUID_ENTRY(CLSID_VideoCompressorCategory,0x33d9a760,0x90c8,0x11d0,0xbd,0x43,0x0,0xa0,0xc9,0x11,0xce,0x86)
-OUR_GUID_ENTRY(CLSID_CIcmCoClassManager,0x33d9a760,0x90c8,0x11d0,0xbd,0x43,0x0,0xa0,0xc9,0x11,0xce,0x86)
-OUR_GUID_ENTRY(CLSID_AudioCompressorCategory,0x33d9a761,0x90c8,0x11d0,0xbd,0x43,0x0,0xa0,0xc9,0x11,0xce,0x86)
-OUR_GUID_ENTRY(CLSID_CAcmCoClassManager,0x33d9a761,0x90c8,0x11d0,0xbd,0x43,0x0,0xa0,0xc9,0x11,0xce,0x86)
-OUR_GUID_ENTRY(CLSID_AudioInputDeviceCategory,0x33d9a762,0x90c8,0x11d0,0xbd,0x43,0x0,0xa0,0xc9,0x11,0xce,0x86)
-OUR_GUID_ENTRY(CLSID_CWaveinClassManager,0x33d9a762,0x90c8,0x11d0,0xbd,0x43,0x0,0xa0,0xc9,0x11,0xce,0x86)
-OUR_GUID_ENTRY(CLSID_AudioRendererCategory,0xe0f158e1,0xcb04,0x11d0,0xbd,0x4e,0x0,0xa0,0xc9,0x11,0xce,0x86)
-OUR_GUID_ENTRY(CLSID_CWaveOutClassManager,0xe0f158e1,0xcb04,0x11d0,0xbd,0x4e,0x0,0xa0,0xc9,0x11,0xce,0x86)
-OUR_GUID_ENTRY(CLSID_MidiRendererCategory,0x4EfE2452,0x168A,0x11d1,0xBC,0x76,0x0,0xc0,0x4F,0xB9,0x45,0x3B)
-OUR_GUID_ENTRY(CLSID_CMidiOutClassManager,0x4EfE2452,0x168A,0x11d1,0xBC,0x76,0x0,0xc0,0x4F,0xB9,0x45,0x3B)
-OUR_GUID_ENTRY(CLSID_TransmitCategory,0xcc7bfb41,0xf175,0x11d1,0xa3,0x92,0x0,0xe0,0x29,0x1f,0x39,0x59)
-OUR_GUID_ENTRY(CLSID_DeviceControlCategory,0xcc7bfb46,0xf175,0x11d1,0xa3,0x92,0x0,0xe0,0x29,0x1f,0x39,0x59)
-OUR_GUID_ENTRY(CLSID_ActiveMovieCategories,0xda4e3da0,0xd07d,0x11d0,0xbd,0x50,0x0,0xa0,0xc9,0x11,0xce,0x86)
-OUR_GUID_ENTRY(CLSID_DVDHWDecodersCategory,0x2721AE20,0x7E70,0x11D0,0xA5,0xD6,0x28,0xDB,0x04,0xC1,0x00,0x00)
-OUR_GUID_ENTRY(CLSID_MediaEncoderCategory,0x7D22E920,0x5CA9,0x4787,0x8C,0x2B,0xA6,0x77,0x9B,0xD1,0x17,0x81)
-OUR_GUID_ENTRY(CLSID_MediaMultiplexerCategory,0x236C9559,0xADCE,0x4736,0xBF,0x72,0xBA,0xB3,0x4E,0x39,0x21,0x96)
-OUR_GUID_ENTRY(CLSID_FilterMapper2,0xcda42200,0xbd88,0x11d0,0xbd,0x4e,0x0,0xa0,0xc9,0x11,0xce,0x86)
-OUR_GUID_ENTRY(CLSID_MemoryAllocator,0x1e651cc0,0xb199,0x11d0,0x82,0x12,0x00,0xc0,0x4f,0xc3,0x2c,0x45)
-OUR_GUID_ENTRY(CLSID_MediaPropertyBag,0xcdbd8d00,0xc193,0x11d0,0xbd,0x4e,0x0,0xa0,0xc9,0x11,0xce,0x86)
-OUR_GUID_ENTRY(CLSID_DvdGraphBuilder,0xFCC152B7,0xF372,0x11d0,0x8E,0x00,0x00,0xC0,0x4F,0xD7,0xC0,0x8B)
-OUR_GUID_ENTRY(CLSID_DVDNavigator,0x9b8c4620,0x2c1a,0x11d0,0x84,0x93,0x0,0xa0,0x24,0x38,0xad,0x48)
-OUR_GUID_ENTRY(CLSID_DVDState,0xf963c5cf,0xa659,0x4a93,0x96,0x38,0xca,0xf3,0xcd,0x27,0x7d,0x13)
-OUR_GUID_ENTRY(CLSID_SmartTee,0xcc58e280,0x8aa1,0x11d1,0xb3,0xf1,0x0,0xaa,0x0,0x37,0x61,0xc5)
-OUR_GUID_ENTRY(FORMAT_None,0x0F6417D6,0xc318,0x11d0,0xa4,0x3f,0x00,0xa0,0xc9,0x22,0x31,0x96)
-OUR_GUID_ENTRY(FORMAT_VideoInfo,0x05589f80,0xc356,0x11ce,0xbf,0x01,0x00,0xaa,0x00,0x55,0x59,0x5a)
-OUR_GUID_ENTRY(FORMAT_VideoInfo2,0xf72a76A0,0xeb0a,0x11d0,0xac,0xe4,0x00,0x00,0xc0,0xcc,0x16,0xba)
-OUR_GUID_ENTRY(FORMAT_WaveFormatEx,0x05589f81,0xc356,0x11ce,0xbf,0x01,0x00,0xaa,0x00,0x55,0x59,0x5a)
-OUR_GUID_ENTRY(FORMAT_MPEGVideo,0x05589f82,0xc356,0x11ce,0xbf,0x01,0x00,0xaa,0x00,0x55,0x59,0x5a)
-OUR_GUID_ENTRY(FORMAT_MPEGStreams,0x05589f83,0xc356,0x11ce,0xbf,0x01,0x00,0xaa,0x00,0x55,0x59,0x5a)
-OUR_GUID_ENTRY(FORMAT_DvInfo,0x05589f84,0xc356,0x11ce,0xbf,0x01,0x00,0xaa,0x00,0x55,0x59,0x5a)
-OUR_GUID_ENTRY(CLSID_DirectDrawProperties,0x944d4c00,0xdd52,0x11ce,0xbf,0x0e,0x00,0xaa,0x00,0x55,0x59,0x5a)
-OUR_GUID_ENTRY(CLSID_PerformanceProperties,0x59ce6880,0xacf8,0x11cf,0xb5,0x6e,0x00,0x80,0xc7,0xc4,0xb6,0x8a)
-OUR_GUID_ENTRY(CLSID_QualityProperties,0x418afb70,0xf8b8,0x11ce,0xaa,0xc6,0x00,0x20,0xaf,0x0b,0x99,0xa3)
-OUR_GUID_ENTRY(IID_IBaseVideoMixer,0x61ded640,0xe912,0x11ce,0xa0,0x99,0x00,0xaa,0x00,0x47,0x9a,0x58)
-OUR_GUID_ENTRY(IID_IDirectDrawVideo,0x36d39eb0,0xdd75,0x11ce,0xbf,0x0e,0x00,0xaa,0x00,0x55,0x59,0x5a)
-OUR_GUID_ENTRY(IID_IQualProp,0x1bd0ecb0,0xf8e2,0x11ce,0xaa,0xc6,0x00,0x20,0xaf,0x0b,0x99,0xa3)
-OUR_GUID_ENTRY(CLSID_VPObject,0xce292861,0xfc88,0x11d0,0x9e,0x69,0x0,0xc0,0x4f,0xd7,0xc1,0x5b)
-OUR_GUID_ENTRY(IID_IVPObject,0xce292862,0xfc88,0x11d0,0x9e,0x69,0x0,0xc0,0x4f,0xd7,0xc1,0x5b)
-OUR_GUID_ENTRY(IID_IVPControl,0x25df12c1,0x3de0,0x11d1,0x9e,0x69,0x0,0xc0,0x4f,0xd7,0xc1,0x5b)
-OUR_GUID_ENTRY(CLSID_VPVBIObject,0x814b9801,0x1c88,0x11d1,0xba,0xd9,0x0,0x60,0x97,0x44,0x11,0x1a)
-OUR_GUID_ENTRY(IID_IVPVBIObject,0x814b9802,0x1c88,0x11d1,0xba,0xd9,0x0,0x60,0x97,0x44,0x11,0x1a)
-OUR_GUID_ENTRY(IID_IVPConfig,0xbc29a660,0x30e3,0x11d0,0x9e,0x69,0x0,0xc0,0x4f,0xd7,0xc1,0x5b)
-OUR_GUID_ENTRY(IID_IVPNotify,0xc76794a1,0xd6c5,0x11d0,0x9e,0x69,0x0,0xc0,0x4f,0xd7,0xc1,0x5b)
-OUR_GUID_ENTRY(IID_IVPNotify2,0xebf47183,0x8764,0x11d1,0x9e,0x69,0x0,0xc0,0x4f,0xd7,0xc1,0x5b)
-OUR_GUID_ENTRY(IID_IVPVBIConfig,0xec529b00,0x1a1f,0x11d1,0xba,0xd9,0x0,0x60,0x97,0x44,0x11,0x1a)
-OUR_GUID_ENTRY(IID_IVPVBINotify,0xec529b01,0x1a1f,0x11d1,0xba,0xd9,0x0,0x60,0x97,0x44,0x11,0x1a)
-OUR_GUID_ENTRY(IID_IMixerPinConfig,0x593cdde1,0x759,0x11d1,0x9e,0x69,0x0,0xc0,0x4f,0xd7,0xc1,0x5b)
-OUR_GUID_ENTRY(IID_IMixerPinConfig2,0xebf47182,0x8764,0x11d1,0x9e,0x69,0x0,0xc0,0x4f,0xd7,0xc1,0x5b)
-#ifndef __DDRAW_INCLUDED__
-OUR_GUID_ENTRY(CLSID_DirectDraw,0xD7B70EE0,0x4340,0x11CF,0xB0,0x63,0x00,0x20,0xAF,0xC2,0xCD,0x35)
-OUR_GUID_ENTRY(CLSID_DirectDrawClipper,0x593817A0,0x7DB3,0x11CF,0xA2,0xDE,0x00,0xAA,0x00,0xb9,0x33,0x56)
-OUR_GUID_ENTRY(IID_IDirectDraw,0x6C14DB80,0xA733,0x11CE,0xA5,0x21,0x00,0x20,0xAF,0x0B,0xE5,0x60)
-OUR_GUID_ENTRY(IID_IDirectDraw2,0xB3A6F3E0,0x2B43,0x11CF,0xA2,0xDE,0x00,0xAA,0x00,0xB9,0x33,0x56)
-OUR_GUID_ENTRY(IID_IDirectDrawSurface,0x6C14DB81,0xA733,0x11CE,0xA5,0x21,0x00,0x20,0xAF,0x0B,0xE5,0x60)
-OUR_GUID_ENTRY(IID_IDirectDrawSurface2,0x57805885,0x6eec,0x11cf,0x94,0x41,0xa8,0x23,0x03,0xc1,0x0e,0x27)
-OUR_GUID_ENTRY(IID_IDirectDrawSurface3,0xDA044E00,0x69B2,0x11D0,0xA1,0xD5,0x00,0xAA,0x00,0xB8,0xDF,0xBB)
-OUR_GUID_ENTRY(IID_IDirectDrawSurface4,0x0B2B8630,0xAD35,0x11D0,0x8E,0xA6,0x00,0x60,0x97,0x97,0xEA,0x5B)
-OUR_GUID_ENTRY(IID_IDirectDrawSurface7,0x06675a80,0x3b9b,0x11d2,0xb9,0x2f,0x00,0x60,0x97,0x97,0xea,0x5b)
-OUR_GUID_ENTRY(IID_IDirectDrawPalette,0x6C14DB84,0xA733,0x11CE,0xA5,0x21,0x00,0x20,0xAF,0x0B,0xE5,0x60)
-OUR_GUID_ENTRY(IID_IDirectDrawClipper,0x6C14DB85,0xA733,0x11CE,0xA5,0x21,0x00,0x20,0xAF,0x0B,0xE5,0x60)
-OUR_GUID_ENTRY(IID_IDirectDrawColorControl,0x4B9F0EE0,0x0D7E,0x11D0,0x9B,0x06,0x00,0xA0,0xC9,0x03,0xA3,0xB8)
-#endif
-#ifndef __DVP_INCLUDED__
-OUR_GUID_ENTRY(IID_IDDVideoPortContainer,0x6C142760,0xA733,0x11CE,0xA5,0x21,0x00,0x20,0xAF,0x0B,0xE5,0x60)
-#endif
-#ifndef __DDKM_INCLUDED__
-OUR_GUID_ENTRY(IID_IDirectDrawKernel,0x8D56C120,0x6A08,0x11D0,0x9B,0x06,0x00,0xA0,0xC9,0x03,0xA3,0xB8)
-OUR_GUID_ENTRY(IID_IDirectDrawSurfaceKernel,0x60755DA0,0x6A40,0x11D0,0x9B,0x06,0x00,0xA0,0xC9,0x03,0xA3,0xB8)
-#endif
-OUR_GUID_ENTRY(CLSID_ModexProperties,0x0618aa30,0x6bc4,0x11cf,0xbf,0x36,0x00,0xaa,0x00,0x55,0x59,0x5a)
-OUR_GUID_ENTRY(IID_IFullScreenVideo,0xdd1d7110,0x7836,0x11cf,0xbf,0x47,0x00,0xaa,0x00,0x55,0x59,0x5a)
-OUR_GUID_ENTRY(IID_IFullScreenVideoEx,0x53479470,0xf1dd,0x11cf,0xbc,0x42,0x00,0xaa,0x00,0xac,0x74,0xf6)
-OUR_GUID_ENTRY(CLSID_DVDecPropertiesPage,0x101193c0,0xbfe,0x11d0,0xaf,0x91,0x0,0xaa,0x0,0xb6,0x7a,0x42)
-OUR_GUID_ENTRY(CLSID_DVEncPropertiesPage,0x4150f050,0xbb6f,0x11d0,0xaf,0xb9,0x0,0xaa,0x0,0xb6,0x7a,0x42)
-OUR_GUID_ENTRY(CLSID_DVMuxPropertyPage,0x4db880e0,0xc10d,0x11d0,0xaf,0xb9,0x0,0xaa,0x0,0xb6,0x7a,0x42)
-OUR_GUID_ENTRY(IID_IAMDirectSound,0x546f4260,0xd53e,0x11cf,0xb3,0xf0,0x0,0xaa,0x0,0x37,0x61,0xc5)
-OUR_GUID_ENTRY(IID_IMpegAudioDecoder,0xb45dd570,0x3c77,0x11d1,0xab,0xe1,0x00,0xa0,0xc9,0x05,0xf3,0x75)
-OUR_GUID_ENTRY(IID_IAMLine21Decoder,0x6e8d4a21,0x310c,0x11d0,0xb7,0x9a,0x0,0xaa,0x0,0x37,0x67,0xa7)
-OUR_GUID_ENTRY(IID_IAMWstDecoder,0xc056de21,0x75c2,0x11d3,0xa1,0x84,0x0,0x10,0x5a,0xef,0x9f,0x33)
-OUR_GUID_ENTRY(CLSID_WstDecoderPropertyPage,0x4e27f80,0x91e4,0x11d3,0xa1,0x84,0x0,0x10,0x5a,0xef,0x9f,0x33)
-OUR_GUID_ENTRY(FORMAT_AnalogVideo,0x482dde0,0x7817,0x11cf,0x8a,0x3,0x0,0xaa,0x0,0x6e,0xcb,0x65)
-OUR_GUID_ENTRY(MEDIATYPE_AnalogVideo,0x482dde1,0x7817,0x11cf,0x8a,0x3,0x0,0xaa,0x0,0x6e,0xcb,0x65)
-OUR_GUID_ENTRY(MEDIASUBTYPE_AnalogVideo_NTSC_M,0x482dde2,0x7817,0x11cf,0x8a,0x3,0x0,0xaa,0x0,0x6e,0xcb,0x65)
-OUR_GUID_ENTRY(MEDIASUBTYPE_AnalogVideo_PAL_B,0x482dde5,0x7817,0x11cf,0x8a,0x3,0x0,0xaa,0x0,0x6e,0xcb,0x65)
-OUR_GUID_ENTRY(MEDIASUBTYPE_AnalogVideo_PAL_D,0x482dde6,0x7817,0x11cf,0x8a,0x3,0x0,0xaa,0x0,0x6e,0xcb,0x65)
-OUR_GUID_ENTRY(MEDIASUBTYPE_AnalogVideo_PAL_G,0x482dde7,0x7817,0x11cf,0x8a,0x3,0x0,0xaa,0x0,0x6e,0xcb,0x65)
-OUR_GUID_ENTRY(MEDIASUBTYPE_AnalogVideo_PAL_H,0x482dde8,0x7817,0x11cf,0x8a,0x3,0x0,0xaa,0x0,0x6e,0xcb,0x65)
-OUR_GUID_ENTRY(MEDIASUBTYPE_AnalogVideo_PAL_I,0x482dde9,0x7817,0x11cf,0x8a,0x3,0x0,0xaa,0x0,0x6e,0xcb,0x65)
-OUR_GUID_ENTRY(MEDIASUBTYPE_AnalogVideo_PAL_M,0x482ddea,0x7817,0x11cf,0x8a,0x3,0x0,0xaa,0x0,0x6e,0xcb,0x65)
-OUR_GUID_ENTRY(MEDIASUBTYPE_AnalogVideo_PAL_N,0x482ddeb,0x7817,0x11cf,0x8a,0x3,0x0,0xaa,0x0,0x6e,0xcb,0x65)
-OUR_GUID_ENTRY(MEDIASUBTYPE_AnalogVideo_PAL_N_COMBO,0x482ddec,0x7817,0x11cf,0x8a,0x3,0x0,0xaa,0x0,0x6e,0xcb,0x65)
-OUR_GUID_ENTRY(MEDIASUBTYPE_AnalogVideo_SECAM_B,0x482ddf0,0x7817,0x11cf,0x8a,0x3,0x0,0xaa,0x0,0x6e,0xcb,0x65)
-OUR_GUID_ENTRY(MEDIASUBTYPE_AnalogVideo_SECAM_D,0x482ddf1,0x7817,0x11cf,0x8a,0x3,0x0,0xaa,0x0,0x6e,0xcb,0x65)
-OUR_GUID_ENTRY(MEDIASUBTYPE_AnalogVideo_SECAM_G,0x482ddf2,0x7817,0x11cf,0x8a,0x3,0x0,0xaa,0x0,0x6e,0xcb,0x65)
-OUR_GUID_ENTRY(MEDIASUBTYPE_AnalogVideo_SECAM_H,0x482ddf3,0x7817,0x11cf,0x8a,0x3,0x0,0xaa,0x0,0x6e,0xcb,0x65)
-OUR_GUID_ENTRY(MEDIASUBTYPE_AnalogVideo_SECAM_K,0x482ddf4,0x7817,0x11cf,0x8a,0x3,0x0,0xaa,0x0,0x6e,0xcb,0x65)
-OUR_GUID_ENTRY(MEDIASUBTYPE_AnalogVideo_SECAM_K1,0x482ddf5,0x7817,0x11cf,0x8a,0x3,0x0,0xaa,0x0,0x6e,0xcb,0x65)
-OUR_GUID_ENTRY(MEDIASUBTYPE_AnalogVideo_SECAM_L,0x482ddf6,0x7817,0x11cf,0x8a,0x3,0x0,0xaa,0x0,0x6e,0xcb,0x65)
-OUR_GUID_ENTRY(MEDIATYPE_AnalogAudio,0x482dee1,0x7817,0x11cf,0x8a,0x3,0x0,0xaa,0x0,0x6e,0xcb,0x65)
-
-#include "dshow/ksuuids.h"
-
-OUR_GUID_ENTRY(TIME_FORMAT_NONE,0L,0,0,0,0,0,0,0,0,0,0)
-OUR_GUID_ENTRY(TIME_FORMAT_FRAME,0x7b785570,0x8c82,0x11cf,0xbc,0xc,0x0,0xaa,0x0,0xac,0x74,0xf6)
-OUR_GUID_ENTRY(TIME_FORMAT_BYTE,0x7b785571,0x8c82,0x11cf,0xbc,0xc,0x0,0xaa,0x0,0xac,0x74,0xf6)
-OUR_GUID_ENTRY(TIME_FORMAT_SAMPLE,0x7b785572,0x8c82,0x11cf,0xbc,0xc,0x0,0xaa,0x0,0xac,0x74,0xf6)
-OUR_GUID_ENTRY(TIME_FORMAT_FIELD,0x7b785573,0x8c82,0x11cf,0xbc,0xc,0x0,0xaa,0x0,0xac,0x74,0xf6)
-OUR_GUID_ENTRY(TIME_FORMAT_MEDIA_TIME,0x7b785574,0x8c82,0x11cf,0xbc,0xc,0x0,0xaa,0x0,0xac,0x74,0xf6)
-OUR_GUID_ENTRY(AMPROPSETID_Pin,0x9b00f101,0x1567,0x11d1,0xb3,0xf1,0x0,0xaa,0x0,0x37,0x61,0xc5)
-OUR_GUID_ENTRY(PIN_CATEGORY_CAPTURE,0xfb6c4281,0x0353,0x11d1,0x90,0x5f,0x00,0x00,0xc0,0xcc,0x16,0xba)
-OUR_GUID_ENTRY(PIN_CATEGORY_PREVIEW,0xfb6c4282,0x0353,0x11d1,0x90,0x5f,0x00,0x00,0xc0,0xcc,0x16,0xba)
-OUR_GUID_ENTRY(PIN_CATEGORY_ANALOGVIDEOIN,0xfb6c4283,0x0353,0x11d1,0x90,0x5f,0x00,0x00,0xc0,0xcc,0x16,0xba)
-OUR_GUID_ENTRY(PIN_CATEGORY_VBI,0xfb6c4284,0x0353,0x11d1,0x90,0x5f,0x00,0x00,0xc0,0xcc,0x16,0xba)
-OUR_GUID_ENTRY(PIN_CATEGORY_VIDEOPORT,0xfb6c4285,0x0353,0x11d1,0x90,0x5f,0x00,0x00,0xc0,0xcc,0x16,0xba)
-OUR_GUID_ENTRY(PIN_CATEGORY_NABTS,0xfb6c4286,0x0353,0x11d1,0x90,0x5f,0x00,0x00,0xc0,0xcc,0x16,0xba)
-OUR_GUID_ENTRY(PIN_CATEGORY_EDS,0xfb6c4287,0x0353,0x11d1,0x90,0x5f,0x00,0x00,0xc0,0xcc,0x16,0xba)
-OUR_GUID_ENTRY(PIN_CATEGORY_TELETEXT,0xfb6c4288,0x0353,0x11d1,0x90,0x5f,0x00,0x00,0xc0,0xcc,0x16,0xba)
-OUR_GUID_ENTRY(PIN_CATEGORY_CC,0xfb6c4289,0x0353,0x11d1,0x90,0x5f,0x00,0x00,0xc0,0xcc,0x16,0xba)
-OUR_GUID_ENTRY(PIN_CATEGORY_STILL,0xfb6c428a,0x0353,0x11d1,0x90,0x5f,0x00,0x00,0xc0,0xcc,0x16,0xba)
-OUR_GUID_ENTRY(PIN_CATEGORY_TIMECODE,0xfb6c428b,0x0353,0x11d1,0x90,0x5f,0x00,0x00,0xc0,0xcc,0x16,0xba)
-OUR_GUID_ENTRY(PIN_CATEGORY_VIDEOPORT_VBI,0xfb6c428c,0x0353,0x11d1,0x90,0x5f,0x00,0x00,0xc0,0xcc,0x16,0xba)
-OUR_GUID_ENTRY(LOOK_UPSTREAM_ONLY,0xac798be0,0x98e3,0x11d1,0xb3,0xf1,0x0,0xaa,0x0,0x37,0x61,0xc5)
-OUR_GUID_ENTRY(LOOK_DOWNSTREAM_ONLY,0xac798be1,0x98e3,0x11d1,0xb3,0xf1,0x0,0xaa,0x0,0x37,0x61,0xc5)
-OUR_GUID_ENTRY(CLSID_TVTunerFilterPropertyPage,0x266eee41,0x6c63,0x11cf,0x8a,0x3,0x0,0xaa,0x0,0x6e,0xcb,0x65)
-OUR_GUID_ENTRY(CLSID_CrossbarFilterPropertyPage,0x71f96461,0x78f3,0x11d0,0xa1,0x8c,0x0,0xa0,0xc9,0x11,0x89,0x56)
-OUR_GUID_ENTRY(CLSID_TVAudioFilterPropertyPage,0x71f96463,0x78f3,0x11d0,0xa1,0x8c,0x0,0xa0,0xc9,0x11,0x89,0x56)
-OUR_GUID_ENTRY(CLSID_VideoProcAmpPropertyPage,0x71f96464,0x78f3,0x11d0,0xa1,0x8c,0x0,0xa0,0xc9,0x11,0x89,0x56)
-OUR_GUID_ENTRY(CLSID_CameraControlPropertyPage,0x71f96465,0x78f3,0x11d0,0xa1,0x8c,0x0,0xa0,0xc9,0x11,0x89,0x56)
-OUR_GUID_ENTRY(CLSID_AnalogVideoDecoderPropertyPage,0x71f96466,0x78f3,0x11d0,0xa1,0x8c,0x0,0xa0,0xc9,0x11,0x89,0x56)
-OUR_GUID_ENTRY(CLSID_VideoStreamConfigPropertyPage,0x71f96467,0x78f3,0x11d0,0xa1,0x8c,0x0,0xa0,0xc9,0x11,0x89,0x56)
-OUR_GUID_ENTRY(CLSID_AudioRendererAdvancedProperties,0x37e92a92,0xd9aa,0x11d2,0xbf,0x84,0x8e,0xf2,0xb1,0x55,0x5a,0xed)
-OUR_GUID_ENTRY(CLSID_VideoMixingRenderer,0xB87BEB7B,0x8D29,0x423f,0xAE,0x4D,0x65,0x82,0xC1,0x01,0x75,0xAC)
-OUR_GUID_ENTRY(CLSID_VideoRendererDefault,0x6BC1CFFA,0x8FC1,0x4261,0xAC,0x22,0xCF,0xB4,0xCC,0x38,0xDB,0x50)
-OUR_GUID_ENTRY(CLSID_AllocPresenter,0x99d54f63,0x1a69,0x41ae,0xaa,0x4d,0xc9,0x76,0xeb,0x3f,0x07,0x13)
-OUR_GUID_ENTRY(CLSID_AllocPresenterDDXclMode,0x4444ac9e,0x242e,0x471b,0xa3,0xc7,0x45,0xdc,0xd4,0x63,0x52,0xbc)
-OUR_GUID_ENTRY(CLSID_VideoPortManager,0x6f26a6cd,0x967b,0x47fd,0x87,0x4a,0x7a,0xed,0x2c,0x9d,0x25,0xa2)
-OUR_GUID_ENTRY(CLSID_VideoMixingRenderer9,0x51b4abf3,0x748f,0x4e3b,0xa2,0x76,0xc8,0x28,0x33,0x0e,0x92,0x6a)
-OUR_GUID_ENTRY(CLSID_ATSCNetworkProvider,0x0dad2fdd,0x5fd7,0x11d3,0x8f,0x50,0x00,0xc0,0x4f,0x79,0x71,0xe2)
-OUR_GUID_ENTRY(CLSID_ATSCNetworkPropertyPage,0xe3444d16,0x5ac4,0x4386,0x88,0xdf,0x13,0xfd,0x23,0x0e,0x1d,0xda)
-OUR_GUID_ENTRY(CLSID_DVBSNetworkProvider,0xfa4b375a,0x45b4,0x4d45,0x84,0x40,0x26,0x39,0x57,0xb1,0x16,0x23)
-OUR_GUID_ENTRY(CLSID_DVBTNetworkProvider,0x216c62df,0x6d7f,0x4e9a,0x85,0x71,0x5,0xf1,0x4e,0xdb,0x76,0x6a)
-OUR_GUID_ENTRY(CLSID_DVBCNetworkProvider,0xdc0c0fe7,0x485,0x4266,0xb9,0x3f,0x68,0xfb,0xf8,0xe,0xd8,0x34)
-OUR_GUID_ENTRY(CLSID_DShowTVEFilter,0x05500280,0xFAA5,0x4DF9,0x82,0x46,0xBF,0xC2,0x3A,0xC5,0xCE,0xA8)
-OUR_GUID_ENTRY(CLSID_TVEFilterTuneProperties,0x05500281,0xFAA5,0x4DF9,0x82,0x46,0xBF,0xC2,0x3A,0xC5,0xCE,0xA8)
-OUR_GUID_ENTRY(CLSID_TVEFilterCCProperties,0x05500282,0xFAA5,0x4DF9,0x82,0x46,0xBF,0xC2,0x3A,0xC5,0xCE,0xA8)
-OUR_GUID_ENTRY(CLSID_TVEFilterStatsProperties,0x05500283,0xFAA5,0x4DF9,0x82,0x46,0xBF,0xC2,0x3A,0xC5,0xCE,0xA8)
-OUR_GUID_ENTRY(CLSID_IVideoEncoderProxy,0xb43c4eec,0x8c32,0x4791,0x91,0x2,0x50,0x8a,0xda,0x5e,0xe8,0xe7)
-OUR_GUID_ENTRY(CLSID_ICodecAPIProxy,0x7ff0997a,0x1999,0x4286,0xa7,0x3c,0x62,0x2b,0x88,0x14,0xe7,0xeb)
-OUR_GUID_ENTRY(CLSID_IVideoEncoderCodecAPIProxy,0xb05dabd9,0x56e5,0x4fdc,0xaf,0xa4,0x8a,0x47,0xe9,0x1f,0x1c,0x9c)
-
-#ifndef __ENCODER_API_GUIDS__
-#define __ENCODER_API_GUIDS__
-OUR_GUID_ENTRY(ENCAPIPARAM_BITRATE,0x49cc4c43,0xca83,0x4ad4,0xa9,0xaf,0xf3,0x69,0x6a,0xf6,0x66,0xdf)
-OUR_GUID_ENTRY(ENCAPIPARAM_PEAK_BITRATE,0x703f16a9,0x3d48,0x44a1,0xb0,0x77,0x1,0x8d,0xff,0x91,0x5d,0x19)
-OUR_GUID_ENTRY(ENCAPIPARAM_BITRATE_MODE,0xee5fb25c,0xc713,0x40d1,0x9d,0x58,0xc0,0xd7,0x24,0x1e,0x25,0xf)
-OUR_GUID_ENTRY(CODECAPI_CHANGELISTS,0x62b12acf,0xf6b0,0x47d9,0x94,0x56,0x96,0xf2,0x2c,0x4e,0x0b,0x9d)
-OUR_GUID_ENTRY(CODECAPI_VIDEO_ENCODER,0x7112e8e1,0x3d03,0x47ef,0x8e,0x60,0x03,0xf1,0xcf,0x53,0x73,0x01)
-OUR_GUID_ENTRY(CODECAPI_AUDIO_ENCODER,0xb9d19a3e,0xf897,0x429c,0xbc,0x46,0x81,0x38,0xb7,0x27,0x2b,0x2d)
-OUR_GUID_ENTRY(CODECAPI_SETALLDEFAULTS,0x6c5e6a7c,0xacf8,0x4f55,0xa9,0x99,0x1a,0x62,0x81,0x09,0x05,0x1b)
-OUR_GUID_ENTRY(CODECAPI_ALLSETTINGS,0x6a577e92,0x83e1,0x4113,0xad,0xc2,0x4f,0xce,0xc3,0x2f,0x83,0xa1)
-OUR_GUID_ENTRY(CODECAPI_SUPPORTSEVENTS,0x0581af97,0x7693,0x4dbd,0x9d,0xca,0x3f,0x9e,0xbd,0x65,0x85,0xa1)
-OUR_GUID_ENTRY(CODECAPI_CURRENTCHANGELIST,0x1cb14e83,0x7d72,0x4657,0x83,0xfd,0x47,0xa2,0xc5,0xb9,0xd1,0x3d)
-#endif
-
-#undef OUR_GUID_ENTRY
--- a/3rdparty/include/f2c.h
+++ b/3rdparty/include/f2c.h
@ -0,0 +1,253 @@
+/* f2c.h  --  Standard Fortran to C header file */
+
+/**  barf  [ba:rf]  2.  "He suggested using FORTRAN, and everybody barfed."
+
+	- From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */
+
+#ifndef F2C_INCLUDE
+#define F2C_INCLUDE
+
+#include <assert.h>
+#include <math.h>
+#include <ctype.h>
+#include <stdlib.h>
+/* needed for Windows Mobile; #undef takes a bare identifier, so no trailing ';' */
+#ifdef WINCE
+#undef complex
+#endif
+#include <string.h>
+#include <stdio.h>
+
+#if __SSE2__ || defined _M_X64
+#include "emmintrin.h"
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef int integer;	/* scalar aliases; presumably the C spellings of Fortran types -- confirm */
+typedef unsigned int uinteger;	/* unsigned counterpart; used as the mask type in bit_clear/bit_set */
+typedef char *address;
+typedef short int shortint;
+typedef float real;
+typedef double doublereal;
+typedef struct { real r, i; } complex;	/* two reals; r/i presumably real and imaginary parts */
+typedef struct { doublereal r, i; } doublecomplex;	/* same layout with doublereal members */
+typedef int logical;
+typedef short int shortlogical;
+typedef char logical1;
+typedef char integer1;
+#ifdef INTEGER_STAR_8	/* Adjust for integer*8: the long long based names exist only when this is defined. */
+typedef long long longint;		/* system-dependent */
+typedef unsigned long long ulongint;	/* system-dependent */
+#define qbit_clear(a,b)	((a) & ~((ulongint)1 << (b)))	/* value of a with bit b cleared; a itself is not modified */
+#define qbit_set(a,b)	((a) |  ((ulongint)1 << (b)))	/* value of a with bit b set; a itself is not modified */
+#endif
+
+#define TRUE_ (1)	/* integer truth constants */
+#define FALSE_ (0)
+
+/* Extern is for use with -E; it defaults to plain `extern` unless the includer defined it first */
+#ifndef Extern
+#define Extern extern
+#endif
+
+/* I/O stuff: flag, ftnlen and ftnint are the scalar types used by the I/O records below */
+
+#ifdef f2c_i2
+/* for -i2: all three types are short instead of int */
+typedef short flag;
+typedef short ftnlen;
+typedef short ftnint;
+#else
+typedef int flag;
+typedef int ftnlen;
+typedef int ftnint;
+#endif
+
+/* external read, write: record type cilist */
+typedef struct
+{	flag cierr;
+	ftnint ciunit;
+	flag ciend;
+	char *cifmt;
+	ftnint cirec;
+} cilist;
+
+/* internal read, write: like cilist, but the unit (iciunit) is a char* rather than an ftnint */
+typedef struct
+{	flag icierr;
+	char *iciunit;
+	flag iciend;
+	char *icifmt;
+	ftnint icirlen;
+	ftnint icirnum;
+} icilist;
+
+/* open: of the char* fields, only ofnm carries an explicit length (ofnmlen) */
+typedef struct
+{	flag oerr;
+	ftnint ounit;
+	char *ofnm;
+	ftnlen ofnmlen;
+	char *osta;
+	char *oacc;
+	char *ofm;
+	ftnint orl;
+	char *oblnk;
+} olist;
+
+/* close */
+typedef struct
+{	flag cerr;
+	ftnint cunit;
+	char *csta;
+} cllist;
+
+/* rewind, backspace, endfile: one record type (alist) shared by all three */
+typedef struct
+{	flag aerr;
+	ftnint aunit;
+} alist;
+
+/* inquire: each char* result buffer below is followed by a length field for it */
+typedef struct
+{	flag inerr;
+	ftnint inunit;
+	char *infile;
+	ftnlen infilen;
+	ftnint	*inex;	/*parameters in standard's order*/
+	ftnint	*inopen;
+	ftnint	*innum;
+	ftnint	*innamed;
+	char	*inname;
+	ftnlen	innamlen;
+	char	*inacc;
+	ftnlen	inacclen;
+	char	*inseq;
+	ftnlen	inseqlen;
+	char 	*indir;
+	ftnlen	indirlen;
+	char	*infmt;
+	ftnlen	infmtlen;
+	char	*inform;
+	ftnint	informlen;	/* NOTE(review): ftnint here, ftnlen for the other length fields; both resolve to the same type in this header */
+	char	*inunf;
+	ftnlen	inunflen;
+	ftnint	*inrecl;
+	ftnint	*innrec;
+	char	*inblank;
+	ftnlen	inblanklen;
+} inlist;
+
+#define VOID void	/* upper-case spelling used by the procedure typedefs later in this header */
+
+union Multitype {	/* for multiple entry points; one member per scalar type declared above */
+	integer1 g;
+	shortint h;
+	integer i;
+	/* longint j; */	/* left out; note longint is only declared under INTEGER_STAR_8 */
+	real r;
+	doublereal d;
+	complex c;
+	doublecomplex z;
+	};
+
+typedef union Multitype Multitype;
+
+/*typedef long int Long;*/	/* No longer used; formerly in Namelist */
+
+struct Vardesc {	/* for Namelist: describes one variable */
+	char *name;
+	char *addr;
+	ftnlen *dims;
+	int  type;
+	};
+typedef struct Vardesc Vardesc;
+
+struct Namelist {
+	char *name;
+	Vardesc **vars;	/* presumably nvars entries -- confirm against the runtime that fills this in */
+	int nvars;
+	};
+typedef struct Namelist Namelist;
+
+#ifndef abs	/* an abs macro that is already defined is kept as is */
+#define abs(x) ((x) >= 0 ? (x) : -(x))	/* x is evaluated twice: avoid arguments with side effects */
+#endif
+#define dabs(x) (doublereal)abs(x)	/* abs(x) with the result cast to doublereal */
+#ifndef min	/* an existing min macro is kept */
+#define min(a,b) ((a) <= (b) ? (a) : (b))	/* the selected argument is evaluated twice */
+#endif
+#ifndef max	/* an existing max macro is kept */
+#define max(a,b) ((a) >= (b) ? (a) : (b))	/* the selected argument is evaluated twice */
+#endif
+#define dmin(a,b) (doublereal)min(a,b)	/* min(a,b) with the result cast to doublereal */
+#define dmax(a,b) (doublereal)max(a,b)	/* max(a,b) with the result cast to doublereal */
+#define bit_test(a,b)	((a) >> (b) & 1)	/* bit b of a as 0 or 1; '>>' binds tighter than '&' */
+#define bit_clear(a,b)	((a) & ~((uinteger)1 << (b)))	/* value of a with bit b cleared; a itself is not modified */
+#define bit_set(a,b)	((a) |  ((uinteger)1 << (b)))	/* value of a with bit b set; a itself is not modified */
+
+/* procedure parameter types for -A and -C++: one function-pointer typedef per return kind */
+
+#define F2C_proc_par_types 1
+#ifdef __cplusplus	/* C++ branch: every pointer type takes a variadic (...) list */
+typedef int /* Unknown procedure type */ (*U_fp)(...);
+typedef shortint (*J_fp)(...);
+typedef integer (*I_fp)(...);
+typedef real (*R_fp)(...);
+typedef doublereal (*D_fp)(...), (*E_fp)(...);
+typedef /* Complex */ VOID (*C_fp)(...);
+typedef /* Double Complex */ VOID (*Z_fp)(...);
+typedef logical (*L_fp)(...);
+typedef shortlogical (*K_fp)(...);
+typedef /* Character */ VOID (*H_fp)(...);
+typedef /* Subroutine */ int (*S_fp)(...);
+#else	/* C branch: the same names with empty () parameter lists */
+typedef int /* Unknown procedure type */ (*U_fp)();
+typedef shortint (*J_fp)();
+typedef integer (*I_fp)();
+typedef real (*R_fp)();
+typedef doublereal (*D_fp)(), (*E_fp)();
+typedef /* Complex */ VOID (*C_fp)();
+typedef /* Double Complex */ VOID (*Z_fp)();
+typedef logical (*L_fp)();
+typedef shortlogical (*K_fp)();
+typedef /* Character */ VOID (*H_fp)();
+typedef /* Subroutine */ int (*S_fp)();
+#endif
+/* E_fp is for real functions when -R is not specified; it is declared with a doublereal return */
+typedef VOID C_f;	/* complex function */
+typedef VOID H_f;	/* character function */
+typedef VOID Z_f;	/* double complex function */
+typedef doublereal E_f;	/* real function with -R not specified */
+
+/* undef any lower-case symbols that your C compiler predefines, e.g. the ones listed below; define Skip_f2c_Undefs before including this header to keep them */
+
+#ifndef Skip_f2c_Undefs
+#undef cray
+#undef gcos
+#undef mc68010
+#undef mc68020
+#undef mips
+#undef pdp11
+#undef sgi
+#undef sparc
+#undef sun
+#undef sun2
+#undef sun3
+#undef sun4
+#undef u370
+#undef u3b
+#undef u3b2
+#undef u3b5
+#undef unix
+#undef vax
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
--- a/3rdparty/include/ffmpeg_/libavcodec/avcodec.h
+++ b/3rdparty/include/ffmpeg_/libavcodec/avcodec.h
--- a/3rdparty/include/ffmpeg_/libavcodec/avfft.h
+++ b/3rdparty/include/ffmpeg_/libavcodec/avfft.h
@ -0,0 +1,99 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_AVFFT_H
+#define AVCODEC_AVFFT_H
+
+typedef float FFTSample;
+
+typedef struct FFTComplex {
+    FFTSample re, im;
+} FFTComplex;
+
+typedef struct FFTContext FFTContext; /* only forward-declared here, so opaque to users of this header */
+
+/**
+ * Set up a complex FFT.
+ * @param nbits           log2 of the length of the input array
+ * @param inverse         if 0 perform the forward transform, if 1 perform the inverse
+ */
+FFTContext *av_fft_init(int nbits, int inverse);
+
+/**
+ * Do the permutation needed BEFORE calling av_fft_calc().
+ */
+void av_fft_permute(FFTContext *s, FFTComplex *z);
+
+/**
+ * Do a complex FFT with the parameters defined in av_fft_init(). The
+ * input data must be permuted before. No 1.0/sqrt(n) normalization is done.
+ */
+void av_fft_calc(FFTContext *s, FFTComplex *z);
+
+void av_fft_end(FFTContext *s); /* presumably releases a context obtained from av_fft_init() -- confirm */
+
+FFTContext *av_mdct_init(int nbits, int inverse, double scale); /* MDCT family: shares the FFTContext type with the FFT API */
+void av_imdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input);
+void av_imdct_half(FFTContext *s, FFTSample *output, const FFTSample *input);
+void av_mdct_calc(FFTContext *s, FFTSample *output, const FFTSample *input);
+void av_mdct_end(FFTContext *s);
+
+/* Real Discrete Fourier Transform */
+
+enum RDFTransformType { /* passed as `trans` to av_rdft_init() */
+    DFT_R2C,
+    IDFT_C2R,
+    IDFT_R2C,
+    DFT_C2R,
+};
+
+typedef struct RDFTContext RDFTContext; /* only forward-declared here, so opaque to users of this header */
+
+/**
+ * Set up a real FFT.
+ * @param nbits           log2 of the length of the input array
+ * @param trans           the type of transform
+ */
+RDFTContext *av_rdft_init(int nbits, enum RDFTransformType trans);
+void av_rdft_calc(RDFTContext *s, FFTSample *data); /* takes a single buffer; presumably transformed in place -- confirm */
+void av_rdft_end(RDFTContext *s);
+
+/* Discrete Cosine Transform */
+
+typedef struct DCTContext DCTContext; /* only forward-declared here, so opaque to users of this header */
+
+enum DCTTransformType { /* passed as `type` to av_dct_init() */
+    DCT_II = 0,
+    DCT_III,
+    DCT_I,
+    DST_I,
+};
+
+/**
+ * Set up a DCT.
+ * @param nbits           size of the input array:
+ *                        (1 << nbits)     for DCT-II, DCT-III and DST-I
+ *                        (1 << nbits) + 1 for DCT-I
+ * @param type            the type of transform (one of the DCTTransformType values)
+ * @note the first element of the input of DST-I is ignored
+ */
+DCTContext *av_dct_init(int nbits, enum DCTTransformType type);
+void av_dct_calc(DCTContext *s, FFTSample *data); /* takes a single buffer; presumably transformed in place -- confirm */
+void av_dct_end (DCTContext *s);
+
+#endif /* AVCODEC_AVFFT_H */
--- a/Show More
+++ b/Show More