Merge pull request #6535 from sovrasov:lapack-hal
This commit is contained in:
@@ -221,6 +221,7 @@ OCV_OPTION(WITH_VA "Include VA support" OFF
|
|||||||
OCV_OPTION(WITH_VA_INTEL "Include Intel VA-API/OpenCL support" OFF IF (UNIX AND NOT ANDROID) )
|
OCV_OPTION(WITH_VA_INTEL "Include Intel VA-API/OpenCL support" OFF IF (UNIX AND NOT ANDROID) )
|
||||||
OCV_OPTION(WITH_GDAL "Include GDAL Support" OFF IF (NOT ANDROID AND NOT IOS AND NOT WINRT) )
|
OCV_OPTION(WITH_GDAL "Include GDAL Support" OFF IF (NOT ANDROID AND NOT IOS AND NOT WINRT) )
|
||||||
OCV_OPTION(WITH_GPHOTO2 "Include gPhoto2 library support" ON IF (UNIX AND NOT ANDROID) )
|
OCV_OPTION(WITH_GPHOTO2 "Include gPhoto2 library support" ON IF (UNIX AND NOT ANDROID) )
|
||||||
|
OCV_OPTION(WITH_LAPACK "Include Lapack library support" ON IF (UNIX AND NOT ANDROID) )
|
||||||
|
|
||||||
# OpenCV build components
|
# OpenCV build components
|
||||||
# ===================================================
|
# ===================================================
|
||||||
@@ -1173,6 +1174,10 @@ if(DEFINED WITH_VA_INTEL)
|
|||||||
status(" Use Intel VA-API/OpenCL:" HAVE_VA_INTEL THEN "YES (MSDK: ${VA_INTEL_MSDK_ROOT} OpenCL: ${VA_INTEL_IOCL_ROOT})" ELSE NO)
|
status(" Use Intel VA-API/OpenCL:" HAVE_VA_INTEL THEN "YES (MSDK: ${VA_INTEL_MSDK_ROOT} OpenCL: ${VA_INTEL_IOCL_ROOT})" ELSE NO)
|
||||||
endif(DEFINED WITH_VA_INTEL)
|
endif(DEFINED WITH_VA_INTEL)
|
||||||
|
|
||||||
|
if(DEFINED WITH_LAPACK)
|
||||||
|
status(" Use Lapack:" HAVE_LAPACK THEN "YES" ELSE NO)
|
||||||
|
endif(DEFINED WITH_LAPACK)
|
||||||
|
|
||||||
status(" Use Eigen:" HAVE_EIGEN THEN "YES (ver ${EIGEN_WORLD_VERSION}.${EIGEN_MAJOR_VERSION}.${EIGEN_MINOR_VERSION})" ELSE NO)
|
status(" Use Eigen:" HAVE_EIGEN THEN "YES (ver ${EIGEN_WORLD_VERSION}.${EIGEN_MAJOR_VERSION}.${EIGEN_MINOR_VERSION})" ELSE NO)
|
||||||
status(" Use Cuda:" HAVE_CUDA THEN "YES (ver ${CUDA_VERSION_STRING})" ELSE NO)
|
status(" Use Cuda:" HAVE_CUDA THEN "YES (ver ${CUDA_VERSION_STRING})" ELSE NO)
|
||||||
status(" Use OpenCL:" HAVE_OPENCL THEN YES ELSE NO)
|
status(" Use OpenCL:" HAVE_OPENCL THEN YES ELSE NO)
|
||||||
|
@@ -2,6 +2,19 @@
|
|||||||
# Detect other 3rd-party performance and math libraries
|
# Detect other 3rd-party performance and math libraries
|
||||||
# ----------------------------------------------------------------------------
|
# ----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
# --- Lapack ---
|
||||||
|
if(WITH_LAPACK)
|
||||||
|
find_package(LAPACK)
|
||||||
|
if(LAPACK_FOUND)
|
||||||
|
find_path(LAPACK_INCLUDE_DIR "lapacke.h")
|
||||||
|
if(LAPACK_INCLUDE_DIR)
|
||||||
|
set(HAVE_LAPACK 1)
|
||||||
|
ocv_include_directories(${LAPACK_INCLUDE_DIR})
|
||||||
|
list(APPEND OPENCV_LINKER_LIBS ${LAPACK_LIBRARIES})
|
||||||
|
endif()
|
||||||
|
endif(LAPACK_FOUND)
|
||||||
|
endif(WITH_LAPACK)
|
||||||
|
|
||||||
# --- TBB ---
|
# --- TBB ---
|
||||||
if(WITH_TBB)
|
if(WITH_TBB)
|
||||||
include("${OpenCV_SOURCE_DIR}/cmake/OpenCVDetectTBB.cmake")
|
include("${OpenCV_SOURCE_DIR}/cmake/OpenCVDetectTBB.cmake")
|
||||||
|
@@ -197,3 +197,6 @@
|
|||||||
|
|
||||||
/* Intel VA-API/OpenCL */
|
/* Intel VA-API/OpenCL */
|
||||||
#cmakedefine HAVE_VA_INTEL
|
#cmakedefine HAVE_VA_INTEL
|
||||||
|
|
||||||
|
/* Lapack */
|
||||||
|
#cmakedefine HAVE_LAPACK
|
||||||
|
@@ -49,17 +49,6 @@
|
|||||||
#include "opencv2/core/cvstd.hpp"
|
#include "opencv2/core/cvstd.hpp"
|
||||||
#include "opencv2/core/hal/interface.h"
|
#include "opencv2/core/hal/interface.h"
|
||||||
|
|
||||||
//! @cond IGNORED
|
|
||||||
#define CALL_HAL(name, fun, ...) \
|
|
||||||
int res = fun(__VA_ARGS__); \
|
|
||||||
if (res == CV_HAL_ERROR_OK) \
|
|
||||||
return; \
|
|
||||||
else if (res != CV_HAL_ERROR_NOT_IMPLEMENTED) \
|
|
||||||
CV_Error_(cv::Error::StsInternal, \
|
|
||||||
("HAL implementation " CVAUX_STR(name) " ==> " CVAUX_STR(fun) " returned %d (0x%08x)", res, res));
|
|
||||||
//! @endcond
|
|
||||||
|
|
||||||
|
|
||||||
namespace cv { namespace hal {
|
namespace cv { namespace hal {
|
||||||
|
|
||||||
//! @addtogroup core_hal_functions
|
//! @addtogroup core_hal_functions
|
||||||
@@ -75,6 +64,21 @@ CV_EXPORTS int LU32f(float* A, size_t astep, int m, float* b, size_t bstep, int
|
|||||||
CV_EXPORTS int LU64f(double* A, size_t astep, int m, double* b, size_t bstep, int n);
|
CV_EXPORTS int LU64f(double* A, size_t astep, int m, double* b, size_t bstep, int n);
|
||||||
CV_EXPORTS bool Cholesky32f(float* A, size_t astep, int m, float* b, size_t bstep, int n);
|
CV_EXPORTS bool Cholesky32f(float* A, size_t astep, int m, float* b, size_t bstep, int n);
|
||||||
CV_EXPORTS bool Cholesky64f(double* A, size_t astep, int m, double* b, size_t bstep, int n);
|
CV_EXPORTS bool Cholesky64f(double* A, size_t astep, int m, double* b, size_t bstep, int n);
|
||||||
|
CV_EXPORTS void SVD32f(float* At, size_t astep, float* W, float* U, size_t ustep, float* Vt, size_t vstep, int m, int n, int flags);
|
||||||
|
CV_EXPORTS void SVD64f(double* At, size_t astep, double* W, double* U, size_t ustep, double* Vt, size_t vstep, int m, int n, int flags);
|
||||||
|
|
||||||
|
CV_EXPORTS void gemm32f(const float* src1, size_t src1_step, const float* src2, size_t src2_step,
|
||||||
|
float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step,
|
||||||
|
int m_a, int n_a, int n_d, int flags);
|
||||||
|
CV_EXPORTS void gemm64f(const double* src1, size_t src1_step, const double* src2, size_t src2_step,
|
||||||
|
double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step,
|
||||||
|
int m_a, int n_a, int n_d, int flags);
|
||||||
|
CV_EXPORTS void gemm32fc(const float* src1, size_t src1_step, const float* src2, size_t src2_step,
|
||||||
|
float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step,
|
||||||
|
int m_a, int n_a, int n_d, int flags);
|
||||||
|
CV_EXPORTS void gemm64fc(const double* src1, size_t src1_step, const double* src2, size_t src2_step,
|
||||||
|
double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step,
|
||||||
|
int m_a, int n_a, int n_d, int flags);
|
||||||
|
|
||||||
CV_EXPORTS int normL1_(const uchar* a, const uchar* b, int n);
|
CV_EXPORTS int normL1_(const uchar* a, const uchar* b, int n);
|
||||||
CV_EXPORTS float normL1_(const float* a, const float* b, int n);
|
CV_EXPORTS float normL1_(const float* a, const float* b, int n);
|
||||||
|
@@ -158,6 +158,21 @@ typedef signed char schar;
|
|||||||
#define CV_HAL_DFT_IS_INPLACE 1024
|
#define CV_HAL_DFT_IS_INPLACE 1024
|
||||||
//! @}
|
//! @}
|
||||||
|
|
||||||
|
//! @name SVD flags
|
||||||
|
//! @{
|
||||||
|
#define CV_HAL_SVD_NO_UV 1
|
||||||
|
#define CV_HAL_SVD_SHORT_UV 2
|
||||||
|
#define CV_HAL_SVD_MODIFY_A 4
|
||||||
|
#define CV_HAL_SVD_FULL_UV 8
|
||||||
|
//! @}
|
||||||
|
|
||||||
|
//! @name Gemm flags
|
||||||
|
//! @{
|
||||||
|
#define CV_HAL_GEMM_1_T 1
|
||||||
|
#define CV_HAL_GEMM_2_T 2
|
||||||
|
#define CV_HAL_GEMM_3_T 4
|
||||||
|
//! @}
|
||||||
|
|
||||||
//! @}
|
//! @}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -438,7 +438,7 @@ template<typename _Tp, int m> struct Matx_DetOp
|
|||||||
return p;
|
return p;
|
||||||
for( int i = 0; i < m; i++ )
|
for( int i = 0; i < m; i++ )
|
||||||
p *= temp(i, i);
|
p *= temp(i, i);
|
||||||
return 1./p;
|
return p;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
485
modules/core/src/hal_internal.cpp
Normal file
485
modules/core/src/hal_internal.cpp
Normal file
@@ -0,0 +1,485 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||||
|
// Copyright (C) 2015, Itseez Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#include "hal_internal.hpp"
|
||||||
|
|
||||||
|
#ifdef HAVE_LAPACK
|
||||||
|
|
||||||
|
#include <cmath>
|
||||||
|
#include <lapacke.h>
|
||||||
|
#include <cblas.h>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <typeinfo>
|
||||||
|
#include <limits>
|
||||||
|
#include <complex>
|
||||||
|
|
||||||
|
#define HAL_GEMM_SMALL_COMPLEX_MATRIX_THRESH 100
|
||||||
|
#define HAL_GEMM_SMALL_MATRIX_THRESH 100
|
||||||
|
#define HAL_SVD_SMALL_MATRIX_THRESH 25
|
||||||
|
#define HAL_LU_SMALL_MATRIX_THRESH 100
|
||||||
|
#define HAL_CHOLESKY_SMALL_MATRIX_THRESH 100
|
||||||
|
|
||||||
|
//lapack stores matrices in column-major order so transposing is neded everywhere
|
||||||
|
template <typename fptype> static inline void
|
||||||
|
transpose_square_inplace(fptype *src, size_t src_ld, size_t m)
|
||||||
|
{
|
||||||
|
for(size_t i = 0; i < m - 1; i++)
|
||||||
|
for(size_t j = i + 1; j < m; j++)
|
||||||
|
std::swap(src[j*src_ld + i], src[i*src_ld + j]);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename fptype> static inline void
|
||||||
|
transpose(const fptype *src, size_t src_ld, fptype* dst, size_t dst_ld, size_t m, size_t n)
|
||||||
|
{
|
||||||
|
for(size_t i = 0; i < m; i++)
|
||||||
|
for(size_t j = 0; j < n; j++)
|
||||||
|
dst[j*dst_ld + i] = src[i*src_ld + j];
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename fptype> static inline void
|
||||||
|
copy_matrix(const fptype *src, size_t src_ld, fptype* dst, size_t dst_ld, size_t m, size_t n)
|
||||||
|
{
|
||||||
|
for(size_t i = 0; i < m; i++)
|
||||||
|
for(size_t j = 0; j < n; j++)
|
||||||
|
dst[i*dst_ld + j] = src[i*src_ld + j];
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename fptype> static inline void
|
||||||
|
set_value(fptype *dst, size_t dst_ld, fptype value, size_t m, size_t n)
|
||||||
|
{
|
||||||
|
for(size_t i = 0; i < m; i++)
|
||||||
|
for(size_t j = 0; j < n; j++)
|
||||||
|
dst[i*dst_ld + j] = value;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename fptype> static inline int
|
||||||
|
lapack_LU(fptype* a, size_t a_step, int m, fptype* b, size_t b_step, int n, int* info)
|
||||||
|
{
|
||||||
|
int lda = a_step / sizeof(fptype), sign = 0;
|
||||||
|
int* piv = new int[m];
|
||||||
|
|
||||||
|
transpose_square_inplace(a, lda, m);
|
||||||
|
|
||||||
|
if(b)
|
||||||
|
{
|
||||||
|
if(n == 1 && b_step == sizeof(fptype))
|
||||||
|
{
|
||||||
|
if(typeid(fptype) == typeid(float))
|
||||||
|
sgesv_(&m, &n, (float*)a, &lda, piv, (float*)b, &m, info);
|
||||||
|
else if(typeid(fptype) == typeid(double))
|
||||||
|
dgesv_(&m, &n, (double*)a, &lda, piv, (double*)b, &m, info);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
int ldb = b_step / sizeof(fptype);
|
||||||
|
fptype* tmpB = new fptype[m*n];
|
||||||
|
|
||||||
|
transpose(b, ldb, tmpB, m, m, n);
|
||||||
|
|
||||||
|
if(typeid(fptype) == typeid(float))
|
||||||
|
sgesv_(&m, &n, (float*)a, &lda, piv, (float*)tmpB, &m, info);
|
||||||
|
else if(typeid(fptype) == typeid(double))
|
||||||
|
dgesv_(&m, &n, (double*)a, &lda, piv, (double*)tmpB, &m, info);
|
||||||
|
|
||||||
|
transpose(tmpB, m, b, ldb, n, m);
|
||||||
|
delete[] tmpB;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if(typeid(fptype) == typeid(float))
|
||||||
|
sgetrf_(&m, &m, (float*)a, &lda, piv, info);
|
||||||
|
else if(typeid(fptype) == typeid(double))
|
||||||
|
dgetrf_(&m, &m, (double*)a, &lda, piv, info);
|
||||||
|
}
|
||||||
|
|
||||||
|
if(*info == 0)
|
||||||
|
{
|
||||||
|
for(int i = 0; i < m; i++)
|
||||||
|
sign ^= piv[i] != i + 1;
|
||||||
|
*info = sign ? -1 : 1;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
*info = 0; //in opencv LU function zero means error
|
||||||
|
|
||||||
|
delete[] piv;
|
||||||
|
return CV_HAL_ERROR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename fptype> static inline int
|
||||||
|
lapack_Cholesky(fptype* a, size_t a_step, int m, fptype* b, size_t b_step, int n, bool* info)
|
||||||
|
{
|
||||||
|
int lapackStatus;
|
||||||
|
int lda = a_step / sizeof(fptype);
|
||||||
|
char L[] = {'L', '\0'};
|
||||||
|
|
||||||
|
if(b)
|
||||||
|
{
|
||||||
|
if(n == 1 && b_step == sizeof(fptype))
|
||||||
|
{
|
||||||
|
if(typeid(fptype) == typeid(float))
|
||||||
|
sposv_(L, &m, &n, (float*)a, &lda, (float*)b, &m, &lapackStatus);
|
||||||
|
else if(typeid(fptype) == typeid(double))
|
||||||
|
dposv_(L, &m, &n, (double*)a, &lda, (double*)b, &m, &lapackStatus);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
int ldb = b_step / sizeof(fptype);
|
||||||
|
fptype* tmpB = new fptype[m*n];
|
||||||
|
transpose(b, ldb, tmpB, m, m, n);
|
||||||
|
|
||||||
|
if(typeid(fptype) == typeid(float))
|
||||||
|
sposv_(L, &m, &n, (float*)a, &lda, (float*)tmpB, &m, &lapackStatus);
|
||||||
|
else if(typeid(fptype) == typeid(double))
|
||||||
|
dposv_(L, &m, &n, (double*)a, &lda, (double*)tmpB, &m, &lapackStatus);
|
||||||
|
|
||||||
|
transpose(tmpB, m, b, ldb, n, m);
|
||||||
|
delete[] tmpB;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if(typeid(fptype) == typeid(float))
|
||||||
|
spotrf_(L, &m, (float*)a, &lda, &lapackStatus);
|
||||||
|
else if(typeid(fptype) == typeid(double))
|
||||||
|
dpotrf_(L, &m, (double*)a, &lda, &lapackStatus);
|
||||||
|
}
|
||||||
|
|
||||||
|
if(lapackStatus == 0) *info = true;
|
||||||
|
else *info = false; //in opencv Cholesky function false means error
|
||||||
|
|
||||||
|
return CV_HAL_ERROR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename fptype> static inline int
|
||||||
|
lapack_SVD(fptype* a, size_t a_step, fptype *w, fptype* u, size_t u_step, fptype* vt, size_t v_step, int m, int n, int flags, int* info)
|
||||||
|
{
|
||||||
|
int lda = a_step / sizeof(fptype);
|
||||||
|
int ldv = v_step / sizeof(fptype);
|
||||||
|
int ldu = u_step / sizeof(fptype);
|
||||||
|
int lwork = -1;
|
||||||
|
int* iworkBuf = new int[8*std::min(m, n)];
|
||||||
|
fptype work1 = 0;
|
||||||
|
|
||||||
|
//A already transposed and m>=n
|
||||||
|
char mode[] = { ' ', '\0'};
|
||||||
|
if(flags & CV_HAL_SVD_NO_UV)
|
||||||
|
{
|
||||||
|
ldv = 1;
|
||||||
|
mode[0] = 'N';
|
||||||
|
}
|
||||||
|
else if((flags & CV_HAL_SVD_SHORT_UV) && (flags & CV_HAL_SVD_MODIFY_A)) //short SVD, U stored in a
|
||||||
|
mode[0] = 'O';
|
||||||
|
else if((flags & CV_HAL_SVD_SHORT_UV) && !(flags & CV_HAL_SVD_MODIFY_A)) //short SVD, U stored in u if m>=n
|
||||||
|
mode[0] = 'S';
|
||||||
|
else if(flags & CV_HAL_SVD_FULL_UV) //full SVD, U stored in u or in a
|
||||||
|
mode[0] = 'A';
|
||||||
|
|
||||||
|
if((flags & CV_HAL_SVD_MODIFY_A) && (flags & CV_HAL_SVD_FULL_UV)) //U stored in a
|
||||||
|
{
|
||||||
|
u = new fptype[m*m];
|
||||||
|
ldu = m;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(typeid(fptype) == typeid(float))
|
||||||
|
sgesdd_(mode, &m, &n, (float*)a, &lda, (float*)w, (float*)u, &ldu, (float*)vt, &ldv, (float*)&work1, &lwork, iworkBuf, info);
|
||||||
|
else if(typeid(fptype) == typeid(double))
|
||||||
|
dgesdd_(mode, &m, &n, (double*)a, &lda, (double*)w, (double*)u, &ldu, (double*)vt, &ldv, (double*)&work1, &lwork, iworkBuf, info);
|
||||||
|
|
||||||
|
lwork = round(work1); //optimal buffer size
|
||||||
|
fptype* buffer = new fptype[lwork + 1];
|
||||||
|
|
||||||
|
if(typeid(fptype) == typeid(float))
|
||||||
|
sgesdd_(mode, &m, &n, (float*)a, &lda, (float*)w, (float*)u, &ldu, (float*)vt, &ldv, (float*)buffer, &lwork, iworkBuf, info);
|
||||||
|
else if(typeid(fptype) == typeid(double))
|
||||||
|
dgesdd_(mode, &m, &n, (double*)a, &lda, (double*)w, (double*)u, &ldu, (double*)vt, &ldv, (double*)buffer, &lwork, iworkBuf, info);
|
||||||
|
|
||||||
|
if(!(flags & CV_HAL_SVD_NO_UV))
|
||||||
|
transpose_square_inplace(vt, ldv, n);
|
||||||
|
|
||||||
|
if((flags & CV_HAL_SVD_MODIFY_A) && (flags & CV_HAL_SVD_FULL_UV))
|
||||||
|
{
|
||||||
|
for(int i = 0; i < m; i++)
|
||||||
|
for(int j = 0; j < m; j++)
|
||||||
|
a[i*lda + j] = u[i*m + j];
|
||||||
|
delete[] u;
|
||||||
|
}
|
||||||
|
|
||||||
|
delete[] iworkBuf;
|
||||||
|
delete[] buffer;
|
||||||
|
return CV_HAL_ERROR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename fptype> static inline int
|
||||||
|
lapack_gemm(const fptype *src1, size_t src1_step, const fptype *src2, size_t src2_step, fptype alpha,
|
||||||
|
const fptype *src3, size_t src3_step, fptype beta, fptype *dst, size_t dst_step, int a_m, int a_n, int d_n, int flags)
|
||||||
|
{
|
||||||
|
int ldsrc1 = src1_step / sizeof(fptype);
|
||||||
|
int ldsrc2 = src2_step / sizeof(fptype);
|
||||||
|
int ldsrc3 = src3_step / sizeof(fptype);
|
||||||
|
int lddst = dst_step / sizeof(fptype);
|
||||||
|
int c_m, c_n, d_m;
|
||||||
|
CBLAS_TRANSPOSE transA, transB;
|
||||||
|
|
||||||
|
if(flags & CV_HAL_GEMM_2_T)
|
||||||
|
{
|
||||||
|
transB = CblasTrans;
|
||||||
|
if(flags & CV_HAL_GEMM_1_T )
|
||||||
|
{
|
||||||
|
d_m = a_n;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
d_m = a_m;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
transB = CblasNoTrans;
|
||||||
|
if(flags & CV_HAL_GEMM_1_T )
|
||||||
|
{
|
||||||
|
d_m = a_n;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
d_m = a_m;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(flags & CV_HAL_GEMM_3_T)
|
||||||
|
{
|
||||||
|
c_m = d_n;
|
||||||
|
c_n = d_m;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
c_m = d_m;
|
||||||
|
c_n = d_n;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(flags & CV_HAL_GEMM_1_T )
|
||||||
|
{
|
||||||
|
transA = CblasTrans;
|
||||||
|
std::swap(a_n, a_m);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
transA = CblasNoTrans;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(src3 != dst && beta != 0.0 && src3_step != 0) {
|
||||||
|
if(flags & CV_HAL_GEMM_3_T)
|
||||||
|
transpose(src3, ldsrc3, dst, lddst, c_m, c_n);
|
||||||
|
else
|
||||||
|
copy_matrix(src3, ldsrc3, dst, lddst, c_m, c_n);
|
||||||
|
}
|
||||||
|
else if (src3 == dst && (flags & CV_HAL_GEMM_3_T)) //actually transposing C in this case done by openCV
|
||||||
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
|
else if(src3_step == 0 && beta != 0.0)
|
||||||
|
set_value(dst, lddst, (fptype)0.0, d_m, d_n);
|
||||||
|
|
||||||
|
if(typeid(fptype) == typeid(float))
|
||||||
|
cblas_sgemm(CblasRowMajor, transA, transB, a_m, d_n, a_n, (float)alpha, (float*)src1, ldsrc1, (float*)src2, ldsrc2, (float)beta, (float*)dst, lddst);
|
||||||
|
else if(typeid(fptype) == typeid(double))
|
||||||
|
cblas_dgemm(CblasRowMajor, transA, transB, a_m, d_n, a_n, (double)alpha, (double*)src1, ldsrc1, (double*)src2, ldsrc2, (double)beta, (double*)dst, lddst);
|
||||||
|
|
||||||
|
return CV_HAL_ERROR_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
template <typename fptype> static inline int
|
||||||
|
lapack_gemm_c(const fptype *src1, size_t src1_step, const fptype *src2, size_t src2_step, fptype alpha,
|
||||||
|
const fptype *src3, size_t src3_step, fptype beta, fptype *dst, size_t dst_step, int a_m, int a_n, int d_n, int flags)
|
||||||
|
{
|
||||||
|
int ldsrc1 = src1_step / sizeof(std::complex<fptype>);
|
||||||
|
int ldsrc2 = src2_step / sizeof(std::complex<fptype>);
|
||||||
|
int ldsrc3 = src3_step / sizeof(std::complex<fptype>);
|
||||||
|
int lddst = dst_step / sizeof(std::complex<fptype>);
|
||||||
|
int c_m, c_n, d_m;
|
||||||
|
CBLAS_TRANSPOSE transA, transB;
|
||||||
|
std::complex<fptype> cAlpha(alpha, 0.0);
|
||||||
|
std::complex<fptype> cBeta(beta, 0.0);
|
||||||
|
|
||||||
|
if(flags & CV_HAL_GEMM_2_T)
|
||||||
|
{
|
||||||
|
transB = CblasTrans;
|
||||||
|
if(flags & CV_HAL_GEMM_1_T )
|
||||||
|
{
|
||||||
|
d_m = a_n;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
d_m = a_m;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
transB = CblasNoTrans;
|
||||||
|
if(flags & CV_HAL_GEMM_1_T )
|
||||||
|
{
|
||||||
|
d_m = a_n;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
d_m = a_m;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(flags & CV_HAL_GEMM_3_T)
|
||||||
|
{
|
||||||
|
c_m = d_n;
|
||||||
|
c_n = d_m;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
c_m = d_m;
|
||||||
|
c_n = d_n;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(flags & CV_HAL_GEMM_1_T )
|
||||||
|
{
|
||||||
|
transA = CblasTrans;
|
||||||
|
std::swap(a_n, a_m);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
transA = CblasNoTrans;
|
||||||
|
}
|
||||||
|
|
||||||
|
if(src3 != dst && beta != 0.0 && src3_step != 0) {
|
||||||
|
if(flags & CV_HAL_GEMM_3_T)
|
||||||
|
transpose((std::complex<fptype>*)src3, ldsrc3, (std::complex<fptype>*)dst, lddst, c_m, c_n);
|
||||||
|
else
|
||||||
|
copy_matrix((std::complex<fptype>*)src3, ldsrc3, (std::complex<fptype>*)dst, lddst, c_m, c_n);
|
||||||
|
}
|
||||||
|
else if (src3 == dst && (flags & CV_HAL_GEMM_3_T)) //actually transposing C in this case done by openCV
|
||||||
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
|
else if(src3_step == 0 && beta != 0.0)
|
||||||
|
set_value((std::complex<fptype>*)dst, lddst, std::complex<fptype>(0.0, 0.0), d_m, d_n);
|
||||||
|
|
||||||
|
if(typeid(fptype) == typeid(float))
|
||||||
|
cblas_cgemm(CblasRowMajor, transA, transB, a_m, d_n, a_n, &cAlpha, (void*)src1, ldsrc1, (void*)src2, ldsrc2, &cBeta, (void*)dst, lddst);
|
||||||
|
else if(typeid(fptype) == typeid(double))
|
||||||
|
cblas_zgemm(CblasRowMajor, transA, transB, a_m, d_n, a_n, &cAlpha, (void*)src1, ldsrc1, (void*)src2, ldsrc2, &cBeta, (void*)dst, lddst);
|
||||||
|
|
||||||
|
return CV_HAL_ERROR_OK;
|
||||||
|
}
|
||||||
|
int lapack_LU32f(float* a, size_t a_step, int m, float* b, size_t b_step, int n, int* info)
|
||||||
|
{
|
||||||
|
if(m < HAL_LU_SMALL_MATRIX_THRESH)
|
||||||
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
|
return lapack_LU(a, a_step, m, b, b_step, n, info);
|
||||||
|
}
|
||||||
|
|
||||||
|
int lapack_LU64f(double* a, size_t a_step, int m, double* b, size_t b_step, int n, int* info)
|
||||||
|
{
|
||||||
|
if(m < HAL_LU_SMALL_MATRIX_THRESH)
|
||||||
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
|
return lapack_LU(a, a_step, m, b, b_step, n, info);
|
||||||
|
}
|
||||||
|
|
||||||
|
int lapack_Cholesky32f(float* a, size_t a_step, int m, float* b, size_t b_step, int n, bool *info)
|
||||||
|
{
|
||||||
|
if(m < HAL_CHOLESKY_SMALL_MATRIX_THRESH)
|
||||||
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
|
return lapack_Cholesky(a, a_step, m, b, b_step, n, info);
|
||||||
|
}
|
||||||
|
|
||||||
|
int lapack_Cholesky64f(double* a, size_t a_step, int m, double* b, size_t b_step, int n, bool *info)
|
||||||
|
{
|
||||||
|
if(m < HAL_CHOLESKY_SMALL_MATRIX_THRESH)
|
||||||
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
|
return lapack_Cholesky(a, a_step, m, b, b_step, n, info);
|
||||||
|
}
|
||||||
|
|
||||||
|
int lapack_SVD32f(float* a, size_t a_step, float *w, float* u, size_t u_step, float* vt, size_t v_step, int m, int n, int flags)
|
||||||
|
{
|
||||||
|
|
||||||
|
if(m < HAL_SVD_SMALL_MATRIX_THRESH)
|
||||||
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
|
int info;
|
||||||
|
return lapack_SVD(a, a_step, w, u, u_step, vt, v_step, m, n, flags, &info);
|
||||||
|
}
|
||||||
|
|
||||||
|
int lapack_SVD64f(double* a, size_t a_step, double *w, double* u, size_t u_step, double* vt, size_t v_step, int m, int n, int flags)
|
||||||
|
{
|
||||||
|
|
||||||
|
if(m < HAL_SVD_SMALL_MATRIX_THRESH)
|
||||||
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
|
int info;
|
||||||
|
return lapack_SVD(a, a_step, w, u, u_step, vt, v_step, m, n, flags, &info);
|
||||||
|
}
|
||||||
|
|
||||||
|
int lapack_gemm32f(const float *src1, size_t src1_step, const float *src2, size_t src2_step, float alpha,
|
||||||
|
const float *src3, size_t src3_step, float beta, float *dst, size_t dst_step, int m, int n, int k, int flags)
|
||||||
|
{
|
||||||
|
if(m < HAL_GEMM_SMALL_MATRIX_THRESH)
|
||||||
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
|
return lapack_gemm(src1, src1_step, src2, src2_step, alpha, src3, src3_step, beta, dst, dst_step, m, n, k, flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
int lapack_gemm64f(const double *src1, size_t src1_step, const double *src2, size_t src2_step, double alpha,
|
||||||
|
const double *src3, size_t src3_step, double beta, double *dst, size_t dst_step, int m, int n, int k, int flags)
|
||||||
|
{
|
||||||
|
if(m < HAL_GEMM_SMALL_MATRIX_THRESH)
|
||||||
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
|
return lapack_gemm(src1, src1_step, src2, src2_step, alpha, src3, src3_step, beta, dst, dst_step, m, n, k, flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
int lapack_gemm32fc(const float *src1, size_t src1_step, const float *src2, size_t src2_step, float alpha,
|
||||||
|
const float *src3, size_t src3_step, float beta, float *dst, size_t dst_step, int m, int n, int k, int flags)
|
||||||
|
{
|
||||||
|
if(m < HAL_GEMM_SMALL_COMPLEX_MATRIX_THRESH)
|
||||||
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
|
return lapack_gemm_c(src1, src1_step, src2, src2_step, alpha, src3, src3_step, beta, dst, dst_step, m, n, k, flags);
|
||||||
|
}
|
||||||
|
int lapack_gemm64fc(const double *src1, size_t src1_step, const double *src2, size_t src2_step, double alpha,
|
||||||
|
const double *src3, size_t src3_step, double beta, double *dst, size_t dst_step, int m, int n, int k, int flags)
|
||||||
|
{
|
||||||
|
if(m < HAL_GEMM_SMALL_COMPLEX_MATRIX_THRESH)
|
||||||
|
return CV_HAL_ERROR_NOT_IMPLEMENTED;
|
||||||
|
return lapack_gemm_c(src1, src1_step, src2, src2_step, alpha, src3, src3_step, beta, dst, dst_step, m, n, k, flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif //HAVE_LAPACK
|
96
modules/core/src/hal_internal.hpp
Normal file
96
modules/core/src/hal_internal.hpp
Normal file
@@ -0,0 +1,96 @@
|
|||||||
|
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||||
|
//
|
||||||
|
// By downloading, copying, installing or using the software you agree to this license.
|
||||||
|
// If you do not agree to this license, do not download, install,
|
||||||
|
// copy or use the software.
|
||||||
|
//
|
||||||
|
//
|
||||||
|
// License Agreement
|
||||||
|
// For Open Source Computer Vision Library
|
||||||
|
//
|
||||||
|
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||||
|
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||||
|
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
|
||||||
|
// Copyright (C) 2015, Itseez Inc., all rights reserved.
|
||||||
|
// Third party copyrights are property of their respective owners.
|
||||||
|
//
|
||||||
|
// Redistribution and use in source and binary forms, with or without modification,
|
||||||
|
// are permitted provided that the following conditions are met:
|
||||||
|
//
|
||||||
|
// * Redistribution's of source code must retain the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer.
|
||||||
|
//
|
||||||
|
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||||
|
// this list of conditions and the following disclaimer in the documentation
|
||||||
|
// and/or other materials provided with the distribution.
|
||||||
|
//
|
||||||
|
// * The name of the copyright holders may not be used to endorse or promote products
|
||||||
|
// derived from this software without specific prior written permission.
|
||||||
|
//
|
||||||
|
// This software is provided by the copyright holders and contributors "as is" and
|
||||||
|
// any express or implied warranties, including, but not limited to, the implied
|
||||||
|
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||||
|
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||||
|
// indirect, incidental, special, exemplary, or consequential damages
|
||||||
|
// (including, but not limited to, procurement of substitute goods or services;
|
||||||
|
// loss of use, data, or profits; or business interruption) however caused
|
||||||
|
// and on any theory of liability, whether in contract, strict liability,
|
||||||
|
// or tort (including negligence or otherwise) arising in any way out of
|
||||||
|
// the use of this software, even if advised of the possibility of such damage.
|
||||||
|
//
|
||||||
|
//M*/
|
||||||
|
|
||||||
|
#ifndef OPENCV_CORE_HAL_INTERNAL_HPP
|
||||||
|
#define OPENCV_CORE_HAL_INTERNAL_HPP
|
||||||
|
|
||||||
|
#include "precomp.hpp"
|
||||||
|
|
||||||
|
#ifdef HAVE_LAPACK
|
||||||
|
|
||||||
|
int lapack_LU32f(float* a, size_t a_step, int m, float* b, size_t b_step, int n, int* info);
|
||||||
|
int lapack_LU64f(double* a, size_t a_step, int m, double* b, size_t b_step, int n, int* info);
|
||||||
|
int lapack_Cholesky32f(float* a, size_t a_step, int m, float* b, size_t b_step, int n, bool* info);
|
||||||
|
int lapack_Cholesky64f(double* a, size_t a_step, int m, double* b, size_t b_step, int n, bool* info);
|
||||||
|
int lapack_SVD32f(float* a, size_t a_step, float* w, float* u, size_t u_step, float* vt, size_t v_step, int m, int n, int flags);
|
||||||
|
int lapack_SVD64f(double* a, size_t a_step, double* w, double* u, size_t u_step, double* vt, size_t v_step, int m, int n, int flags);
|
||||||
|
int lapack_gemm32f(const float* src1, size_t src1_step, const float* src2, size_t src2_step,
|
||||||
|
float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step,
|
||||||
|
int m, int n, int k, int flags);
|
||||||
|
int lapack_gemm64f(const double* src1, size_t src1_step, const double* src2, size_t src2_step,
|
||||||
|
double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step,
|
||||||
|
int m, int n, int k, int flags);
|
||||||
|
int lapack_gemm32fc(const float* src1, size_t src1_step, const float* src2, size_t src2_step,
|
||||||
|
float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step,
|
||||||
|
int m, int n, int k, int flags);
|
||||||
|
int lapack_gemm64fc(const double* src1, size_t src1_step, const double* src2, size_t src2_step,
|
||||||
|
double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step,
|
||||||
|
int m, int n, int k, int flags);
|
||||||
|
|
||||||
|
#undef cv_hal_LU32f
|
||||||
|
#define cv_hal_LU32f lapack_LU32f
|
||||||
|
#undef cv_hal_LU64f
|
||||||
|
#define cv_hal_LU64f lapack_LU64f
|
||||||
|
|
||||||
|
#undef cv_hal_Cholesky32f
|
||||||
|
#define cv_hal_Cholesky32f lapack_Cholesky32f
|
||||||
|
#undef cv_hal_Cholesky64f
|
||||||
|
#define cv_hal_Cholesky64f lapack_Cholesky64f
|
||||||
|
|
||||||
|
#undef cv_hal_SVD32f
|
||||||
|
#define cv_hal_SVD32f lapack_SVD32f
|
||||||
|
#undef cv_hal_SVD64f
|
||||||
|
#define cv_hal_SVD64f lapack_SVD64f
|
||||||
|
|
||||||
|
#undef cv_hal_gemm32f
|
||||||
|
#define cv_hal_gemm32f lapack_gemm32f
|
||||||
|
#undef cv_hal_gemm64f
|
||||||
|
#define cv_hal_gemm64f lapack_gemm64f
|
||||||
|
#undef cv_hal_gemm32fc
|
||||||
|
#define cv_hal_gemm32fc lapack_gemm32fc
|
||||||
|
#undef cv_hal_gemm64fc
|
||||||
|
#define cv_hal_gemm64fc lapack_gemm64fc
|
||||||
|
|
||||||
|
#endif //HAVE_LAPACK
|
||||||
|
#endif //OPENCV_CORE_HAL_INTERNAL_HPP
|
@@ -472,14 +472,148 @@ inline int hal_ni_dctFree2D(cvhalDFT *context) { return CV_HAL_ERROR_NOT_IMPLEME
|
|||||||
#define cv_hal_dctFree2D hal_ni_dctFree2D
|
#define cv_hal_dctFree2D hal_ni_dctFree2D
|
||||||
//! @endcond
|
//! @endcond
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
Performs \f$LU\f$ decomposition of square matrix \f$A=P*L*U\f$ (where \f$P\f$ is permutation matrix) and solves matrix equation \f$A*X=B\f$.
|
||||||
|
Function returns the \f$sign\f$ of permutation \f$P\f$ via parameter info.
|
||||||
|
@param src1 pointer to input matrix \f$A\f$ stored in row major order. After finish of work src1 contains at least \f$U\f$ part of \f$LU\f$
|
||||||
|
decomposition which is appropriate for determainant calculation: \f$det(A)=sign*\prod_{j=1}^{M}a_{jj}\f$.
|
||||||
|
@param src1_step number of bytes each matrix \f$A\f$ row occupies.
|
||||||
|
@param m size of square matrix \f$A\f$.
|
||||||
|
@param src2 pointer to \f$M\times N\f$ matrix \f$B\f$ which is the right-hand side of system \f$A*X=B\f$. \f$B\f$ stored in row major order.
|
||||||
|
If src2 is null pointer only \f$LU\f$ decomposition will be performed. After finish of work src2 contains solution \f$X\f$ of system \f$A*X=B\f$.
|
||||||
|
@param src2_step number of bytes each matrix \f$B\f$ row occupies.
|
||||||
|
@param n number of right-hand vectors in \f$M\times N\f$ matrix \f$B\f$.
|
||||||
|
@param info indicates success of decomposition. If *info is equals to zero decomposition failed, othervise *info is equals to \f$sign\f$.
|
||||||
|
*/
|
||||||
|
//! @addtogroup core_hal_interface_decomp_lu LU matrix decomposition
|
||||||
|
//! @{
|
||||||
|
inline int hal_ni_LU32f(float* src1, size_t src1_step, int m, float* src2, size_t src2_step, int n, int* info) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
|
||||||
|
inline int hal_ni_LU64f(double* src1, size_t src1_step, int m, double* src2, size_t src2_step, int n, int* info) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
|
||||||
//! @}
|
//! @}
|
||||||
|
|
||||||
|
/**
|
||||||
|
Performs Cholesky decomposition of matrix \f$A = L*L^T\f$ and solves matrix equation \f$A*X=B\f$.
|
||||||
|
@param src1 pointer to input matrix \f$A\f$ stored in row major order. After finish of work src1 contains lower triangular matrix \f$L\f$.
|
||||||
|
@param src1_step number of bytes each matrix \f$A\f$ row occupies.
|
||||||
|
@param m size of square matrix \f$A\f$.
|
||||||
|
@param src2 pointer to \f$M\times N\f$ matrix \f$B\f$ which is the right-hand side of system \f$A*X=B\f$. B stored in row major order.
|
||||||
|
If src2 is null pointer only Cholesky decomposition will be performed. After finish of work src2 contains solution \f$X\f$ of system \f$A*X=B\f$.
|
||||||
|
@param src2_step number of bytes each matrix \f$B\f$ row occupies.
|
||||||
|
@param n number of right-hand vectors in \f$M\times N\f$ matrix \f$B\f$.
|
||||||
|
@param info indicates success of decomposition. If *info is false decomposition failed.
|
||||||
|
*/
|
||||||
|
|
||||||
|
//! @addtogroup core_hal_interface_decomp_cholesky Cholesky matrix decomposition
|
||||||
|
//! @{
|
||||||
|
inline int hal_ni_Cholesky32f(float* src1, size_t src1_step, int m, float* src2, size_t src2_step, int n, bool* info) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
|
||||||
|
inline int hal_ni_Cholesky64f(double* src1, size_t src1_step, int m, double* src2, size_t src2_step, int n, bool* info) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
|
||||||
|
//! @}
|
||||||
|
|
||||||
|
/**
|
||||||
|
Performs singular value decomposition of \f$M\times N\f$(\f$M>N\f$) matrix \f$A = U*\Sigma*V^T\f$.
|
||||||
|
@param src pointer to input \f$M\times N\f$ matrix \f$A\f$ stored in column major order.
|
||||||
|
After finish of work src will be filled with rows of \f$U\f$ or not modified (depends of flag CV_HAL_SVD_MODIFY_A).
|
||||||
|
@param src_step number of bytes each matrix \f$A\f$ column occupies.
|
||||||
|
@param w pointer to array for singular values of matrix \f$A\f$ (i. e. first \f$N\f$ diagonal elements of matrix \f$\Sigma\f$).
|
||||||
|
@param u pointer to output \f$M\times N\f$ or \f$M\times M\f$ matrix \f$U\f$ (size depends of flags). Pointer must be valid if flag CV_HAL_SVD_MODIFY_A not used.
|
||||||
|
@param u_step number of bytes each matrix \f$U\f$ row occupies.
|
||||||
|
@param vt pointer to array for \f$N\times N\f$ matrix \f$V^T\f$.
|
||||||
|
@param vt_step number of bytes each matrix \f$V^T\f$ row occupies.
|
||||||
|
@param m number fo rows in matrix \f$A\f$.
|
||||||
|
@param n number of columns in matrix \f$A\f$.
|
||||||
|
@param flags algorithm options (combination of CV_HAL_SVD_FULL_UV, ...).
|
||||||
|
*/
|
||||||
|
//! @addtogroup core_hal_interface_decomp_svd Singular value matrix decomposition
|
||||||
|
//! @{
|
||||||
|
inline int hal_ni_SVD32f(float* src, size_t src_step, float* w, float* u, size_t u_step, float* vt, size_t vt_step, int m, int n, int flags) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
|
||||||
|
inline int hal_ni_SVD64f(double* src, size_t src_step, double* w, double* u, size_t u_step, double* vt, size_t vt_step, int m, int n, int flags) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
|
||||||
|
//! @}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
#define cv_hal_LU32f hal_ni_LU32f
|
||||||
|
#define cv_hal_LU64f hal_ni_LU64f
|
||||||
|
#define cv_hal_Cholesky32f hal_ni_Cholesky32f
|
||||||
|
#define cv_hal_Cholesky64f hal_ni_Cholesky64f
|
||||||
|
#define cv_hal_SVD32f hal_ni_SVD32f
|
||||||
|
#define cv_hal_SVD64f hal_ni_SVD64f
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
The function performs generalized matrix multiplication similar to the gemm functions in BLAS level 3:
|
||||||
|
\f$D = \alpha*AB+\beta*C\f$
|
||||||
|
|
||||||
|
@param src1 pointer to input \f$M\times N\f$ matrix \f$A\f$ or \f$A^T\f$ stored in row major order.
|
||||||
|
@param src1_step number of bytes each matrix \f$A\f$ or \f$A^T\f$ row occupies.
|
||||||
|
@param src2 pointer to input \f$N\times K\f$ matrix \f$B\f$ or \f$B^T\f$ stored in row major order.
|
||||||
|
@param src2_step number of bytes each matrix \f$B\f$ or \f$B^T\f$ row occupies.
|
||||||
|
@param alpha \f$\alpha\f$ multiplier before \f$AB\f$
|
||||||
|
@param src3 pointer to input \f$M\times K\f$ matrix \f$C\f$ or \f$C^T\f$ stored in row major order.
|
||||||
|
@param src3_step number of bytes each matrix \f$C\f$ or \f$C^T\f$ row occupies.
|
||||||
|
@param beta \f$\beta\f$ multiplier before \f$C\f$
|
||||||
|
@param dst pointer to input \f$M\times K\f$ matrix \f$D\f$ stored in row major order.
|
||||||
|
@param dst_step number of bytes each matrix \f$D\f$ row occupies.
|
||||||
|
@param m number of rows in matrix \f$A\f$ or \f$A^T\f$, equals to number of rows in matrix \f$D\f$
|
||||||
|
@param n number of columns in matrix \f$A\f$ or \f$A^T\f$
|
||||||
|
@param k number of columns in matrix \f$B\f$ or \f$B^T\f$, equals to number of columns in matrix \f$D\f$
|
||||||
|
@param flags algorithm options (combination of CV_HAL_GEMM_1_T, ...).
|
||||||
|
*/
|
||||||
|
|
||||||
|
//! @addtogroup core_hal_interface_matrix_multiplication Matrix multiplication
|
||||||
|
//! @{
|
||||||
|
inline int hal_ni_gemm32f(const float* src1, size_t src1_step, const float* src2, size_t src2_step,
|
||||||
|
float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step,
|
||||||
|
int m, int n, int k, int flags) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
|
||||||
|
inline int hal_ni_gemm64f(const double* src1, size_t src1_step, const double* src2, size_t src2_step,
|
||||||
|
double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step,
|
||||||
|
int m, int n, int k, int flags) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
|
||||||
|
inline int hal_ni_gemm32fc(const float* src1, size_t src1_step, const float* src2, size_t src2_step,
|
||||||
|
float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step,
|
||||||
|
int m, int n, int k, int flags) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
|
||||||
|
inline int hal_ni_gemm64fc(const double* src1, size_t src1_step, const double* src2, size_t src2_step,
|
||||||
|
double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step,
|
||||||
|
int m, int n, int k, int flags) { return CV_HAL_ERROR_NOT_IMPLEMENTED; }
|
||||||
|
//! @}
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
#define cv_hal_gemm32f hal_ni_gemm32f
|
||||||
|
#define cv_hal_gemm64f hal_ni_gemm64f
|
||||||
|
#define cv_hal_gemm32fc hal_ni_gemm32fc
|
||||||
|
#define cv_hal_gemm64fc hal_ni_gemm64fc
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
|
//! @}
|
||||||
|
|
||||||
|
|
||||||
#if defined __GNUC__
|
#if defined __GNUC__
|
||||||
# pragma GCC diagnostic pop
|
# pragma GCC diagnostic pop
|
||||||
#elif defined _MSC_VER
|
#elif defined _MSC_VER
|
||||||
# pragma warning( pop )
|
# pragma warning( pop )
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#include "hal_internal.hpp"
|
||||||
#include "custom_hal.hpp"
|
#include "custom_hal.hpp"
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
#define CALL_HAL_RET(name, fun, retval, ...) \
|
||||||
|
int res = fun(__VA_ARGS__, &retval); \
|
||||||
|
if (res == CV_HAL_ERROR_OK) \
|
||||||
|
return retval; \
|
||||||
|
else if (res != CV_HAL_ERROR_NOT_IMPLEMENTED) \
|
||||||
|
CV_Error_(cv::Error::StsInternal, \
|
||||||
|
("HAL implementation " CVAUX_STR(name) " ==> " CVAUX_STR(fun) " returned %d (0x%08x)", res, res));
|
||||||
|
|
||||||
|
|
||||||
|
#define CALL_HAL(name, fun, ...) \
|
||||||
|
int res = fun(__VA_ARGS__); \
|
||||||
|
if (res == CV_HAL_ERROR_OK) \
|
||||||
|
return; \
|
||||||
|
else if (res != CV_HAL_ERROR_NOT_IMPLEMENTED) \
|
||||||
|
CV_Error_(cv::Error::StsInternal, \
|
||||||
|
("HAL implementation " CVAUX_STR(name) " ==> " CVAUX_STR(fun) " returned %d (0x%08x)", res, res));
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
@@ -570,11 +570,44 @@ JacobiSVDImpl_(_Tp* At, size_t astep, _Tp* _W, _Tp* Vt, size_t vstep,
|
|||||||
|
|
||||||
static void JacobiSVD(float* At, size_t astep, float* W, float* Vt, size_t vstep, int m, int n, int n1=-1)
|
static void JacobiSVD(float* At, size_t astep, float* W, float* Vt, size_t vstep, int m, int n, int n1=-1)
|
||||||
{
|
{
|
||||||
JacobiSVDImpl_(At, astep, W, Vt, vstep, m, n, !Vt ? 0 : n1 < 0 ? n : n1, FLT_MIN, FLT_EPSILON*2);
|
hal::SVD32f(At, astep, W, NULL, astep, Vt, vstep, m, n, n1);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void JacobiSVD(double* At, size_t astep, double* W, double* Vt, size_t vstep, int m, int n, int n1=-1)
|
static void JacobiSVD(double* At, size_t astep, double* W, double* Vt, size_t vstep, int m, int n, int n1=-1)
|
||||||
{
|
{
|
||||||
|
hal::SVD64f(At, astep, W, NULL, astep, Vt, vstep, m, n, n1);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename fptype> static inline int
|
||||||
|
decodeSVDParameters(const fptype* U, const fptype* Vt, int m, int n, int n1)
|
||||||
|
{
|
||||||
|
int halSVDFlag = 0;
|
||||||
|
if(Vt == NULL)
|
||||||
|
halSVDFlag = CV_HAL_SVD_NO_UV;
|
||||||
|
else if(n1 <= 0 || n1 == n)
|
||||||
|
{
|
||||||
|
halSVDFlag = CV_HAL_SVD_SHORT_UV;
|
||||||
|
if(U == NULL)
|
||||||
|
halSVDFlag |= CV_HAL_SVD_MODIFY_A;
|
||||||
|
}
|
||||||
|
else if(n1 == m)
|
||||||
|
{
|
||||||
|
halSVDFlag = CV_HAL_SVD_FULL_UV;
|
||||||
|
if(U == NULL)
|
||||||
|
halSVDFlag |= CV_HAL_SVD_MODIFY_A;
|
||||||
|
}
|
||||||
|
return halSVDFlag;
|
||||||
|
}
|
||||||
|
|
||||||
|
void hal::SVD32f(float* At, size_t astep, float* W, float* U, size_t ustep, float* Vt, size_t vstep, int m, int n, int n1)
|
||||||
|
{
|
||||||
|
CALL_HAL(SVD32f, cv_hal_SVD32f, At, astep, W, U, ustep, Vt, vstep, m, n, decodeSVDParameters(U, Vt, m, n, n1))
|
||||||
|
JacobiSVDImpl_(At, astep, W, Vt, vstep, m, n, !Vt ? 0 : n1 < 0 ? n : n1, FLT_MIN, FLT_EPSILON*2);
|
||||||
|
}
|
||||||
|
|
||||||
|
void hal::SVD64f(double* At, size_t astep, double* W, double* U, size_t ustep, double* Vt, size_t vstep, int m, int n, int n1)
|
||||||
|
{
|
||||||
|
CALL_HAL(SVD64f, cv_hal_SVD64f, At, astep, W, U, ustep, Vt, vstep, m, n, decodeSVDParameters(U, Vt, m, n, n1))
|
||||||
JacobiSVDImpl_(At, astep, W, Vt, vstep, m, n, !Vt ? 0 : n1 < 0 ? n : n1, DBL_MIN, DBL_EPSILON*10);
|
JacobiSVDImpl_(At, astep, W, Vt, vstep, m, n, !Vt ? 0 : n1 < 0 ? n : n1, DBL_MIN, DBL_EPSILON*10);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -745,7 +778,6 @@ double cv::determinant( InputArray _mat )
|
|||||||
{
|
{
|
||||||
for( int i = 0; i < rows; i++ )
|
for( int i = 0; i < rows; i++ )
|
||||||
result *= a.at<float>(i,i);
|
result *= a.at<float>(i,i);
|
||||||
result = 1./result;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -769,7 +801,6 @@ double cv::determinant( InputArray _mat )
|
|||||||
{
|
{
|
||||||
for( int i = 0; i < rows; i++ )
|
for( int i = 0; i < rows; i++ )
|
||||||
result *= a.at<double>(i,i);
|
result *= a.at<double>(i,i);
|
||||||
result = 1./result;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@@ -864,73 +864,39 @@ static bool ocl_gemm( InputArray matA, InputArray matB, double alpha,
|
|||||||
return k.run(2, globalsize, block_size!=1 ? localsize : NULL, false);
|
return k.run(2, globalsize, block_size!=1 ? localsize : NULL, false);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
}
|
|
||||||
|
|
||||||
void cv::gemm( InputArray matA, InputArray matB, double alpha,
|
static void gemmImpl( Mat A, Mat B, double alpha,
|
||||||
InputArray matC, double beta, OutputArray _matD, int flags )
|
Mat C, double beta, Mat D, int flags )
|
||||||
{
|
{
|
||||||
#ifdef HAVE_CLAMDBLAS
|
|
||||||
CV_OCL_RUN(ocl::haveAmdBlas() && matA.dims() <= 2 && matB.dims() <= 2 && matC.dims() <= 2 && _matD.isUMat() &&
|
|
||||||
matA.cols() > 20 && matA.rows() > 20 && matB.cols() > 20, // since it works incorrect for small sizes
|
|
||||||
ocl_gemm_amdblas(matA, matB, alpha, matC, beta, _matD, flags))
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef HAVE_OPENCL
|
|
||||||
CV_OCL_RUN(_matD.isUMat() && matA.dims() <= 2 && matB.dims() <= 2 && matC.dims() <= 2,
|
|
||||||
ocl_gemm(matA, matB, alpha, matC, beta, _matD, flags))
|
|
||||||
#endif
|
|
||||||
|
|
||||||
const int block_lin_size = 128;
|
const int block_lin_size = 128;
|
||||||
const int block_size = block_lin_size * block_lin_size;
|
const int block_size = block_lin_size * block_lin_size;
|
||||||
|
|
||||||
static double zero[] = {0,0,0,0};
|
static double zero[] = {0,0,0,0};
|
||||||
static float zerof[] = {0,0,0,0};
|
static float zerof[] = {0,0,0,0};
|
||||||
|
|
||||||
Mat A = matA.getMat(), B = matB.getMat(), C = beta != 0 ? matC.getMat() : Mat();
|
|
||||||
Size a_size = A.size(), d_size;
|
Size a_size = A.size(), d_size;
|
||||||
int i, len = 0, type = A.type();
|
int i, len = 0, type = A.type();
|
||||||
|
|
||||||
CV_Assert( type == B.type() && (type == CV_32FC1 || type == CV_64FC1 || type == CV_32FC2 || type == CV_64FC2) );
|
|
||||||
|
|
||||||
switch( flags & (GEMM_1_T|GEMM_2_T) )
|
switch( flags & (GEMM_1_T|GEMM_2_T) )
|
||||||
{
|
{
|
||||||
case 0:
|
case 0:
|
||||||
d_size = Size( B.cols, a_size.height );
|
d_size = Size( B.cols, a_size.height );
|
||||||
len = B.rows;
|
len = B.rows;
|
||||||
CV_Assert( a_size.width == len );
|
|
||||||
break;
|
break;
|
||||||
case 1:
|
case 1:
|
||||||
d_size = Size( B.cols, a_size.width );
|
d_size = Size( B.cols, a_size.width );
|
||||||
len = B.rows;
|
len = B.rows;
|
||||||
CV_Assert( a_size.height == len );
|
|
||||||
break;
|
break;
|
||||||
case 2:
|
case 2:
|
||||||
d_size = Size( B.rows, a_size.height );
|
d_size = Size( B.rows, a_size.height );
|
||||||
len = B.cols;
|
len = B.cols;
|
||||||
CV_Assert( a_size.width == len );
|
|
||||||
break;
|
break;
|
||||||
case 3:
|
case 3:
|
||||||
d_size = Size( B.rows, a_size.width );
|
d_size = Size( B.rows, a_size.width );
|
||||||
len = B.cols;
|
len = B.cols;
|
||||||
CV_Assert( a_size.height == len );
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if( !C.empty() )
|
|
||||||
{
|
|
||||||
CV_Assert( C.type() == type &&
|
|
||||||
(((flags&GEMM_3_T) == 0 && C.rows == d_size.height && C.cols == d_size.width) ||
|
|
||||||
((flags&GEMM_3_T) != 0 && C.rows == d_size.width && C.cols == d_size.height)));
|
|
||||||
}
|
|
||||||
|
|
||||||
_matD.create( d_size.height, d_size.width, type );
|
|
||||||
Mat D = _matD.getMat();
|
|
||||||
if( (flags & GEMM_3_T) != 0 && C.data == D.data )
|
|
||||||
{
|
|
||||||
transpose( C, C );
|
|
||||||
flags &= ~GEMM_3_T;
|
|
||||||
}
|
|
||||||
|
|
||||||
if( flags == 0 && 2 <= len && len <= 4 && (len == d_size.width || len == d_size.height) )
|
if( flags == 0 && 2 <= len && len <= 4 && (len == d_size.width || len == d_size.height) )
|
||||||
{
|
{
|
||||||
if( type == CV_32F )
|
if( type == CV_32F )
|
||||||
@@ -1194,8 +1160,7 @@ void cv::gemm( InputArray matA, InputArray matB, double alpha,
|
|||||||
GEMMSingleMulFunc singleMulFunc;
|
GEMMSingleMulFunc singleMulFunc;
|
||||||
GEMMBlockMulFunc blockMulFunc;
|
GEMMBlockMulFunc blockMulFunc;
|
||||||
GEMMStoreFunc storeFunc;
|
GEMMStoreFunc storeFunc;
|
||||||
Mat *matD = &D, tmat;
|
Mat *matD = &D;
|
||||||
size_t tmat_size = 0;
|
|
||||||
const uchar* Cdata = C.data;
|
const uchar* Cdata = C.data;
|
||||||
size_t Cstep = C.data ? (size_t)C.step : 0;
|
size_t Cstep = C.data ? (size_t)C.step : 0;
|
||||||
AutoBuffer<uchar> buf;
|
AutoBuffer<uchar> buf;
|
||||||
@@ -1226,13 +1191,6 @@ void cv::gemm( InputArray matA, InputArray matB, double alpha,
|
|||||||
storeFunc = (GEMMStoreFunc)GEMMStore_64fc;
|
storeFunc = (GEMMStoreFunc)GEMMStore_64fc;
|
||||||
}
|
}
|
||||||
|
|
||||||
if( D.data == A.data || D.data == B.data )
|
|
||||||
{
|
|
||||||
tmat_size = (size_t)d_size.width*d_size.height*CV_ELEM_SIZE(type);
|
|
||||||
// Allocate tmat later, once the size of buf is known
|
|
||||||
matD = &tmat;
|
|
||||||
}
|
|
||||||
|
|
||||||
if( (d_size.width == 1 || len == 1) && !(flags & GEMM_2_T) && B.isContinuous() )
|
if( (d_size.width == 1 || len == 1) && !(flags & GEMM_2_T) && B.isContinuous() )
|
||||||
{
|
{
|
||||||
b_step = d_size.width == 1 ? 0 : CV_ELEM_SIZE(type);
|
b_step = d_size.width == 1 ? 0 : CV_ELEM_SIZE(type);
|
||||||
@@ -1306,10 +1264,6 @@ void cv::gemm( InputArray matA, InputArray matB, double alpha,
|
|||||||
(d_size.width <= block_lin_size &&
|
(d_size.width <= block_lin_size &&
|
||||||
d_size.height <= block_lin_size && len <= block_lin_size) )
|
d_size.height <= block_lin_size && len <= block_lin_size) )
|
||||||
{
|
{
|
||||||
if( tmat_size > 0 ) {
|
|
||||||
buf.allocate(tmat_size);
|
|
||||||
tmat = Mat(d_size.height, d_size.width, type, (uchar*)buf );
|
|
||||||
}
|
|
||||||
singleMulFunc( A.ptr(), A.step, B.ptr(), b_step, Cdata, Cstep,
|
singleMulFunc( A.ptr(), A.step, B.ptr(), b_step, Cdata, Cstep,
|
||||||
matD->ptr(), matD->step, a_size, d_size, alpha, beta, flags );
|
matD->ptr(), matD->step, a_size, d_size, alpha, beta, flags );
|
||||||
}
|
}
|
||||||
@@ -1369,14 +1323,12 @@ void cv::gemm( InputArray matA, InputArray matB, double alpha,
|
|||||||
flags &= ~GEMM_1_T;
|
flags &= ~GEMM_1_T;
|
||||||
}
|
}
|
||||||
|
|
||||||
buf.allocate(d_buf_size + b_buf_size + a_buf_size + tmat_size);
|
buf.allocate(d_buf_size + b_buf_size + a_buf_size);
|
||||||
d_buf = (uchar*)buf;
|
d_buf = (uchar*)buf;
|
||||||
b_buf = d_buf + d_buf_size;
|
b_buf = d_buf + d_buf_size;
|
||||||
|
|
||||||
if( is_a_t )
|
if( is_a_t )
|
||||||
a_buf = b_buf + b_buf_size;
|
a_buf = b_buf + b_buf_size;
|
||||||
if( tmat_size > 0 )
|
|
||||||
tmat = Mat(d_size.height, d_size.width, type, b_buf + b_buf_size + a_buf_size );
|
|
||||||
|
|
||||||
for( i = 0; i < d_size.height; i += di )
|
for( i = 0; i < d_size.height; i += di )
|
||||||
{
|
{
|
||||||
@@ -1455,12 +1407,200 @@ void cv::gemm( InputArray matA, InputArray matB, double alpha,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if( matD != &D )
|
|
||||||
matD->copyTo(D);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename fptype>inline static void
|
||||||
|
callGemmImpl(const fptype *src1, size_t src1_step, const fptype *src2, size_t src2_step, fptype alpha,
|
||||||
|
const fptype *src3, size_t src3_step, fptype beta, fptype *dst, size_t dst_step, int m_a, int n_a, int n_d, int flags, int type)
|
||||||
|
{
|
||||||
|
CV_StaticAssert(GEMM_1_T == CV_HAL_GEMM_1_T, "Incompatible GEMM_1_T flag in HAL");
|
||||||
|
CV_StaticAssert(GEMM_2_T == CV_HAL_GEMM_2_T, "Incompatible GEMM_2_T flag in HAL");
|
||||||
|
CV_StaticAssert(GEMM_3_T == CV_HAL_GEMM_3_T, "Incompatible GEMM_3_T flag in HAL");
|
||||||
|
|
||||||
|
int b_m, b_n, c_m, c_n, m_d;
|
||||||
|
|
||||||
|
if(flags & GEMM_2_T)
|
||||||
|
{
|
||||||
|
b_m = n_d;
|
||||||
|
if(flags & GEMM_1_T )
|
||||||
|
{
|
||||||
|
b_n = m_a;
|
||||||
|
m_d = n_a;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
b_n = n_a;
|
||||||
|
m_d = m_a;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
b_n = n_d;
|
||||||
|
if(flags & GEMM_1_T )
|
||||||
|
{
|
||||||
|
b_m = m_a;
|
||||||
|
m_d = n_a;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
m_d = m_a;
|
||||||
|
b_m = n_a;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(flags & GEMM_3_T)
|
||||||
|
{
|
||||||
|
c_m = n_d;
|
||||||
|
c_n = m_d;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
c_m = m_d;
|
||||||
|
c_n = n_d;
|
||||||
|
}
|
||||||
|
|
||||||
|
Mat A, B, C;
|
||||||
|
if(src1 != NULL)
|
||||||
|
A = Mat(m_a, n_a, type, (void*)src1, src1_step);
|
||||||
|
if(src2 != NULL)
|
||||||
|
B = Mat(b_m, b_n, type, (void*)src2, src2_step);
|
||||||
|
if(src3 != NULL && beta != 0.0)
|
||||||
|
C = Mat(c_m, c_n, type, (void*)src3, src3_step);
|
||||||
|
Mat D(m_d, n_d, type, (void*)dst, dst_step);
|
||||||
|
|
||||||
|
gemmImpl(A, B, alpha, C, beta, D, flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void cv::hal::gemm32f(const float* src1, size_t src1_step, const float* src2, size_t src2_step,
|
||||||
|
float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step,
|
||||||
|
int m_a, int n_a, int n_d, int flags)
|
||||||
|
{
|
||||||
|
|
||||||
|
CALL_HAL(gemm32f, cv_hal_gemm32f, src1, src1_step, src2, src2_step, alpha, src3, src3_step, beta, dst, dst_step, m_a, n_a, n_d, flags)
|
||||||
|
callGemmImpl(src1, src1_step, src2, src2_step, alpha, src3, src3_step, beta, dst, dst_step, m_a, n_a, n_d, flags, CV_32F);
|
||||||
|
}
|
||||||
|
|
||||||
|
void cv::hal::gemm64f(const double* src1, size_t src1_step, const double* src2, size_t src2_step,
|
||||||
|
double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step,
|
||||||
|
int m_a, int n_a, int n_d, int flags)
|
||||||
|
{
|
||||||
|
CALL_HAL(gemm64f, cv_hal_gemm64f, src1, src1_step, src2, src2_step, alpha, src3, src3_step, beta, dst, dst_step, m_a, n_a, n_d, flags)
|
||||||
|
callGemmImpl(src1, src1_step, src2, src2_step, alpha, src3, src3_step, beta, dst, dst_step, m_a, n_a, n_d, flags, CV_64F);
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_EXPORTS void cv::hal::gemm32fc(const float* src1, size_t src1_step, const float* src2, size_t src2_step,
|
||||||
|
float alpha, const float* src3, size_t src3_step, float beta, float* dst, size_t dst_step,
|
||||||
|
int m_a, int n_a, int n_d, int flags)
|
||||||
|
{
|
||||||
|
CALL_HAL(gemm32fc, cv_hal_gemm32fc, src1, src1_step, src2, src2_step, alpha, src3, src3_step, beta, dst, dst_step, m_a, n_a, n_d, flags)
|
||||||
|
callGemmImpl(src1, src1_step, src2, src2_step, alpha, src3, src3_step, beta, dst, dst_step, m_a, n_a, n_d, flags, CV_32FC2);
|
||||||
|
}
|
||||||
|
|
||||||
|
CV_EXPORTS void cv::hal::gemm64fc(const double* src1, size_t src1_step, const double* src2, size_t src2_step,
|
||||||
|
double alpha, const double* src3, size_t src3_step, double beta, double* dst, size_t dst_step,
|
||||||
|
int m_a, int n_a, int n_d, int flags)
|
||||||
|
{
|
||||||
|
CALL_HAL(gemm64fc, cv_hal_gemm64fc, src1, src1_step, src2, src2_step, alpha, src3, src3_step, beta, dst, dst_step, m_a, n_a, n_d, flags)
|
||||||
|
callGemmImpl(src1, src1_step, src2, src2_step, alpha, src3, src3_step, beta, dst, dst_step, m_a, n_a, n_d, flags, CV_64FC2);
|
||||||
|
}
|
||||||
|
|
||||||
|
void cv::gemm( InputArray matA, InputArray matB, double alpha,
|
||||||
|
InputArray matC, double beta, OutputArray _matD, int flags )
|
||||||
|
{
|
||||||
|
#ifdef HAVE_CLAMDBLAS
|
||||||
|
CV_OCL_RUN(ocl::haveAmdBlas() && matA.dims() <= 2 && matB.dims() <= 2 && matC.dims() <= 2 && _matD.isUMat() &&
|
||||||
|
matA.cols() > 20 && matA.rows() > 20 && matB.cols() > 20, // since it works incorrect for small sizes
|
||||||
|
ocl_gemm_amdblas(matA, matB, alpha, matC, beta, _matD, flags))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef HAVE_OPENCL
|
||||||
|
CV_OCL_RUN(_matD.isUMat() && matA.dims() <= 2 && matB.dims() <= 2 && matC.dims() <= 2,
|
||||||
|
ocl_gemm(matA, matB, alpha, matC, beta, _matD, flags))
|
||||||
|
#endif
|
||||||
|
|
||||||
|
Mat A = matA.getMat(), B = matB.getMat(), C = beta != 0.0 ? matC.getMat() : Mat();
|
||||||
|
Size a_size = A.size(), d_size;
|
||||||
|
int len = 0, type = A.type();
|
||||||
|
|
||||||
|
CV_Assert( type == B.type() && (type == CV_32FC1 || type == CV_64FC1 || type == CV_32FC2 || type == CV_64FC2) );
|
||||||
|
|
||||||
|
switch( flags & (GEMM_1_T|GEMM_2_T) )
|
||||||
|
{
|
||||||
|
case 0:
|
||||||
|
d_size = Size( B.cols, a_size.height );
|
||||||
|
len = B.rows;
|
||||||
|
CV_Assert( a_size.width == len );
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
d_size = Size( B.cols, a_size.width );
|
||||||
|
len = B.rows;
|
||||||
|
CV_Assert( a_size.height == len );
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
d_size = Size( B.rows, a_size.height );
|
||||||
|
len = B.cols;
|
||||||
|
CV_Assert( a_size.width == len );
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
d_size = Size( B.rows, a_size.width );
|
||||||
|
len = B.cols;
|
||||||
|
CV_Assert( a_size.height == len );
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if( !C.empty() )
|
||||||
|
{
|
||||||
|
CV_Assert( C.type() == type &&
|
||||||
|
(((flags&GEMM_3_T) == 0 && C.rows == d_size.height && C.cols == d_size.width) ||
|
||||||
|
((flags&GEMM_3_T) != 0 && C.rows == d_size.width && C.cols == d_size.height)));
|
||||||
|
}
|
||||||
|
|
||||||
|
_matD.create( d_size.height, d_size.width, type );
|
||||||
|
Mat D = _matD.getMat();
|
||||||
|
if( (flags & GEMM_3_T) != 0 && C.data == D.data )
|
||||||
|
{
|
||||||
|
transpose( C, C );
|
||||||
|
flags &= ~GEMM_3_T;
|
||||||
|
}
|
||||||
|
|
||||||
|
Mat *DProxyPtr = &D, DProxy;
|
||||||
|
if( D.data == A.data || D.data == B.data )
|
||||||
|
{
|
||||||
|
DProxy = Mat(d_size.height, d_size.width, D.type());
|
||||||
|
DProxyPtr = &DProxy;
|
||||||
|
}
|
||||||
|
|
||||||
|
if( type == CV_32FC1 )
|
||||||
|
hal::gemm32f(A.ptr<float>(), A.step, B.ptr<float>(), B.step, static_cast<float>(alpha),
|
||||||
|
C.ptr<float>(), C.step, static_cast<float>(beta),
|
||||||
|
DProxyPtr->ptr<float>(), DProxyPtr->step,
|
||||||
|
a_size.height, a_size.width, DProxyPtr->cols, flags);
|
||||||
|
else if( type == CV_64FC1 )
|
||||||
|
hal::gemm64f(A.ptr<double>(), A.step, B.ptr<double>(), B.step, alpha,
|
||||||
|
C.ptr<double>(), C.step, beta,
|
||||||
|
DProxyPtr->ptr<double>(), DProxyPtr->step,
|
||||||
|
a_size.height, a_size.width, DProxyPtr->cols, flags);
|
||||||
|
else if( type == CV_32FC2 )
|
||||||
|
hal::gemm32fc(A.ptr<float>(), A.step, B.ptr<float>(), B.step, static_cast<float>(alpha),
|
||||||
|
C.ptr<float>(), C.step, static_cast<float>(beta),
|
||||||
|
DProxyPtr->ptr<float>(), DProxyPtr->step,
|
||||||
|
a_size.height, a_size.width, DProxyPtr->cols, flags);
|
||||||
|
else
|
||||||
|
{
|
||||||
|
CV_Assert( type == CV_64FC2 );
|
||||||
|
hal::gemm64fc(A.ptr<double>(), A.step, B.ptr<double>(), B.step, alpha,
|
||||||
|
C.ptr<double>(), C.step, beta,
|
||||||
|
D.ptr<double>(), D.step,
|
||||||
|
a_size.height, a_size.width, DProxyPtr->cols, flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
if(DProxyPtr != &D)
|
||||||
|
DProxyPtr->copyTo(D);
|
||||||
|
}
|
||||||
|
|
||||||
/****************************************************************************************\
|
/****************************************************************************************\
|
||||||
* Transform *
|
* Transform *
|
||||||
\****************************************************************************************/
|
\****************************************************************************************/
|
||||||
|
@@ -89,8 +89,6 @@ LUImpl(_Tp* A, size_t astep, int m, _Tp* b, size_t bstep, int n, _Tp eps)
|
|||||||
for( k = 0; k < n; k++ )
|
for( k = 0; k < n; k++ )
|
||||||
b[j*bstep + k] += alpha*b[i*bstep + k];
|
b[j*bstep + k] += alpha*b[i*bstep + k];
|
||||||
}
|
}
|
||||||
|
|
||||||
A[i*astep + i] = -d;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if( b )
|
if( b )
|
||||||
@@ -101,7 +99,7 @@ LUImpl(_Tp* A, size_t astep, int m, _Tp* b, size_t bstep, int n, _Tp eps)
|
|||||||
_Tp s = b[i*bstep + j];
|
_Tp s = b[i*bstep + j];
|
||||||
for( k = i+1; k < m; k++ )
|
for( k = i+1; k < m; k++ )
|
||||||
s -= A[i*astep + k]*b[k*bstep + j];
|
s -= A[i*astep + k]*b[k*bstep + j];
|
||||||
b[i*bstep + j] = s*A[i*astep + i];
|
b[i*bstep + j] = s/A[i*astep + i];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -111,13 +109,19 @@ LUImpl(_Tp* A, size_t astep, int m, _Tp* b, size_t bstep, int n, _Tp eps)
|
|||||||
|
|
||||||
int LU32f(float* A, size_t astep, int m, float* b, size_t bstep, int n)
|
int LU32f(float* A, size_t astep, int m, float* b, size_t bstep, int n)
|
||||||
{
|
{
|
||||||
return LUImpl(A, astep, m, b, bstep, n, FLT_EPSILON*10);
|
int output;
|
||||||
|
CALL_HAL_RET(LU32f, cv_hal_LU32f, output, A, astep, m, b, bstep, n)
|
||||||
|
output = LUImpl(A, astep, m, b, bstep, n, FLT_EPSILON*10);
|
||||||
|
return output;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
int LU64f(double* A, size_t astep, int m, double* b, size_t bstep, int n)
|
int LU64f(double* A, size_t astep, int m, double* b, size_t bstep, int n)
|
||||||
{
|
{
|
||||||
return LUImpl(A, astep, m, b, bstep, n, DBL_EPSILON*100);
|
int output;
|
||||||
|
CALL_HAL_RET(LU64f, cv_hal_LU64f, output, A, astep, m, b, bstep, n)
|
||||||
|
output = LUImpl(A, astep, m, b, bstep, n, DBL_EPSILON*100);
|
||||||
|
return output;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename _Tp> static inline bool
|
template<typename _Tp> static inline bool
|
||||||
@@ -193,14 +197,17 @@ CholImpl(_Tp* A, size_t astep, int m, _Tp* b, size_t bstep, int n)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
bool Cholesky32f(float* A, size_t astep, int m, float* b, size_t bstep, int n)
|
bool Cholesky32f(float* A, size_t astep, int m, float* b, size_t bstep, int n)
|
||||||
{
|
{
|
||||||
|
bool output;
|
||||||
|
CALL_HAL_RET(Cholesky32f, cv_hal_Cholesky32f, output, A, astep, m, b, bstep, n)
|
||||||
return CholImpl(A, astep, m, b, bstep, n);
|
return CholImpl(A, astep, m, b, bstep, n);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool Cholesky64f(double* A, size_t astep, int m, double* b, size_t bstep, int n)
|
bool Cholesky64f(double* A, size_t astep, int m, double* b, size_t bstep, int n)
|
||||||
{
|
{
|
||||||
|
bool output;
|
||||||
|
CALL_HAL_RET(Cholesky64f, cv_hal_Cholesky64f, output, A, astep, m, b, bstep, n)
|
||||||
return CholImpl(A, astep, m, b, bstep, n);
|
return CholImpl(A, astep, m, b, bstep, n);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -309,4 +309,23 @@ inline int hal_ni_warpPerspectve(int src_type, const uchar *src_data, size_t src
|
|||||||
|
|
||||||
#include "custom_hal.hpp"
|
#include "custom_hal.hpp"
|
||||||
|
|
||||||
|
//! @cond IGNORED
|
||||||
|
#define CALL_HAL_RET(name, fun, retval, ...) \
|
||||||
|
int res = fun(__VA_ARGS__, &retval); \
|
||||||
|
if (res == CV_HAL_ERROR_OK) \
|
||||||
|
return retval; \
|
||||||
|
else if (res != CV_HAL_ERROR_NOT_IMPLEMENTED) \
|
||||||
|
CV_Error_(cv::Error::StsInternal, \
|
||||||
|
("HAL implementation " CVAUX_STR(name) " ==> " CVAUX_STR(fun) " returned %d (0x%08x)", res, res));
|
||||||
|
|
||||||
|
|
||||||
|
#define CALL_HAL(name, fun, ...) \
|
||||||
|
int res = fun(__VA_ARGS__); \
|
||||||
|
if (res == CV_HAL_ERROR_OK) \
|
||||||
|
return; \
|
||||||
|
else if (res != CV_HAL_ERROR_NOT_IMPLEMENTED) \
|
||||||
|
CV_Error_(cv::Error::StsInternal, \
|
||||||
|
("HAL implementation " CVAUX_STR(name) " ==> " CVAUX_STR(fun) " returned %d (0x%08x)", res, res));
|
||||||
|
//! @endcond
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
Reference in New Issue
Block a user