Revert "Merge pull request #836 from jet47:gpu-modules"
This reverts commit fba72cb60d27905cce9f1390250adf13d5355c03, reversing changes made to 02131ffb620f349617a65da334591eca5d3cb23b.
This commit is contained in:
parent
fba72cb60d
commit
416fb50594
@ -28,9 +28,6 @@ if(CUDA_FOUND)
|
||||
|
||||
if(WITH_NVCUVID)
|
||||
find_cuda_helper_libs(nvcuvid)
|
||||
if(WIN32)
|
||||
find_cuda_helper_libs(nvcuvenc)
|
||||
endif()
|
||||
set(HAVE_NVCUVID 1)
|
||||
endif()
|
||||
|
||||
|
@ -73,8 +73,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return (x >= 0 && x < width) ? saturate_cast<D>(data[x]) : val;
|
||||
}
|
||||
|
||||
int width;
|
||||
D val;
|
||||
const int width;
|
||||
const D val;
|
||||
};
|
||||
|
||||
template <typename D> struct BrdColConstant
|
||||
@ -98,8 +98,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return (y >= 0 && y < height) ? saturate_cast<D>(*(const T*)((const char*)data + y * step)) : val;
|
||||
}
|
||||
|
||||
int height;
|
||||
D val;
|
||||
const int height;
|
||||
const D val;
|
||||
};
|
||||
|
||||
template <typename D> struct BrdConstant
|
||||
@ -120,9 +120,9 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast<D>(src(y, x)) : val;
|
||||
}
|
||||
|
||||
int height;
|
||||
int width;
|
||||
D val;
|
||||
const int height;
|
||||
const int width;
|
||||
const D val;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
@ -165,7 +165,7 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return saturate_cast<D>(data[idx_col(x)]);
|
||||
}
|
||||
|
||||
int last_col;
|
||||
const int last_col;
|
||||
};
|
||||
|
||||
template <typename D> struct BrdColReplicate
|
||||
@ -205,7 +205,7 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return saturate_cast<D>(*(const T*)((const char*)data + idx_row(y) * step));
|
||||
}
|
||||
|
||||
int last_row;
|
||||
const int last_row;
|
||||
};
|
||||
|
||||
template <typename D> struct BrdReplicate
|
||||
@ -255,8 +255,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return saturate_cast<D>(src(idx_row(y), idx_col(x)));
|
||||
}
|
||||
|
||||
int last_row;
|
||||
int last_col;
|
||||
const int last_row;
|
||||
const int last_col;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
@ -299,7 +299,7 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return saturate_cast<D>(data[idx_col(x)]);
|
||||
}
|
||||
|
||||
int last_col;
|
||||
const int last_col;
|
||||
};
|
||||
|
||||
template <typename D> struct BrdColReflect101
|
||||
@ -339,7 +339,7 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step));
|
||||
}
|
||||
|
||||
int last_row;
|
||||
const int last_row;
|
||||
};
|
||||
|
||||
template <typename D> struct BrdReflect101
|
||||
@ -389,8 +389,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return saturate_cast<D>(src(idx_row(y), idx_col(x)));
|
||||
}
|
||||
|
||||
int last_row;
|
||||
int last_col;
|
||||
const int last_row;
|
||||
const int last_col;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
@ -433,7 +433,7 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return saturate_cast<D>(data[idx_col(x)]);
|
||||
}
|
||||
|
||||
int last_col;
|
||||
const int last_col;
|
||||
};
|
||||
|
||||
template <typename D> struct BrdColReflect
|
||||
@ -473,7 +473,7 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step));
|
||||
}
|
||||
|
||||
int last_row;
|
||||
const int last_row;
|
||||
};
|
||||
|
||||
template <typename D> struct BrdReflect
|
||||
@ -523,8 +523,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return saturate_cast<D>(src(idx_row(y), idx_col(x)));
|
||||
}
|
||||
|
||||
int last_row;
|
||||
int last_col;
|
||||
const int last_row;
|
||||
const int last_col;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
@ -567,7 +567,7 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return saturate_cast<D>(data[idx_col(x)]);
|
||||
}
|
||||
|
||||
int width;
|
||||
const int width;
|
||||
};
|
||||
|
||||
template <typename D> struct BrdColWrap
|
||||
@ -607,7 +607,7 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step));
|
||||
}
|
||||
|
||||
int height;
|
||||
const int height;
|
||||
};
|
||||
|
||||
template <typename D> struct BrdWrap
|
||||
@ -664,8 +664,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return saturate_cast<D>(src(idx_row(y), idx_col(x)));
|
||||
}
|
||||
|
||||
int height;
|
||||
int width;
|
||||
const int height;
|
||||
const int width;
|
||||
};
|
||||
|
||||
//////////////////////////////////////////////////////////////
|
||||
@ -683,8 +683,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return b.at(y, x, ptr);
|
||||
}
|
||||
|
||||
Ptr2D ptr;
|
||||
B b;
|
||||
const Ptr2D ptr;
|
||||
const B b;
|
||||
};
|
||||
|
||||
// under win32 there is some bug with templated types that passed as kernel parameters
|
||||
@ -704,10 +704,10 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast<D>(src(y, x)) : val;
|
||||
}
|
||||
|
||||
Ptr2D src;
|
||||
int height;
|
||||
int width;
|
||||
D val;
|
||||
const Ptr2D src;
|
||||
const int height;
|
||||
const int width;
|
||||
const D val;
|
||||
};
|
||||
}}} // namespace cv { namespace gpu { namespace cudev
|
||||
|
||||
|
@ -87,6 +87,15 @@ namespace cv { namespace gpu
|
||||
|
||||
namespace cv { namespace gpu
|
||||
{
|
||||
enum
|
||||
{
|
||||
BORDER_REFLECT101_GPU = 0,
|
||||
BORDER_REPLICATE_GPU,
|
||||
BORDER_CONSTANT_GPU,
|
||||
BORDER_REFLECT_GPU,
|
||||
BORDER_WRAP_GPU
|
||||
};
|
||||
|
||||
namespace cudev
|
||||
{
|
||||
__host__ __device__ __forceinline__ int divUp(int total, int grain)
|
||||
|
@ -43,7 +43,6 @@
|
||||
#ifndef OPENCV_GPU_EMULATION_HPP_
|
||||
#define OPENCV_GPU_EMULATION_HPP_
|
||||
|
||||
#include "common.hpp"
|
||||
#include "warp_reduce.hpp"
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
@ -132,130 +131,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return ::atomicMin(address, val);
|
||||
#endif
|
||||
}
|
||||
}; // struct cmem
|
||||
|
||||
struct glob
|
||||
{
|
||||
static __device__ __forceinline__ int atomicAdd(int* address, int val)
|
||||
{
|
||||
return ::atomicAdd(address, val);
|
||||
}
|
||||
static __device__ __forceinline__ unsigned int atomicAdd(unsigned int* address, unsigned int val)
|
||||
{
|
||||
return ::atomicAdd(address, val);
|
||||
}
|
||||
static __device__ __forceinline__ float atomicAdd(float* address, float val)
|
||||
{
|
||||
#if __CUDA_ARCH__ >= 200
|
||||
return ::atomicAdd(address, val);
|
||||
#else
|
||||
int* address_as_i = (int*) address;
|
||||
int old = *address_as_i, assumed;
|
||||
do {
|
||||
assumed = old;
|
||||
old = ::atomicCAS(address_as_i, assumed,
|
||||
__float_as_int(val + __int_as_float(assumed)));
|
||||
} while (assumed != old);
|
||||
return __int_as_float(old);
|
||||
#endif
|
||||
}
|
||||
static __device__ __forceinline__ double atomicAdd(double* address, double val)
|
||||
{
|
||||
#if __CUDA_ARCH__ >= 130
|
||||
unsigned long long int* address_as_ull = (unsigned long long int*) address;
|
||||
unsigned long long int old = *address_as_ull, assumed;
|
||||
do {
|
||||
assumed = old;
|
||||
old = ::atomicCAS(address_as_ull, assumed,
|
||||
__double_as_longlong(val + __longlong_as_double(assumed)));
|
||||
} while (assumed != old);
|
||||
return __longlong_as_double(old);
|
||||
#else
|
||||
(void) address;
|
||||
(void) val;
|
||||
return 0.0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ int atomicMin(int* address, int val)
|
||||
{
|
||||
return ::atomicMin(address, val);
|
||||
}
|
||||
static __device__ __forceinline__ float atomicMin(float* address, float val)
|
||||
{
|
||||
#if __CUDA_ARCH__ >= 120
|
||||
int* address_as_i = (int*) address;
|
||||
int old = *address_as_i, assumed;
|
||||
do {
|
||||
assumed = old;
|
||||
old = ::atomicCAS(address_as_i, assumed,
|
||||
__float_as_int(::fminf(val, __int_as_float(assumed))));
|
||||
} while (assumed != old);
|
||||
return __int_as_float(old);
|
||||
#else
|
||||
(void) address;
|
||||
(void) val;
|
||||
return 0.0f;
|
||||
#endif
|
||||
}
|
||||
static __device__ __forceinline__ double atomicMin(double* address, double val)
|
||||
{
|
||||
#if __CUDA_ARCH__ >= 130
|
||||
unsigned long long int* address_as_ull = (unsigned long long int*) address;
|
||||
unsigned long long int old = *address_as_ull, assumed;
|
||||
do {
|
||||
assumed = old;
|
||||
old = ::atomicCAS(address_as_ull, assumed,
|
||||
__double_as_longlong(::fmin(val, __longlong_as_double(assumed))));
|
||||
} while (assumed != old);
|
||||
return __longlong_as_double(old);
|
||||
#else
|
||||
(void) address;
|
||||
(void) val;
|
||||
return 0.0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static __device__ __forceinline__ int atomicMax(int* address, int val)
|
||||
{
|
||||
return ::atomicMax(address, val);
|
||||
}
|
||||
static __device__ __forceinline__ float atomicMax(float* address, float val)
|
||||
{
|
||||
#if __CUDA_ARCH__ >= 120
|
||||
int* address_as_i = (int*) address;
|
||||
int old = *address_as_i, assumed;
|
||||
do {
|
||||
assumed = old;
|
||||
old = ::atomicCAS(address_as_i, assumed,
|
||||
__float_as_int(::fmaxf(val, __int_as_float(assumed))));
|
||||
} while (assumed != old);
|
||||
return __int_as_float(old);
|
||||
#else
|
||||
(void) address;
|
||||
(void) val;
|
||||
return 0.0f;
|
||||
#endif
|
||||
}
|
||||
static __device__ __forceinline__ double atomicMax(double* address, double val)
|
||||
{
|
||||
#if __CUDA_ARCH__ >= 130
|
||||
unsigned long long int* address_as_ull = (unsigned long long int*) address;
|
||||
unsigned long long int old = *address_as_ull, assumed;
|
||||
do {
|
||||
assumed = old;
|
||||
old = ::atomicCAS(address_as_ull, assumed,
|
||||
__double_as_longlong(::fmax(val, __longlong_as_double(assumed))));
|
||||
} while (assumed != old);
|
||||
return __longlong_as_double(old);
|
||||
#else
|
||||
(void) address;
|
||||
(void) val;
|
||||
return 0.0;
|
||||
#endif
|
||||
}
|
||||
};
|
||||
}; //struct Emulation
|
||||
};
|
||||
}}} // namespace cv { namespace gpu { namespace cudev
|
||||
|
||||
#endif /* OPENCV_GPU_EMULATION_HPP_ */
|
||||
|
@ -67,7 +67,7 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return src(__float2int_rz(y), __float2int_rz(x));
|
||||
}
|
||||
|
||||
Ptr2D src;
|
||||
const Ptr2D src;
|
||||
};
|
||||
|
||||
template <typename Ptr2D> struct LinearFilter
|
||||
@ -107,7 +107,7 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return saturate_cast<elem_type>(out);
|
||||
}
|
||||
|
||||
Ptr2D src;
|
||||
const Ptr2D src;
|
||||
};
|
||||
|
||||
template <typename Ptr2D> struct CubicFilter
|
||||
@ -166,7 +166,7 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return saturate_cast<elem_type>(res);
|
||||
}
|
||||
|
||||
Ptr2D src;
|
||||
const Ptr2D src;
|
||||
};
|
||||
// for integer scaling
|
||||
template <typename Ptr2D> struct IntegerAreaFilter
|
||||
@ -203,7 +203,7 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return saturate_cast<elem_type>(out);
|
||||
}
|
||||
|
||||
Ptr2D src;
|
||||
const Ptr2D src;
|
||||
float scale_x, scale_y ,scale;
|
||||
};
|
||||
|
||||
@ -269,7 +269,7 @@ namespace cv { namespace gpu { namespace cudev
|
||||
return saturate_cast<elem_type>(out);
|
||||
}
|
||||
|
||||
Ptr2D src;
|
||||
const Ptr2D src;
|
||||
float scale_x, scale_y;
|
||||
int width, haight;
|
||||
};
|
||||
|
@ -552,8 +552,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
|
||||
__device__ __forceinline__ thresh_binary_func():unary_function<T, T>(){}
|
||||
|
||||
T thresh;
|
||||
T maxVal;
|
||||
const T thresh;
|
||||
const T maxVal;
|
||||
};
|
||||
|
||||
template <typename T> struct thresh_binary_inv_func : unary_function<T, T>
|
||||
@ -570,8 +570,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
|
||||
__device__ __forceinline__ thresh_binary_inv_func():unary_function<T, T>(){}
|
||||
|
||||
T thresh;
|
||||
T maxVal;
|
||||
const T thresh;
|
||||
const T maxVal;
|
||||
};
|
||||
|
||||
template <typename T> struct thresh_trunc_func : unary_function<T, T>
|
||||
@ -588,7 +588,7 @@ namespace cv { namespace gpu { namespace cudev
|
||||
|
||||
__device__ __forceinline__ thresh_trunc_func():unary_function<T, T>(){}
|
||||
|
||||
T thresh;
|
||||
const T thresh;
|
||||
};
|
||||
|
||||
template <typename T> struct thresh_to_zero_func : unary_function<T, T>
|
||||
@ -604,7 +604,7 @@ namespace cv { namespace gpu { namespace cudev
|
||||
|
||||
__device__ __forceinline__ thresh_to_zero_func():unary_function<T, T>(){}
|
||||
|
||||
T thresh;
|
||||
const T thresh;
|
||||
};
|
||||
|
||||
template <typename T> struct thresh_to_zero_inv_func : unary_function<T, T>
|
||||
@ -620,7 +620,7 @@ namespace cv { namespace gpu { namespace cudev
|
||||
|
||||
__device__ __forceinline__ thresh_to_zero_inv_func():unary_function<T, T>(){}
|
||||
|
||||
T thresh;
|
||||
const T thresh;
|
||||
};
|
||||
//bound!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! ============>
|
||||
// Function Object Adaptors
|
||||
@ -636,7 +636,7 @@ namespace cv { namespace gpu { namespace cudev
|
||||
__device__ __forceinline__ unary_negate(const unary_negate& other) : unary_function<typename Predicate::argument_type, bool>(){}
|
||||
__device__ __forceinline__ unary_negate() : unary_function<typename Predicate::argument_type, bool>(){}
|
||||
|
||||
Predicate pred;
|
||||
const Predicate pred;
|
||||
};
|
||||
|
||||
template <typename Predicate> __host__ __device__ __forceinline__ unary_negate<Predicate> not1(const Predicate& pred)
|
||||
@ -659,7 +659,7 @@ namespace cv { namespace gpu { namespace cudev
|
||||
__device__ __forceinline__ binary_negate() :
|
||||
binary_function<typename Predicate::first_argument_type, typename Predicate::second_argument_type, bool>(){}
|
||||
|
||||
Predicate pred;
|
||||
const Predicate pred;
|
||||
};
|
||||
|
||||
template <typename BinaryPredicate> __host__ __device__ __forceinline__ binary_negate<BinaryPredicate> not2(const BinaryPredicate& pred)
|
||||
@ -679,8 +679,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
__device__ __forceinline__ binder1st(const binder1st& other) :
|
||||
unary_function<typename Op::second_argument_type, typename Op::result_type>(){}
|
||||
|
||||
Op op;
|
||||
typename Op::first_argument_type arg1;
|
||||
const Op op;
|
||||
const typename Op::first_argument_type arg1;
|
||||
};
|
||||
|
||||
template <typename Op, typename T> __host__ __device__ __forceinline__ binder1st<Op> bind1st(const Op& op, const T& x)
|
||||
@ -700,8 +700,8 @@ namespace cv { namespace gpu { namespace cudev
|
||||
__device__ __forceinline__ binder2nd(const binder2nd& other) :
|
||||
unary_function<typename Op::first_argument_type, typename Op::result_type>(), op(other.op), arg2(other.arg2){}
|
||||
|
||||
Op op;
|
||||
typename Op::second_argument_type arg2;
|
||||
const Op op;
|
||||
const typename Op::second_argument_type arg2;
|
||||
};
|
||||
|
||||
template <typename Op, typename T> __host__ __device__ __forceinline__ binder2nd<Op> bind2nd(const Op& op, const T& x)
|
||||
|
@ -74,6 +74,10 @@
|
||||
namespace cv { namespace gpu {
|
||||
CV_EXPORTS cv::String getNppErrorMessage(int code);
|
||||
CV_EXPORTS cv::String getCudaDriverApiErrorMessage(int code);
|
||||
|
||||
// Converts CPU border extrapolation mode into GPU internal analogue.
|
||||
// Returns true if the GPU analogue exists, false otherwise.
|
||||
CV_EXPORTS bool tryConvertToGpuBorderType(int cpuBorderType, int& gpuBorderType);
|
||||
}}
|
||||
|
||||
#ifndef HAVE_CUDA
|
||||
|
@ -1678,3 +1678,33 @@ String cv::gpu::getCudaDriverApiErrorMessage(int code)
|
||||
return getErrorString(code, cu_errors, cu_errors_num);
|
||||
#endif
|
||||
}
|
||||
|
||||
bool cv::gpu::tryConvertToGpuBorderType(int cpuBorderType, int& gpuBorderType)
|
||||
{
|
||||
#ifndef HAVE_CUDA
|
||||
(void) cpuBorderType;
|
||||
(void) gpuBorderType;
|
||||
return false;
|
||||
#else
|
||||
switch (cpuBorderType)
|
||||
{
|
||||
case IPL_BORDER_REFLECT_101:
|
||||
gpuBorderType = cv::gpu::BORDER_REFLECT101_GPU;
|
||||
return true;
|
||||
case IPL_BORDER_REPLICATE:
|
||||
gpuBorderType = cv::gpu::BORDER_REPLICATE_GPU;
|
||||
return true;
|
||||
case IPL_BORDER_CONSTANT:
|
||||
gpuBorderType = cv::gpu::BORDER_CONSTANT_GPU;
|
||||
return true;
|
||||
case IPL_BORDER_REFLECT:
|
||||
gpuBorderType = cv::gpu::BORDER_REFLECT_GPU;
|
||||
return true;
|
||||
case IPL_BORDER_WRAP:
|
||||
gpuBorderType = cv::gpu::BORDER_WRAP_GPU;
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
};
|
||||
#endif
|
||||
}
|
||||
|
@ -3,10 +3,101 @@ if(ANDROID OR IOS)
|
||||
endif()
|
||||
|
||||
set(the_description "GPU-accelerated Computer Vision")
|
||||
ocv_add_module(gpu opencv_imgproc opencv_calib3d opencv_objdetect opencv_video opencv_photo opencv_legacy)
|
||||
|
||||
ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4100 /wd4324 /wd4512 -Wundef -Wmissing-declarations -Wshadow -Wunused-parameter)
|
||||
ocv_module_include_directories("${CMAKE_CURRENT_SOURCE_DIR}/src/cuda")
|
||||
|
||||
ocv_define_module(gpu opencv_calib3d opencv_objdetect opencv_gpuarithm opencv_gpuwarping OPTIONAL opencv_gpulegacy)
|
||||
file(GLOB lib_hdrs "include/opencv2/*.hpp" "include/opencv2/${name}/*.hpp" "include/opencv2/${name}/*.h")
|
||||
file(GLOB lib_int_hdrs "src/*.hpp" "src/*.h")
|
||||
file(GLOB lib_cuda_hdrs "src/cuda/*.hpp" "src/cuda/*.h")
|
||||
file(GLOB lib_srcs "src/*.cpp")
|
||||
file(GLOB lib_cuda "src/cuda/*.cu*")
|
||||
|
||||
source_group("Include" FILES ${lib_hdrs})
|
||||
source_group("Src\\Host" FILES ${lib_srcs} ${lib_int_hdrs})
|
||||
source_group("Src\\Cuda" FILES ${lib_cuda} ${lib_cuda_hdrs})
|
||||
|
||||
if(HAVE_CUDA)
|
||||
file(GLOB_RECURSE ncv_srcs "src/nvidia/*.cpp" "src/nvidia/*.h*")
|
||||
file(GLOB_RECURSE ncv_cuda "src/nvidia/*.cu")
|
||||
set(ncv_files ${ncv_srcs} ${ncv_cuda})
|
||||
|
||||
source_group("Src\\NVidia" FILES ${ncv_files})
|
||||
ocv_include_directories("src/nvidia" "src/nvidia/core" "src/nvidia/NPP_staging" ${CUDA_INCLUDE_DIRS})
|
||||
ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef -Wmissing-declarations -Wshadow -Wunused-parameter /wd4211 /wd4201 /wd4100 /wd4505 /wd4408)
|
||||
|
||||
if(MSVC)
|
||||
if(NOT ENABLE_NOISY_WARNINGS)
|
||||
foreach(var CMAKE_CXX_FLAGS CMAKE_CXX_FLAGS_RELEASE CMAKE_CXX_FLAGS_DEBUG)
|
||||
string(REPLACE "/W4" "/W3" ${var} "${${var}}")
|
||||
endforeach()
|
||||
|
||||
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler /wd4251)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
ocv_cuda_compile(cuda_objs ${lib_cuda} ${ncv_cuda})
|
||||
|
||||
set(cuda_link_libs ${CUDA_LIBRARIES} ${CUDA_npp_LIBRARY})
|
||||
|
||||
if(WITH_NVCUVID)
|
||||
set(cuda_link_libs ${cuda_link_libs} ${CUDA_nvcuvid_LIBRARY})
|
||||
endif()
|
||||
|
||||
if(WIN32)
|
||||
find_cuda_helper_libs(nvcuvenc)
|
||||
set(cuda_link_libs ${cuda_link_libs} ${CUDA_nvcuvenc_LIBRARY})
|
||||
endif()
|
||||
|
||||
if(WITH_FFMPEG)
|
||||
set(cuda_link_libs ${cuda_link_libs} ${HIGHGUI_LIBRARIES})
|
||||
endif()
|
||||
else()
|
||||
set(lib_cuda "")
|
||||
set(cuda_objs "")
|
||||
set(cuda_link_libs "")
|
||||
set(ncv_files "")
|
||||
endif()
|
||||
|
||||
ocv_set_module_sources(
|
||||
HEADERS ${lib_hdrs}
|
||||
SOURCES ${lib_int_hdrs} ${lib_cuda_hdrs} ${lib_srcs} ${lib_cuda} ${ncv_files} ${cuda_objs}
|
||||
)
|
||||
|
||||
ocv_create_module(${cuda_link_libs})
|
||||
|
||||
if(HAVE_CUDA)
|
||||
if(HAVE_CUFFT)
|
||||
CUDA_ADD_CUFFT_TO_TARGET(${the_module})
|
||||
endif()
|
||||
|
||||
if(HAVE_CUBLAS)
|
||||
CUDA_ADD_CUBLAS_TO_TARGET(${the_module})
|
||||
endif()
|
||||
|
||||
install(FILES src/nvidia/NPP_staging/NPP_staging.hpp src/nvidia/core/NCV.hpp
|
||||
DESTINATION ${OPENCV_INCLUDE_INSTALL_PATH}/opencv2/${name}
|
||||
COMPONENT main)
|
||||
endif()
|
||||
|
||||
ocv_add_precompiled_headers(${the_module})
|
||||
|
||||
################################################################################################################
|
||||
################################ GPU Module Tests #####################################################
|
||||
################################################################################################################
|
||||
file(GLOB test_srcs "test/*.cpp")
|
||||
file(GLOB test_hdrs "test/*.hpp" "test/*.h")
|
||||
|
||||
set(nvidia "")
|
||||
if(HAVE_CUDA)
|
||||
file(GLOB nvidia "test/nvidia/*.cpp" "test/nvidia/*.hpp" "test/nvidia/*.h")
|
||||
set(nvidia FILES "Src\\\\\\\\NVidia" ${nvidia}) # 8 ugly backslashes :'(
|
||||
endif()
|
||||
|
||||
ocv_add_accuracy_tests(FILES "Include" ${test_hdrs}
|
||||
FILES "Src" ${test_srcs}
|
||||
${nvidia})
|
||||
ocv_add_perf_tests()
|
||||
|
||||
if(HAVE_CUDA)
|
||||
add_subdirectory(perf4au)
|
||||
|
10
modules/gpu/app/nv_perf_test/CMakeLists.txt
Normal file
10
modules/gpu/app/nv_perf_test/CMakeLists.txt
Normal file
@ -0,0 +1,10 @@
|
||||
cmake_minimum_required(VERSION 2.8.3)
|
||||
|
||||
project(nv_perf_test)
|
||||
|
||||
find_package(OpenCV REQUIRED)
|
||||
include_directories(${OpenCV_INCLUDE_DIR})
|
||||
|
||||
add_executable(${PROJECT_NAME} main.cpp)
|
||||
|
||||
target_link_libraries(${PROJECT_NAME} ${OpenCV_LIBS})
|
BIN
modules/gpu/app/nv_perf_test/im1_1280x800.jpg
Normal file
BIN
modules/gpu/app/nv_perf_test/im1_1280x800.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 140 KiB |
BIN
modules/gpu/app/nv_perf_test/im2_1280x800.jpg
Normal file
BIN
modules/gpu/app/nv_perf_test/im2_1280x800.jpg
Normal file
Binary file not shown.
After Width: | Height: | Size: 140 KiB |
486
modules/gpu/app/nv_perf_test/main.cpp
Normal file
486
modules/gpu/app/nv_perf_test/main.cpp
Normal file
@ -0,0 +1,486 @@
|
||||
#include <cstdio>
|
||||
#define HAVE_CUDA 1
|
||||
#include <opencv2/core.hpp>
|
||||
#include <opencv2/gpu.hpp>
|
||||
#include <opencv2/highgui.hpp>
|
||||
#include <opencv2/video.hpp>
|
||||
#include <opencv2/ts.hpp>
|
||||
|
||||
static void printOsInfo()
|
||||
{
|
||||
#if defined _WIN32
|
||||
# if defined _WIN64
|
||||
printf("[----------]\n[ GPU INFO ] \tRun on OS Windows x64.\n[----------]\n"); fflush(stdout);
|
||||
# else
|
||||
printf("[----------]\n[ GPU INFO ] \tRun on OS Windows x32.\n[----------]\n"); fflush(stdout);
|
||||
# endif
|
||||
#elif defined linux
|
||||
# if defined _LP64
|
||||
printf("[----------]\n[ GPU INFO ] \tRun on OS Linux x64.\n[----------]\n"); fflush(stdout);
|
||||
# else
|
||||
printf("[----------]\n[ GPU INFO ] \tRun on OS Linux x32.\n[----------]\n"); fflush(stdout);
|
||||
# endif
|
||||
#elif defined __APPLE__
|
||||
# if defined _LP64
|
||||
printf("[----------]\n[ GPU INFO ] \tRun on OS Apple x64.\n[----------]\n"); fflush(stdout);
|
||||
# else
|
||||
printf("[----------]\n[ GPU INFO ] \tRun on OS Apple x32.\n[----------]\n"); fflush(stdout);
|
||||
# endif
|
||||
#endif
|
||||
}
|
||||
|
||||
static void printCudaInfo()
|
||||
{
|
||||
const int deviceCount = cv::gpu::getCudaEnabledDeviceCount();
|
||||
|
||||
printf("[----------]\n"); fflush(stdout);
|
||||
printf("[ GPU INFO ] \tCUDA device count:: %d.\n", deviceCount); fflush(stdout);
|
||||
printf("[----------]\n"); fflush(stdout);
|
||||
|
||||
for (int i = 0; i < deviceCount; ++i)
|
||||
{
|
||||
cv::gpu::DeviceInfo info(i);
|
||||
|
||||
printf("[----------]\n"); fflush(stdout);
|
||||
printf("[ DEVICE ] \t# %d %s.\n", i, info.name().c_str()); fflush(stdout);
|
||||
printf("[ ] \tCompute capability: %d.%d\n", info.majorVersion(), info.minorVersion()); fflush(stdout);
|
||||
printf("[ ] \tMulti Processor Count: %d\n", info.multiProcessorCount()); fflush(stdout);
|
||||
printf("[ ] \tTotal memory: %d Mb\n", static_cast<int>(static_cast<int>(info.totalMemory() / 1024.0) / 1024.0)); fflush(stdout);
|
||||
printf("[ ] \tFree memory: %d Mb\n", static_cast<int>(static_cast<int>(info.freeMemory() / 1024.0) / 1024.0)); fflush(stdout);
|
||||
if (!info.isCompatible())
|
||||
printf("[ GPU INFO ] \tThis device is NOT compatible with current GPU module build\n");
|
||||
printf("[----------]\n"); fflush(stdout);
|
||||
}
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
printOsInfo();
|
||||
printCudaInfo();
|
||||
|
||||
perf::Regression::Init("nv_perf_test");
|
||||
perf::TestBase::Init(argc, argv);
|
||||
testing::InitGoogleTest(&argc, argv);
|
||||
|
||||
return RUN_ALL_TESTS();
|
||||
}
|
||||
|
||||
#define DEF_PARAM_TEST(name, ...) typedef ::perf::TestBaseWithParam< std::tr1::tuple< __VA_ARGS__ > > name
|
||||
#define DEF_PARAM_TEST_1(name, param_type) typedef ::perf::TestBaseWithParam< param_type > name
|
||||
|
||||
//////////////////////////////////////////////////////////
|
||||
// HoughLinesP
|
||||
|
||||
DEF_PARAM_TEST_1(Image, std::string);
|
||||
|
||||
GPU_PERF_TEST_P(Image, HoughLinesP, testing::Values(std::string("im1_1280x800.jpg")))
|
||||
{
|
||||
declare.time(30.0);
|
||||
|
||||
std::string fileName = GetParam();
|
||||
|
||||
const float rho = 1.f;
|
||||
const float theta = 1.f;
|
||||
const int threshold = 40;
|
||||
const int minLineLenght = 20;
|
||||
const int maxLineGap = 5;
|
||||
|
||||
cv::Mat image = cv::imread(fileName, cv::IMREAD_GRAYSCALE);
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::GpuMat d_image(image);
|
||||
cv::gpu::GpuMat d_lines;
|
||||
cv::gpu::HoughLinesBuf d_buf;
|
||||
|
||||
cv::gpu::HoughLinesP(d_image, d_lines, d_buf, rho, theta, minLineLenght, maxLineGap);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cv::gpu::HoughLinesP(d_image, d_lines, d_buf, rho, theta, minLineLenght, maxLineGap);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::Mat mask;
|
||||
cv::Canny(image, mask, 50, 100);
|
||||
|
||||
std::vector<cv::Vec4i> lines;
|
||||
cv::HoughLinesP(mask, lines, rho, theta, threshold, minLineLenght, maxLineGap);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cv::HoughLinesP(mask, lines, rho, theta, threshold, minLineLenght, maxLineGap);
|
||||
}
|
||||
}
|
||||
|
||||
SANITY_CHECK(0);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////
|
||||
// GoodFeaturesToTrack
|
||||
|
||||
DEF_PARAM_TEST(Image_Depth, std::string, perf::MatDepth);
|
||||
|
||||
GPU_PERF_TEST_P(Image_Depth, GoodFeaturesToTrack,
|
||||
testing::Combine(
|
||||
testing::Values(std::string("im1_1280x800.jpg")),
|
||||
testing::Values(CV_8U, CV_16U)
|
||||
))
|
||||
{
|
||||
declare.time(60);
|
||||
|
||||
const std::string fileName = std::tr1::get<0>(GetParam());
|
||||
const int depth = std::tr1::get<1>(GetParam());
|
||||
|
||||
const int maxCorners = 5000;
|
||||
const double qualityLevel = 0.05;
|
||||
const int minDistance = 5;
|
||||
const int blockSize = 3;
|
||||
const bool useHarrisDetector = true;
|
||||
const double k = 0.05;
|
||||
|
||||
cv::Mat src = cv::imread(fileName, cv::IMREAD_GRAYSCALE);
|
||||
if (src.empty())
|
||||
FAIL() << "Unable to load source image [" << fileName << "]";
|
||||
|
||||
if (depth != CV_8U)
|
||||
src.convertTo(src, depth);
|
||||
|
||||
cv::Mat mask(src.size(), CV_8UC1, cv::Scalar::all(1));
|
||||
mask(cv::Rect(0, 0, 100, 100)).setTo(cv::Scalar::all(0));
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::GoodFeaturesToTrackDetector_GPU d_detector(maxCorners, qualityLevel, minDistance, blockSize, useHarrisDetector, k);
|
||||
|
||||
cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat d_mask(mask);
|
||||
cv::gpu::GpuMat d_pts;
|
||||
|
||||
d_detector(d_src, d_pts, d_mask);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
d_detector(d_src, d_pts, d_mask);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (depth != CV_8U)
|
||||
FAIL() << "Unsupported depth";
|
||||
|
||||
cv::Mat pts;
|
||||
|
||||
cv::goodFeaturesToTrack(src, pts, maxCorners, qualityLevel, minDistance, mask, blockSize, useHarrisDetector, k);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cv::goodFeaturesToTrack(src, pts, maxCorners, qualityLevel, minDistance, mask, blockSize, useHarrisDetector, k);
|
||||
}
|
||||
}
|
||||
|
||||
SANITY_CHECK(0);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////
|
||||
// OpticalFlowPyrLKSparse
|
||||
|
||||
typedef std::pair<std::string, std::string> string_pair;
|
||||
|
||||
DEF_PARAM_TEST(ImagePair_Depth_GraySource, string_pair, perf::MatDepth, bool);
|
||||
|
||||
GPU_PERF_TEST_P(ImagePair_Depth_GraySource, OpticalFlowPyrLKSparse,
|
||||
testing::Combine(
|
||||
testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")),
|
||||
testing::Values(CV_8U, CV_16U),
|
||||
testing::Bool()
|
||||
))
|
||||
{
|
||||
declare.time(60);
|
||||
|
||||
const string_pair fileNames = std::tr1::get<0>(GetParam());
|
||||
const int depth = std::tr1::get<1>(GetParam());
|
||||
const bool graySource = std::tr1::get<2>(GetParam());
|
||||
|
||||
// PyrLK params
|
||||
const cv::Size winSize(15, 15);
|
||||
const int maxLevel = 5;
|
||||
const cv::TermCriteria criteria(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 30, 0.01);
|
||||
|
||||
// GoodFeaturesToTrack params
|
||||
const int maxCorners = 5000;
|
||||
const double qualityLevel = 0.05;
|
||||
const int minDistance = 5;
|
||||
const int blockSize = 3;
|
||||
const bool useHarrisDetector = true;
|
||||
const double k = 0.05;
|
||||
|
||||
cv::Mat src1 = cv::imread(fileNames.first, graySource ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
|
||||
if (src1.empty())
|
||||
FAIL() << "Unable to load source image [" << fileNames.first << "]";
|
||||
|
||||
cv::Mat src2 = cv::imread(fileNames.second, graySource ? cv::IMREAD_GRAYSCALE : cv::IMREAD_COLOR);
|
||||
if (src2.empty())
|
||||
FAIL() << "Unable to load source image [" << fileNames.second << "]";
|
||||
|
||||
cv::Mat gray_src;
|
||||
if (graySource)
|
||||
gray_src = src1;
|
||||
else
|
||||
cv::cvtColor(src1, gray_src, cv::COLOR_BGR2GRAY);
|
||||
|
||||
cv::Mat pts;
|
||||
cv::goodFeaturesToTrack(gray_src, pts, maxCorners, qualityLevel, minDistance, cv::noArray(), blockSize, useHarrisDetector, k);
|
||||
|
||||
if (depth != CV_8U)
|
||||
{
|
||||
src1.convertTo(src1, depth);
|
||||
src2.convertTo(src2, depth);
|
||||
}
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::GpuMat d_src1(src1);
|
||||
cv::gpu::GpuMat d_src2(src2);
|
||||
cv::gpu::GpuMat d_pts(pts.reshape(2, 1));
|
||||
cv::gpu::GpuMat d_nextPts;
|
||||
cv::gpu::GpuMat d_status;
|
||||
|
||||
cv::gpu::PyrLKOpticalFlow d_pyrLK;
|
||||
d_pyrLK.winSize = winSize;
|
||||
d_pyrLK.maxLevel = maxLevel;
|
||||
d_pyrLK.iters = criteria.maxCount;
|
||||
d_pyrLK.useInitialFlow = false;
|
||||
|
||||
d_pyrLK.sparse(d_src1, d_src2, d_pts, d_nextPts, d_status);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
d_pyrLK.sparse(d_src1, d_src2, d_pts, d_nextPts, d_status);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (depth != CV_8U)
|
||||
FAIL() << "Unsupported depth";
|
||||
|
||||
cv::Mat nextPts;
|
||||
cv::Mat status;
|
||||
|
||||
cv::calcOpticalFlowPyrLK(src1, src2, pts, nextPts, status, cv::noArray(), winSize, maxLevel, criteria);
|
||||
|
||||
TEST_CYCLE()
|
||||
{
|
||||
cv::calcOpticalFlowPyrLK(src1, src2, pts, nextPts, status, cv::noArray(), winSize, maxLevel, criteria);
|
||||
}
|
||||
}
|
||||
|
||||
SANITY_CHECK(0);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////
|
||||
// OpticalFlowFarneback
|
||||
|
||||
DEF_PARAM_TEST(ImagePair_Depth, string_pair, perf::MatDepth);
|
||||
|
||||
GPU_PERF_TEST_P(ImagePair_Depth, OpticalFlowFarneback,
|
||||
testing::Combine(
|
||||
testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")),
|
||||
testing::Values(CV_8U, CV_16U)
|
||||
))
|
||||
{
|
||||
declare.time(500);
|
||||
|
||||
const string_pair fileNames = std::tr1::get<0>(GetParam());
|
||||
const int depth = std::tr1::get<1>(GetParam());
|
||||
|
||||
const double pyrScale = 0.5;
|
||||
const int numLevels = 6;
|
||||
const int winSize = 7;
|
||||
const int numIters = 15;
|
||||
const int polyN = 7;
|
||||
const double polySigma = 1.5;
|
||||
const int flags = cv::OPTFLOW_USE_INITIAL_FLOW;
|
||||
|
||||
cv::Mat src1 = cv::imread(fileNames.first, cv::IMREAD_GRAYSCALE);
|
||||
if (src1.empty())
|
||||
FAIL() << "Unable to load source image [" << fileNames.first << "]";
|
||||
|
||||
cv::Mat src2 = cv::imread(fileNames.second, cv::IMREAD_GRAYSCALE);
|
||||
if (src2.empty())
|
||||
FAIL() << "Unable to load source image [" << fileNames.second << "]";
|
||||
|
||||
if (depth != CV_8U)
|
||||
{
|
||||
src1.convertTo(src1, depth);
|
||||
src2.convertTo(src2, depth);
|
||||
}
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::GpuMat d_src1(src1);
|
||||
cv::gpu::GpuMat d_src2(src2);
|
||||
cv::gpu::GpuMat d_u(src1.size(), CV_32FC1, cv::Scalar::all(0));
|
||||
cv::gpu::GpuMat d_v(src1.size(), CV_32FC1, cv::Scalar::all(0));
|
||||
|
||||
cv::gpu::FarnebackOpticalFlow d_farneback;
|
||||
d_farneback.pyrScale = pyrScale;
|
||||
d_farneback.numLevels = numLevels;
|
||||
d_farneback.winSize = winSize;
|
||||
d_farneback.numIters = numIters;
|
||||
d_farneback.polyN = polyN;
|
||||
d_farneback.polySigma = polySigma;
|
||||
d_farneback.flags = flags;
|
||||
|
||||
d_farneback(d_src1, d_src2, d_u, d_v);
|
||||
|
||||
TEST_CYCLE_N(10)
|
||||
{
|
||||
d_farneback(d_src1, d_src2, d_u, d_v);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (depth != CV_8U)
|
||||
FAIL() << "Unsupported depth";
|
||||
|
||||
cv::Mat flow(src1.size(), CV_32FC2, cv::Scalar::all(0));
|
||||
|
||||
cv::calcOpticalFlowFarneback(src1, src2, flow, pyrScale, numLevels, winSize, numIters, polyN, polySigma, flags);
|
||||
|
||||
TEST_CYCLE_N(10)
|
||||
{
|
||||
cv::calcOpticalFlowFarneback(src1, src2, flow, pyrScale, numLevels, winSize, numIters, polyN, polySigma, flags);
|
||||
}
|
||||
}
|
||||
|
||||
SANITY_CHECK(0);
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////
|
||||
// OpticalFlowBM
|
||||
|
||||
void calcOpticalFlowBM(const cv::Mat& prev, const cv::Mat& curr,
|
||||
cv::Size bSize, cv::Size shiftSize, cv::Size maxRange, int usePrevious,
|
||||
cv::Mat& velx, cv::Mat& vely)
|
||||
{
|
||||
cv::Size sz((curr.cols - bSize.width + shiftSize.width)/shiftSize.width, (curr.rows - bSize.height + shiftSize.height)/shiftSize.height);
|
||||
|
||||
velx.create(sz, CV_32FC1);
|
||||
vely.create(sz, CV_32FC1);
|
||||
|
||||
CvMat cvprev = prev;
|
||||
CvMat cvcurr = curr;
|
||||
|
||||
CvMat cvvelx = velx;
|
||||
CvMat cvvely = vely;
|
||||
|
||||
cvCalcOpticalFlowBM(&cvprev, &cvcurr, bSize, shiftSize, maxRange, usePrevious, &cvvelx, &cvvely);
|
||||
}
|
||||
|
||||
DEF_PARAM_TEST(ImagePair_BlockSize_ShiftSize_MaxRange, string_pair, cv::Size, cv::Size, cv::Size);
|
||||
|
||||
GPU_PERF_TEST_P(ImagePair_BlockSize_ShiftSize_MaxRange, OpticalFlowBM,
|
||||
testing::Combine(
|
||||
testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")),
|
||||
testing::Values(cv::Size(16, 16)),
|
||||
testing::Values(cv::Size(2, 2)),
|
||||
testing::Values(cv::Size(16, 16))
|
||||
))
|
||||
{
|
||||
declare.time(3000);
|
||||
|
||||
const string_pair fileNames = std::tr1::get<0>(GetParam());
|
||||
const cv::Size block_size = std::tr1::get<1>(GetParam());
|
||||
const cv::Size shift_size = std::tr1::get<2>(GetParam());
|
||||
const cv::Size max_range = std::tr1::get<3>(GetParam());
|
||||
|
||||
cv::Mat src1 = cv::imread(fileNames.first, cv::IMREAD_GRAYSCALE);
|
||||
if (src1.empty())
|
||||
FAIL() << "Unable to load source image [" << fileNames.first << "]";
|
||||
|
||||
cv::Mat src2 = cv::imread(fileNames.second, cv::IMREAD_GRAYSCALE);
|
||||
if (src2.empty())
|
||||
FAIL() << "Unable to load source image [" << fileNames.second << "]";
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::GpuMat d_src1(src1);
|
||||
cv::gpu::GpuMat d_src2(src2);
|
||||
cv::gpu::GpuMat d_velx, d_vely, buf;
|
||||
|
||||
cv::gpu::calcOpticalFlowBM(d_src1, d_src2, block_size, shift_size, max_range, false, d_velx, d_vely, buf);
|
||||
|
||||
TEST_CYCLE_N(10)
|
||||
{
|
||||
cv::gpu::calcOpticalFlowBM(d_src1, d_src2, block_size, shift_size, max_range, false, d_velx, d_vely, buf);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::Mat velx, vely;
|
||||
|
||||
calcOpticalFlowBM(src1, src2, block_size, shift_size, max_range, false, velx, vely);
|
||||
|
||||
TEST_CYCLE_N(10)
|
||||
{
|
||||
calcOpticalFlowBM(src1, src2, block_size, shift_size, max_range, false, velx, vely);
|
||||
}
|
||||
}
|
||||
|
||||
SANITY_CHECK(0);
|
||||
}
|
||||
|
||||
GPU_PERF_TEST_P(ImagePair_BlockSize_ShiftSize_MaxRange, FastOpticalFlowBM,
|
||||
testing::Combine(
|
||||
testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")),
|
||||
testing::Values(cv::Size(16, 16)),
|
||||
testing::Values(cv::Size(1, 1)),
|
||||
testing::Values(cv::Size(16, 16))
|
||||
))
|
||||
{
|
||||
declare.time(3000);
|
||||
|
||||
const string_pair fileNames = std::tr1::get<0>(GetParam());
|
||||
const cv::Size block_size = std::tr1::get<1>(GetParam());
|
||||
const cv::Size shift_size = std::tr1::get<2>(GetParam());
|
||||
const cv::Size max_range = std::tr1::get<3>(GetParam());
|
||||
|
||||
cv::Mat src1 = cv::imread(fileNames.first, cv::IMREAD_GRAYSCALE);
|
||||
if (src1.empty())
|
||||
FAIL() << "Unable to load source image [" << fileNames.first << "]";
|
||||
|
||||
cv::Mat src2 = cv::imread(fileNames.second, cv::IMREAD_GRAYSCALE);
|
||||
if (src2.empty())
|
||||
FAIL() << "Unable to load source image [" << fileNames.second << "]";
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::GpuMat d_src1(src1);
|
||||
cv::gpu::GpuMat d_src2(src2);
|
||||
cv::gpu::GpuMat d_velx, d_vely;
|
||||
|
||||
cv::gpu::FastOpticalFlowBM fastBM;
|
||||
|
||||
fastBM(d_src1, d_src2, d_velx, d_vely, max_range.width, block_size.width);
|
||||
|
||||
TEST_CYCLE_N(10)
|
||||
{
|
||||
fastBM(d_src1, d_src2, d_velx, d_vely, max_range.width, block_size.width);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::Mat velx, vely;
|
||||
|
||||
calcOpticalFlowBM(src1, src2, block_size, shift_size, max_range, false, velx, vely);
|
||||
|
||||
TEST_CYCLE_N(10)
|
||||
{
|
||||
calcOpticalFlowBM(src1, src2, block_size, shift_size, max_range, false, velx, vely);
|
||||
}
|
||||
}
|
||||
|
||||
SANITY_CHECK(0);
|
||||
}
|
@ -1,36 +0,0 @@
|
||||
Camera Calibration and 3D Reconstruction
|
||||
========================================
|
||||
|
||||
.. highlight:: cpp
|
||||
|
||||
|
||||
|
||||
gpu::solvePnPRansac
|
||||
-------------------
|
||||
Finds the object pose from 3D-2D point correspondences.
|
||||
|
||||
.. ocv:function:: void gpu::solvePnPRansac(const Mat& object, const Mat& image, const Mat& camera_mat, const Mat& dist_coef, Mat& rvec, Mat& tvec, bool use_extrinsic_guess=false, int num_iters=100, float max_dist=8.0, int min_inlier_count=100, vector<int>* inliers=NULL)
|
||||
|
||||
:param object: Single-row matrix of object points.
|
||||
|
||||
:param image: Single-row matrix of image points.
|
||||
|
||||
:param camera_mat: 3x3 matrix of intrinsic camera parameters.
|
||||
|
||||
:param dist_coef: Distortion coefficients. See :ocv:func:`undistortPoints` for details.
|
||||
|
||||
:param rvec: Output 3D rotation vector.
|
||||
|
||||
:param tvec: Output 3D translation vector.
|
||||
|
||||
:param use_extrinsic_guess: Flag to indicate that the function must use ``rvec`` and ``tvec`` as an initial transformation guess. It is not supported for now.
|
||||
|
||||
:param num_iters: Maximum number of RANSAC iterations.
|
||||
|
||||
:param max_dist: Euclidean distance threshold to detect whether point is inlier or not.
|
||||
|
||||
:param min_inlier_count: Flag to indicate that the function must stop if greater or equal number of inliers is achieved. It is not supported for now.
|
||||
|
||||
:param inliers: Output vector of inlier indices.
|
||||
|
||||
.. seealso:: :ocv:func:`solvePnPRansac`
|
@ -1,5 +1,5 @@
|
||||
Stereo Correspondence
|
||||
=====================
|
||||
Camera Calibration and 3D Reconstruction
|
||||
========================================
|
||||
|
||||
.. highlight:: cpp
|
||||
|
||||
@ -462,6 +462,38 @@ Reprojects a disparity image to 3D space.
|
||||
|
||||
|
||||
|
||||
gpu::solvePnPRansac
|
||||
-------------------
|
||||
Finds the object pose from 3D-2D point correspondences.
|
||||
|
||||
.. ocv:function:: void gpu::solvePnPRansac(const Mat& object, const Mat& image, const Mat& camera_mat, const Mat& dist_coef, Mat& rvec, Mat& tvec, bool use_extrinsic_guess=false, int num_iters=100, float max_dist=8.0, int min_inlier_count=100, vector<int>* inliers=NULL)
|
||||
|
||||
:param object: Single-row matrix of object points.
|
||||
|
||||
:param image: Single-row matrix of image points.
|
||||
|
||||
:param camera_mat: 3x3 matrix of intrinsic camera parameters.
|
||||
|
||||
:param dist_coef: Distortion coefficients. See :ocv:func:`undistortPoints` for details.
|
||||
|
||||
:param rvec: Output 3D rotation vector.
|
||||
|
||||
:param tvec: Output 3D translation vector.
|
||||
|
||||
:param use_extrinsic_guess: Flag to indicate that the function must use ``rvec`` and ``tvec`` as an initial transformation guess. It is not supported for now.
|
||||
|
||||
:param num_iters: Maximum number of RANSAC iterations.
|
||||
|
||||
:param max_dist: Euclidean distance threshold to detect whether point is inlier or not.
|
||||
|
||||
:param min_inlier_count: Flag to indicate that the function must stop if greater or equal number of inliers is achieved. It is not supported for now.
|
||||
|
||||
:param inliers: Output vector of inlier indices.
|
||||
|
||||
.. seealso:: :ocv:func:`solvePnPRansac`
|
||||
|
||||
|
||||
|
||||
.. [Felzenszwalb2006] Pedro F. Felzenszwalb algorithm [Pedro F. Felzenszwalb and Daniel P. Huttenlocher. *Efficient belief propagation for early vision*. International Journal of Computer Vision, 70(1), October 2006
|
||||
|
||||
.. [Yang2010] Q. Yang, L. Wang, and N. Ahuja. *A constant-space belief propagation algorithm for stereo matching*. In CVPR, 2010.
|
@ -8,5 +8,12 @@ gpu. GPU-accelerated Computer Vision
|
||||
introduction
|
||||
initalization_and_information
|
||||
data_structures
|
||||
operations_on_matrices
|
||||
per_element_operations
|
||||
image_processing
|
||||
matrix_reductions
|
||||
object_detection
|
||||
calib3d
|
||||
feature_detection_and_description
|
||||
image_filtering
|
||||
camera_calibration_and_3d_reconstruction
|
||||
video
|
||||
|
1087
modules/gpu/doc/image_processing.rst
Normal file
1087
modules/gpu/doc/image_processing.rst
Normal file
File diff suppressed because it is too large
Load Diff
@ -5,6 +5,25 @@ Matrix Reductions
|
||||
|
||||
|
||||
|
||||
gpu::meanStdDev
|
||||
-------------------
|
||||
Computes a mean value and a standard deviation of matrix elements.
|
||||
|
||||
.. ocv:function:: void gpu::meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev)
|
||||
.. ocv:function:: void gpu::meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev, GpuMat& buf)
|
||||
|
||||
:param mtx: Source matrix. ``CV_8UC1`` matrices are supported for now.
|
||||
|
||||
:param mean: Mean value.
|
||||
|
||||
:param stddev: Standard deviation value.
|
||||
|
||||
:param buf: Optional buffer to avoid extra memory allocations. It is resized automatically.
|
||||
|
||||
.. seealso:: :ocv:func:`meanStdDev`
|
||||
|
||||
|
||||
|
||||
gpu::norm
|
||||
-------------
|
||||
Returns the norm of a matrix (or difference of two matrices).
|
||||
@ -186,70 +205,3 @@ Reduces a matrix to a vector.
|
||||
The function ``reduce`` reduces the matrix to a vector by treating the matrix rows/columns as a set of 1D vectors and performing the specified operation on the vectors until a single row/column is obtained. For example, the function can be used to compute horizontal and vertical projections of a raster image. In case of ``CV_REDUCE_SUM`` and ``CV_REDUCE_AVG`` , the output may have a larger element bit-depth to preserve accuracy. And multi-channel arrays are also supported in these two reduction modes.
|
||||
|
||||
.. seealso:: :ocv:func:`reduce`
|
||||
|
||||
|
||||
|
||||
gpu::normalize
|
||||
--------------
|
||||
Normalizes the norm or value range of an array.
|
||||
|
||||
.. ocv:function:: void gpu::normalize(const GpuMat& src, GpuMat& dst, double alpha = 1, double beta = 0, int norm_type = NORM_L2, int dtype = -1, const GpuMat& mask = GpuMat())
|
||||
|
||||
.. ocv:function:: void gpu::normalize(const GpuMat& src, GpuMat& dst, double a, double b, int norm_type, int dtype, const GpuMat& mask, GpuMat& norm_buf, GpuMat& cvt_buf)
|
||||
|
||||
:param src: input array.
|
||||
|
||||
:param dst: output array of the same size as ``src`` .
|
||||
|
||||
:param alpha: norm value to normalize to or the lower range boundary in case of the range normalization.
|
||||
|
||||
:param beta: upper range boundary in case of the range normalization; it is not used for the norm normalization.
|
||||
|
||||
:param normType: normalization type (see the details below).
|
||||
|
||||
:param dtype: when negative, the output array has the same type as ``src``; otherwise, it has the same number of channels as ``src`` and the depth ``=CV_MAT_DEPTH(dtype)``.
|
||||
|
||||
:param mask: optional operation mask.
|
||||
|
||||
:param norm_buf: Optional buffer to avoid extra memory allocations. It is resized automatically.
|
||||
|
||||
:param cvt_buf: Optional buffer to avoid extra memory allocations. It is resized automatically.
|
||||
|
||||
.. seealso:: :ocv:func:`normalize`
|
||||
|
||||
|
||||
|
||||
gpu::meanStdDev
|
||||
-------------------
|
||||
Computes a mean value and a standard deviation of matrix elements.
|
||||
|
||||
.. ocv:function:: void gpu::meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev)
|
||||
.. ocv:function:: void gpu::meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev, GpuMat& buf)
|
||||
|
||||
:param mtx: Source matrix. ``CV_8UC1`` matrices are supported for now.
|
||||
|
||||
:param mean: Mean value.
|
||||
|
||||
:param stddev: Standard deviation value.
|
||||
|
||||
:param buf: Optional buffer to avoid extra memory allocations. It is resized automatically.
|
||||
|
||||
.. seealso:: :ocv:func:`meanStdDev`
|
||||
|
||||
|
||||
|
||||
gpu::rectStdDev
|
||||
-------------------
|
||||
Computes a standard deviation of integral images.
|
||||
|
||||
.. ocv:function:: void gpu::rectStdDev(const GpuMat& src, const GpuMat& sqr, GpuMat& dst, const Rect& rect, Stream& stream = Stream::Null())
|
||||
|
||||
:param src: Source image. Only the ``CV_32SC1`` type is supported.
|
||||
|
||||
:param sqr: Squared source image. Only the ``CV_32FC1`` type is supported.
|
||||
|
||||
:param dst: Destination image with the same type and size as ``src`` .
|
||||
|
||||
:param rect: Rectangular window.
|
||||
|
||||
:param stream: Stream for the asynchronous version.
|
274
modules/gpu/doc/operations_on_matrices.rst
Normal file
274
modules/gpu/doc/operations_on_matrices.rst
Normal file
@ -0,0 +1,274 @@
|
||||
Operations on Matrices
|
||||
======================
|
||||
|
||||
.. highlight:: cpp
|
||||
|
||||
|
||||
|
||||
gpu::gemm
|
||||
------------------
|
||||
Performs generalized matrix multiplication.
|
||||
|
||||
.. ocv:function:: void gpu::gemm(const GpuMat& src1, const GpuMat& src2, double alpha, const GpuMat& src3, double beta, GpuMat& dst, int flags = 0, Stream& stream = Stream::Null())
|
||||
|
||||
:param src1: First multiplied input matrix that should have ``CV_32FC1`` , ``CV_64FC1`` , ``CV_32FC2`` , or ``CV_64FC2`` type.
|
||||
|
||||
:param src2: Second multiplied input matrix of the same type as ``src1`` .
|
||||
|
||||
:param alpha: Weight of the matrix product.
|
||||
|
||||
:param src3: Third optional delta matrix added to the matrix product. It should have the same type as ``src1`` and ``src2`` .
|
||||
|
||||
:param beta: Weight of ``src3`` .
|
||||
|
||||
:param dst: Destination matrix. It has the proper size and the same type as input matrices.
|
||||
|
||||
:param flags: Operation flags:
|
||||
|
||||
* **GEMM_1_T** transpose ``src1``
|
||||
* **GEMM_2_T** transpose ``src2``
|
||||
* **GEMM_3_T** transpose ``src3``
|
||||
|
||||
:param stream: Stream for the asynchronous version.
|
||||
|
||||
The function performs generalized matrix multiplication similar to the ``gemm`` functions in BLAS level 3. For example, ``gemm(src1, src2, alpha, src3, beta, dst, GEMM_1_T + GEMM_3_T)`` corresponds to
|
||||
|
||||
.. math::
|
||||
|
||||
\texttt{dst} = \texttt{alpha} \cdot \texttt{src1} ^T \cdot \texttt{src2} + \texttt{beta} \cdot \texttt{src3} ^T
|
||||
|
||||
.. note:: Transposition operation doesn't support ``CV_64FC2`` input type.
|
||||
|
||||
.. seealso:: :ocv:func:`gemm`
|
||||
|
||||
|
||||
|
||||
gpu::transpose
|
||||
------------------
|
||||
Transposes a matrix.
|
||||
|
||||
.. ocv:function:: void gpu::transpose( const GpuMat& src1, GpuMat& dst, Stream& stream=Stream::Null() )
|
||||
|
||||
:param src1: Source matrix. 1-, 4-, 8-byte element sizes are supported for now (CV_8UC1, CV_8UC4, CV_16UC2, CV_32FC1, etc).
|
||||
|
||||
:param dst: Destination matrix.
|
||||
|
||||
:param stream: Stream for the asynchronous version.
|
||||
|
||||
.. seealso:: :ocv:func:`transpose`
|
||||
|
||||
|
||||
|
||||
gpu::flip
|
||||
-------------
|
||||
Flips a 2D matrix around vertical, horizontal, or both axes.
|
||||
|
||||
.. ocv:function:: void gpu::flip( const GpuMat& a, GpuMat& b, int flipCode, Stream& stream=Stream::Null() )
|
||||
|
||||
:param a: Source matrix. Supports 1, 3 and 4 channels images with ``CV_8U``, ``CV_16U``, ``CV_32S`` or ``CV_32F`` depth.
|
||||
|
||||
:param b: Destination matrix.
|
||||
|
||||
:param flipCode: Flip mode for the source:
|
||||
|
||||
* ``0`` Flips around x-axis.
|
||||
|
||||
* ``>0`` Flips around y-axis.
|
||||
|
||||
* ``<0`` Flips around both axes.
|
||||
|
||||
:param stream: Stream for the asynchronous version.
|
||||
|
||||
.. seealso:: :ocv:func:`flip`
|
||||
|
||||
|
||||
|
||||
gpu::LUT
|
||||
------------
|
||||
Transforms the source matrix into the destination matrix using the given look-up table: ``dst(I) = lut(src(I))``
|
||||
|
||||
.. ocv:function:: void gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& stream = Stream::Null())
|
||||
|
||||
:param src: Source matrix. ``CV_8UC1`` and ``CV_8UC3`` matrices are supported for now.
|
||||
|
||||
:param lut: Look-up table of 256 elements. It is a continuous ``CV_8U`` matrix.
|
||||
|
||||
:param dst: Destination matrix with the same depth as ``lut`` and the same number of channels as ``src`` .
|
||||
|
||||
:param stream: Stream for the asynchronous version.
|
||||
|
||||
.. seealso:: :ocv:func:`LUT`
|
||||
|
||||
|
||||
|
||||
gpu::merge
|
||||
--------------
|
||||
Makes a multi-channel matrix out of several single-channel matrices.
|
||||
|
||||
.. ocv:function:: void gpu::merge(const GpuMat* src, size_t n, GpuMat& dst, Stream& stream = Stream::Null())
|
||||
|
||||
.. ocv:function:: void gpu::merge(const vector<GpuMat>& src, GpuMat& dst, Stream& stream = Stream::Null())
|
||||
|
||||
:param src: Array/vector of source matrices.
|
||||
|
||||
:param n: Number of source matrices.
|
||||
|
||||
:param dst: Destination matrix.
|
||||
|
||||
:param stream: Stream for the asynchronous version.
|
||||
|
||||
.. seealso:: :ocv:func:`merge`
|
||||
|
||||
|
||||
|
||||
gpu::split
|
||||
--------------
|
||||
Copies each plane of a multi-channel matrix into an array.
|
||||
|
||||
.. ocv:function:: void gpu::split(const GpuMat& src, GpuMat* dst, Stream& stream = Stream::Null())
|
||||
|
||||
.. ocv:function:: void gpu::split(const GpuMat& src, vector<GpuMat>& dst, Stream& stream = Stream::Null())
|
||||
|
||||
:param src: Source matrix.
|
||||
|
||||
:param dst: Destination array/vector of single-channel matrices.
|
||||
|
||||
:param stream: Stream for the asynchronous version.
|
||||
|
||||
.. seealso:: :ocv:func:`split`
|
||||
|
||||
|
||||
|
||||
gpu::magnitude
|
||||
------------------
|
||||
Computes magnitudes of complex matrix elements.
|
||||
|
||||
.. ocv:function:: void gpu::magnitude( const GpuMat& xy, GpuMat& magnitude, Stream& stream=Stream::Null() )
|
||||
|
||||
.. ocv:function:: void gpu::magnitude(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, Stream& stream = Stream::Null())
|
||||
|
||||
:param xy: Source complex matrix in the interleaved format ( ``CV_32FC2`` ).
|
||||
|
||||
:param x: Source matrix containing real components ( ``CV_32FC1`` ).
|
||||
|
||||
:param y: Source matrix containing imaginary components ( ``CV_32FC1`` ).
|
||||
|
||||
:param magnitude: Destination matrix of float magnitudes ( ``CV_32FC1`` ).
|
||||
|
||||
:param stream: Stream for the asynchronous version.
|
||||
|
||||
.. seealso:: :ocv:func:`magnitude`
|
||||
|
||||
|
||||
|
||||
gpu::magnitudeSqr
|
||||
---------------------
|
||||
Computes squared magnitudes of complex matrix elements.
|
||||
|
||||
.. ocv:function:: void gpu::magnitudeSqr( const GpuMat& xy, GpuMat& magnitude, Stream& stream=Stream::Null() )
|
||||
|
||||
.. ocv:function:: void gpu::magnitudeSqr(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, Stream& stream = Stream::Null())
|
||||
|
||||
:param xy: Source complex matrix in the interleaved format ( ``CV_32FC2`` ).
|
||||
|
||||
:param x: Source matrix containing real components ( ``CV_32FC1`` ).
|
||||
|
||||
:param y: Source matrix containing imaginary components ( ``CV_32FC1`` ).
|
||||
|
||||
:param magnitude: Destination matrix of float magnitude squares ( ``CV_32FC1`` ).
|
||||
|
||||
:param stream: Stream for the asynchronous version.
|
||||
|
||||
|
||||
|
||||
gpu::phase
|
||||
--------------
|
||||
Computes polar angles of complex matrix elements.
|
||||
|
||||
.. ocv:function:: void gpu::phase(const GpuMat& x, const GpuMat& y, GpuMat& angle, bool angleInDegrees=false, Stream& stream = Stream::Null())
|
||||
|
||||
:param x: Source matrix containing real components ( ``CV_32FC1`` ).
|
||||
|
||||
:param y: Source matrix containing imaginary components ( ``CV_32FC1`` ).
|
||||
|
||||
:param angle: Destination matrix of angles ( ``CV_32FC1`` ).
|
||||
|
||||
:param angleInDegrees: Flag for angles that must be evaluated in degrees.
|
||||
|
||||
:param stream: Stream for the asynchronous version.
|
||||
|
||||
.. seealso:: :ocv:func:`phase`
|
||||
|
||||
|
||||
|
||||
gpu::cartToPolar
|
||||
--------------------
|
||||
Converts Cartesian coordinates into polar.
|
||||
|
||||
.. ocv:function:: void gpu::cartToPolar(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, GpuMat& angle, bool angleInDegrees=false, Stream& stream = Stream::Null())
|
||||
|
||||
:param x: Source matrix containing real components ( ``CV_32FC1`` ).
|
||||
|
||||
:param y: Source matrix containing imaginary components ( ``CV_32FC1`` ).
|
||||
|
||||
:param magnitude: Destination matrix of float magnitudes ( ``CV_32FC1`` ).
|
||||
|
||||
:param angle: Destination matrix of angles ( ``CV_32FC1`` ).
|
||||
|
||||
:param angleInDegrees: Flag for angles that must be evaluated in degrees.
|
||||
|
||||
:param stream: Stream for the asynchronous version.
|
||||
|
||||
.. seealso:: :ocv:func:`cartToPolar`
|
||||
|
||||
|
||||
|
||||
gpu::polarToCart
|
||||
--------------------
|
||||
Converts polar coordinates into Cartesian.
|
||||
|
||||
.. ocv:function:: void gpu::polarToCart(const GpuMat& magnitude, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees=false, Stream& stream = Stream::Null())
|
||||
|
||||
:param magnitude: Source matrix containing magnitudes ( ``CV_32FC1`` ).
|
||||
|
||||
:param angle: Source matrix containing angles ( ``CV_32FC1`` ).
|
||||
|
||||
:param x: Destination matrix of real components ( ``CV_32FC1`` ).
|
||||
|
||||
:param y: Destination matrix of imaginary components ( ``CV_32FC1`` ).
|
||||
|
||||
:param angleInDegrees: Flag that indicates angles in degrees.
|
||||
|
||||
:param stream: Stream for the asynchronous version.
|
||||
|
||||
.. seealso:: :ocv:func:`polarToCart`
|
||||
|
||||
|
||||
|
||||
gpu::normalize
|
||||
--------------
|
||||
Normalizes the norm or value range of an array.
|
||||
|
||||
.. ocv:function:: void gpu::normalize(const GpuMat& src, GpuMat& dst, double alpha = 1, double beta = 0, int norm_type = NORM_L2, int dtype = -1, const GpuMat& mask = GpuMat())
|
||||
|
||||
.. ocv:function:: void gpu::normalize(const GpuMat& src, GpuMat& dst, double a, double b, int norm_type, int dtype, const GpuMat& mask, GpuMat& norm_buf, GpuMat& cvt_buf)
|
||||
|
||||
:param src: input array.
|
||||
|
||||
:param dst: output array of the same size as ``src`` .
|
||||
|
||||
:param alpha: norm value to normalize to or the lower range boundary in case of the range normalization.
|
||||
|
||||
:param beta: upper range boundary in case of the range normalization; it is not used for the norm normalization.
|
||||
|
||||
:param normType: normalization type (see the details below).
|
||||
|
||||
:param dtype: when negative, the output array has the same type as ``src``; otherwise, it has the same number of channels as ``src`` and the depth ``=CV_MAT_DEPTH(dtype)``.
|
||||
|
||||
:param mask: optional operation mask.
|
||||
|
||||
:param norm_buf: Optional buffer to avoid extra memory allocations. It is resized automatically.
|
||||
|
||||
:param cvt_buf: Optional buffer to avoid extra memory allocations. It is resized automatically.
|
||||
|
||||
.. seealso:: :ocv:func:`normalize`
|
@ -1,5 +1,5 @@
|
||||
Per-element Operations
|
||||
======================
|
||||
=======================
|
||||
|
||||
.. highlight:: cpp
|
||||
|
||||
@ -112,7 +112,6 @@ This function, in contrast to :ocv:func:`divide`, uses a round-down rounding mod
|
||||
.. seealso:: :ocv:func:`divide`
|
||||
|
||||
|
||||
|
||||
gpu::addWeighted
|
||||
----------------
|
||||
Computes the weighted sum of two arrays.
|
||||
@ -444,131 +443,3 @@ Computes the per-element maximum of two matrices (or a matrix and a scalar).
|
||||
:param stream: Stream for the asynchronous version.
|
||||
|
||||
.. seealso:: :ocv:func:`max`
|
||||
|
||||
|
||||
|
||||
gpu::threshold
|
||||
------------------
|
||||
Applies a fixed-level threshold to each array element.
|
||||
|
||||
.. ocv:function:: double gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double maxval, int type, Stream& stream = Stream::Null())
|
||||
|
||||
:param src: Source array (single-channel).
|
||||
|
||||
:param dst: Destination array with the same size and type as ``src`` .
|
||||
|
||||
:param thresh: Threshold value.
|
||||
|
||||
:param maxval: Maximum value to use with ``THRESH_BINARY`` and ``THRESH_BINARY_INV`` threshold types.
|
||||
|
||||
:param type: Threshold type. For details, see :ocv:func:`threshold` . The ``THRESH_OTSU`` threshold type is not supported.
|
||||
|
||||
:param stream: Stream for the asynchronous version.
|
||||
|
||||
.. seealso:: :ocv:func:`threshold`
|
||||
|
||||
|
||||
|
||||
gpu::magnitude
|
||||
------------------
|
||||
Computes magnitudes of complex matrix elements.
|
||||
|
||||
.. ocv:function:: void gpu::magnitude( const GpuMat& xy, GpuMat& magnitude, Stream& stream=Stream::Null() )
|
||||
|
||||
.. ocv:function:: void gpu::magnitude(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, Stream& stream = Stream::Null())
|
||||
|
||||
:param xy: Source complex matrix in the interleaved format ( ``CV_32FC2`` ).
|
||||
|
||||
:param x: Source matrix containing real components ( ``CV_32FC1`` ).
|
||||
|
||||
:param y: Source matrix containing imaginary components ( ``CV_32FC1`` ).
|
||||
|
||||
:param magnitude: Destination matrix of float magnitudes ( ``CV_32FC1`` ).
|
||||
|
||||
:param stream: Stream for the asynchronous version.
|
||||
|
||||
.. seealso:: :ocv:func:`magnitude`
|
||||
|
||||
|
||||
|
||||
gpu::magnitudeSqr
|
||||
---------------------
|
||||
Computes squared magnitudes of complex matrix elements.
|
||||
|
||||
.. ocv:function:: void gpu::magnitudeSqr( const GpuMat& xy, GpuMat& magnitude, Stream& stream=Stream::Null() )
|
||||
|
||||
.. ocv:function:: void gpu::magnitudeSqr(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, Stream& stream = Stream::Null())
|
||||
|
||||
:param xy: Source complex matrix in the interleaved format ( ``CV_32FC2`` ).
|
||||
|
||||
:param x: Source matrix containing real components ( ``CV_32FC1`` ).
|
||||
|
||||
:param y: Source matrix containing imaginary components ( ``CV_32FC1`` ).
|
||||
|
||||
:param magnitude: Destination matrix of float magnitude squares ( ``CV_32FC1`` ).
|
||||
|
||||
:param stream: Stream for the asynchronous version.
|
||||
|
||||
|
||||
|
||||
gpu::phase
|
||||
--------------
|
||||
Computes polar angles of complex matrix elements.
|
||||
|
||||
.. ocv:function:: void gpu::phase(const GpuMat& x, const GpuMat& y, GpuMat& angle, bool angleInDegrees=false, Stream& stream = Stream::Null())
|
||||
|
||||
:param x: Source matrix containing real components ( ``CV_32FC1`` ).
|
||||
|
||||
:param y: Source matrix containing imaginary components ( ``CV_32FC1`` ).
|
||||
|
||||
:param angle: Destination matrix of angles ( ``CV_32FC1`` ).
|
||||
|
||||
:param angleInDegrees: Flag for angles that must be evaluated in degrees.
|
||||
|
||||
:param stream: Stream for the asynchronous version.
|
||||
|
||||
.. seealso:: :ocv:func:`phase`
|
||||
|
||||
|
||||
|
||||
gpu::cartToPolar
|
||||
--------------------
|
||||
Converts Cartesian coordinates into polar.
|
||||
|
||||
.. ocv:function:: void gpu::cartToPolar(const GpuMat& x, const GpuMat& y, GpuMat& magnitude, GpuMat& angle, bool angleInDegrees=false, Stream& stream = Stream::Null())
|
||||
|
||||
:param x: Source matrix containing real components ( ``CV_32FC1`` ).
|
||||
|
||||
:param y: Source matrix containing imaginary components ( ``CV_32FC1`` ).
|
||||
|
||||
:param magnitude: Destination matrix of float magnitudes ( ``CV_32FC1`` ).
|
||||
|
||||
:param angle: Destination matrix of angles ( ``CV_32FC1`` ).
|
||||
|
||||
:param angleInDegrees: Flag for angles that must be evaluated in degrees.
|
||||
|
||||
:param stream: Stream for the asynchronous version.
|
||||
|
||||
.. seealso:: :ocv:func:`cartToPolar`
|
||||
|
||||
|
||||
|
||||
gpu::polarToCart
|
||||
--------------------
|
||||
Converts polar coordinates into Cartesian.
|
||||
|
||||
.. ocv:function:: void gpu::polarToCart(const GpuMat& magnitude, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees=false, Stream& stream = Stream::Null())
|
||||
|
||||
:param magnitude: Source matrix containing magnitudes ( ``CV_32FC1`` ).
|
||||
|
||||
:param angle: Source matrix containing angles ( ``CV_32FC1`` ).
|
||||
|
||||
:param x: Destination matrix of real components ( ``CV_32FC1`` ).
|
||||
|
||||
:param y: Destination matrix of imaginary components ( ``CV_32FC1`` ).
|
||||
|
||||
:param angleInDegrees: Flag that indicates angles in degrees.
|
||||
|
||||
:param stream: Stream for the asynchronous version.
|
||||
|
||||
.. seealso:: :ocv:func:`polarToCart`
|
1142
modules/gpu/doc/video.rst
Normal file
1142
modules/gpu/doc/video.rst
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -40,4 +40,4 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "test_precomp.hpp"
|
||||
#include "opencv2/core/cuda_devptrs.hpp"
|
@ -40,4 +40,4 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp"
|
||||
#include "opencv2/core/gpumat.hpp"
|
@ -46,8 +46,181 @@ using namespace std;
|
||||
using namespace testing;
|
||||
using namespace perf;
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// StereoBM
|
||||
|
||||
typedef std::tr1::tuple<string, string> pair_string;
|
||||
DEF_PARAM_TEST_1(ImagePair, pair_string);
|
||||
|
||||
PERF_TEST_P(ImagePair, Calib3D_StereoBM,
|
||||
Values(pair_string("gpu/perf/aloe.png", "gpu/perf/aloeR.png")))
|
||||
{
|
||||
declare.time(300.0);
|
||||
|
||||
const cv::Mat imgLeft = readImage(GET_PARAM(0), cv::IMREAD_GRAYSCALE);
|
||||
ASSERT_FALSE(imgLeft.empty());
|
||||
|
||||
const cv::Mat imgRight = readImage(GET_PARAM(1), cv::IMREAD_GRAYSCALE);
|
||||
ASSERT_FALSE(imgRight.empty());
|
||||
|
||||
const int preset = 0;
|
||||
const int ndisp = 256;
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::StereoBM_GPU d_bm(preset, ndisp);
|
||||
|
||||
const cv::gpu::GpuMat d_imgLeft(imgLeft);
|
||||
const cv::gpu::GpuMat d_imgRight(imgRight);
|
||||
cv::gpu::GpuMat dst;
|
||||
|
||||
TEST_CYCLE() d_bm(d_imgLeft, d_imgRight, dst);
|
||||
|
||||
GPU_SANITY_CHECK(dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::Ptr<cv::StereoBM> bm = cv::createStereoBM(ndisp);
|
||||
|
||||
cv::Mat dst;
|
||||
|
||||
TEST_CYCLE() bm->compute(imgLeft, imgRight, dst);
|
||||
|
||||
CPU_SANITY_CHECK(dst);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// StereoBeliefPropagation
|
||||
|
||||
PERF_TEST_P(ImagePair, Calib3D_StereoBeliefPropagation,
|
||||
Values(pair_string("gpu/stereobp/aloe-L.png", "gpu/stereobp/aloe-R.png")))
|
||||
{
|
||||
declare.time(300.0);
|
||||
|
||||
const cv::Mat imgLeft = readImage(GET_PARAM(0));
|
||||
ASSERT_FALSE(imgLeft.empty());
|
||||
|
||||
const cv::Mat imgRight = readImage(GET_PARAM(1));
|
||||
ASSERT_FALSE(imgRight.empty());
|
||||
|
||||
const int ndisp = 64;
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::StereoBeliefPropagation d_bp(ndisp);
|
||||
|
||||
const cv::gpu::GpuMat d_imgLeft(imgLeft);
|
||||
const cv::gpu::GpuMat d_imgRight(imgRight);
|
||||
cv::gpu::GpuMat dst;
|
||||
|
||||
TEST_CYCLE() d_bp(d_imgLeft, d_imgRight, dst);
|
||||
|
||||
GPU_SANITY_CHECK(dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
FAIL_NO_CPU();
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// StereoConstantSpaceBP
|
||||
|
||||
PERF_TEST_P(ImagePair, Calib3D_StereoConstantSpaceBP,
|
||||
Values(pair_string("gpu/stereobm/aloe-L.png", "gpu/stereobm/aloe-R.png")))
|
||||
{
|
||||
declare.time(300.0);
|
||||
|
||||
const cv::Mat imgLeft = readImage(GET_PARAM(0), cv::IMREAD_GRAYSCALE);
|
||||
ASSERT_FALSE(imgLeft.empty());
|
||||
|
||||
const cv::Mat imgRight = readImage(GET_PARAM(1), cv::IMREAD_GRAYSCALE);
|
||||
ASSERT_FALSE(imgRight.empty());
|
||||
|
||||
const int ndisp = 128;
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::StereoConstantSpaceBP d_csbp(ndisp);
|
||||
|
||||
const cv::gpu::GpuMat d_imgLeft(imgLeft);
|
||||
const cv::gpu::GpuMat d_imgRight(imgRight);
|
||||
cv::gpu::GpuMat dst;
|
||||
|
||||
TEST_CYCLE() d_csbp(d_imgLeft, d_imgRight, dst);
|
||||
|
||||
GPU_SANITY_CHECK(dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
FAIL_NO_CPU();
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// DisparityBilateralFilter
|
||||
|
||||
PERF_TEST_P(ImagePair, Calib3D_DisparityBilateralFilter,
|
||||
Values(pair_string("gpu/stereobm/aloe-L.png", "gpu/stereobm/aloe-disp.png")))
|
||||
{
|
||||
const cv::Mat img = readImage(GET_PARAM(0), cv::IMREAD_GRAYSCALE);
|
||||
ASSERT_FALSE(img.empty());
|
||||
|
||||
const cv::Mat disp = readImage(GET_PARAM(1), cv::IMREAD_GRAYSCALE);
|
||||
ASSERT_FALSE(disp.empty());
|
||||
|
||||
const int ndisp = 128;
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
cv::gpu::DisparityBilateralFilter d_filter(ndisp);
|
||||
|
||||
const cv::gpu::GpuMat d_img(img);
|
||||
const cv::gpu::GpuMat d_disp(disp);
|
||||
cv::gpu::GpuMat dst;
|
||||
|
||||
TEST_CYCLE() d_filter(d_disp, d_img, dst);
|
||||
|
||||
GPU_SANITY_CHECK(dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
FAIL_NO_CPU();
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// TransformPoints
|
||||
|
||||
DEF_PARAM_TEST_1(Count, int);
|
||||
|
||||
PERF_TEST_P(Count, Calib3D_TransformPoints,
|
||||
Values(5000, 10000, 20000))
|
||||
{
|
||||
const int count = GetParam();
|
||||
|
||||
cv::Mat src(1, count, CV_32FC3);
|
||||
declare.in(src, WARMUP_RNG);
|
||||
|
||||
const cv::Mat rvec = cv::Mat::ones(1, 3, CV_32FC1);
|
||||
const cv::Mat tvec = cv::Mat::ones(1, 3, CV_32FC1);
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
const cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat dst;
|
||||
|
||||
TEST_CYCLE() cv::gpu::transformPoints(d_src, rvec, tvec, dst);
|
||||
|
||||
GPU_SANITY_CHECK(dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
FAIL_NO_CPU();
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// ProjectPoints
|
||||
|
||||
@ -133,3 +306,66 @@ PERF_TEST_P(Count, Calib3D_SolvePnPRansac,
|
||||
CPU_SANITY_CHECK(tvec, 1e-6);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// ReprojectImageTo3D
|
||||
|
||||
PERF_TEST_P(Sz_Depth, Calib3D_ReprojectImageTo3D,
|
||||
Combine(GPU_TYPICAL_MAT_SIZES,
|
||||
Values(CV_8U, CV_16S)))
|
||||
{
|
||||
const cv::Size size = GET_PARAM(0);
|
||||
const int depth = GET_PARAM(1);
|
||||
|
||||
cv::Mat src(size, depth);
|
||||
declare.in(src, WARMUP_RNG);
|
||||
|
||||
cv::Mat Q(4, 4, CV_32FC1);
|
||||
cv::randu(Q, 0.1, 1.0);
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
const cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat dst;
|
||||
|
||||
TEST_CYCLE() cv::gpu::reprojectImageTo3D(d_src, dst, Q);
|
||||
|
||||
GPU_SANITY_CHECK(dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::Mat dst;
|
||||
|
||||
TEST_CYCLE() cv::reprojectImageTo3D(src, dst, Q);
|
||||
|
||||
CPU_SANITY_CHECK(dst);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// DrawColorDisp
|
||||
|
||||
PERF_TEST_P(Sz_Depth, Calib3D_DrawColorDisp,
|
||||
Combine(GPU_TYPICAL_MAT_SIZES,
|
||||
Values(CV_8U, CV_16S)))
|
||||
{
|
||||
const cv::Size size = GET_PARAM(0);
|
||||
const int type = GET_PARAM(1);
|
||||
|
||||
cv::Mat src(size, type);
|
||||
declare.in(src, WARMUP_RNG);
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
const cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat dst;
|
||||
|
||||
TEST_CYCLE() cv::gpu::drawColorDisp(d_src, dst, 255);
|
||||
|
||||
GPU_SANITY_CHECK(dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
FAIL_NO_CPU();
|
||||
}
|
||||
}
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -42,25 +42,64 @@
|
||||
|
||||
#include "perf_precomp.hpp"
|
||||
|
||||
#include "opencv2/photo/gpu.hpp"
|
||||
#include "opencv2/ts/gpu_perf.hpp"
|
||||
|
||||
#include "opencv2/opencv_modules.hpp"
|
||||
|
||||
#if defined (HAVE_CUDA) && defined(HAVE_OPENCV_GPUARITHM) && defined(HAVE_OPENCV_GPUIMGPROC)
|
||||
|
||||
using namespace std;
|
||||
using namespace testing;
|
||||
using namespace perf;
|
||||
|
||||
#define GPU_DENOISING_IMAGE_SIZES testing::Values(perf::szVGA, perf::sz720p)
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// BilateralFilter
|
||||
|
||||
DEF_PARAM_TEST(Sz_Depth_Cn_KernelSz, cv::Size, MatDepth, MatCn, int);
|
||||
|
||||
PERF_TEST_P(Sz_Depth_Cn_KernelSz, Denoising_BilateralFilter,
|
||||
Combine(GPU_DENOISING_IMAGE_SIZES,
|
||||
Values(CV_8U, CV_32F),
|
||||
GPU_CHANNELS_1_3,
|
||||
Values(3, 5, 9)))
|
||||
{
|
||||
declare.time(60.0);
|
||||
|
||||
const cv::Size size = GET_PARAM(0);
|
||||
const int depth = GET_PARAM(1);
|
||||
const int channels = GET_PARAM(2);
|
||||
const int kernel_size = GET_PARAM(3);
|
||||
|
||||
const float sigma_color = 7;
|
||||
const float sigma_spatial = 5;
|
||||
const int borderMode = cv::BORDER_REFLECT101;
|
||||
|
||||
const int type = CV_MAKE_TYPE(depth, channels);
|
||||
|
||||
cv::Mat src(size, type);
|
||||
declare.in(src, WARMUP_RNG);
|
||||
|
||||
if (PERF_RUN_GPU())
|
||||
{
|
||||
const cv::gpu::GpuMat d_src(src);
|
||||
cv::gpu::GpuMat dst;
|
||||
|
||||
TEST_CYCLE() cv::gpu::bilateralFilter(d_src, dst, kernel_size, sigma_color, sigma_spatial, borderMode);
|
||||
|
||||
GPU_SANITY_CHECK(dst);
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::Mat dst;
|
||||
|
||||
TEST_CYCLE() cv::bilateralFilter(src, dst, kernel_size, sigma_color, sigma_spatial, borderMode);
|
||||
|
||||
CPU_SANITY_CHECK(dst);
|
||||
}
|
||||
}
|
||||
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// nonLocalMeans
|
||||
|
||||
DEF_PARAM_TEST(Sz_Depth_Cn_WinSz_BlockSz, cv::Size, MatDepth, MatCn, int, int);
|
||||
|
||||
PERF_TEST_P(Sz_Depth_Cn_WinSz_BlockSz, GPU_NonLocalMeans,
|
||||
PERF_TEST_P(Sz_Depth_Cn_WinSz_BlockSz, Denoising_NonLocalMeans,
|
||||
Combine(GPU_DENOISING_IMAGE_SIZES,
|
||||
Values<MatDepth>(CV_8U),
|
||||
GPU_CHANNELS_1_3,
|
||||
@ -104,7 +143,7 @@ PERF_TEST_P(Sz_Depth_Cn_WinSz_BlockSz, GPU_NonLocalMeans,
|
||||
|
||||
DEF_PARAM_TEST(Sz_Depth_Cn_WinSz_BlockSz, cv::Size, MatDepth, MatCn, int, int);
|
||||
|
||||
PERF_TEST_P(Sz_Depth_Cn_WinSz_BlockSz, GPU_FastNonLocalMeans,
|
||||
PERF_TEST_P(Sz_Depth_Cn_WinSz_BlockSz, Denoising_FastNonLocalMeans,
|
||||
Combine(GPU_DENOISING_IMAGE_SIZES,
|
||||
Values<MatDepth>(CV_8U),
|
||||
GPU_CHANNELS_1_3,
|
||||
@ -150,7 +189,7 @@ PERF_TEST_P(Sz_Depth_Cn_WinSz_BlockSz, GPU_FastNonLocalMeans,
|
||||
|
||||
DEF_PARAM_TEST(Sz_Depth_WinSz_BlockSz, cv::Size, MatDepth, int, int);
|
||||
|
||||
PERF_TEST_P(Sz_Depth_WinSz_BlockSz, GPU_FastNonLocalMeansColored,
|
||||
PERF_TEST_P(Sz_Depth_WinSz_BlockSz, Denoising_FastNonLocalMeansColored,
|
||||
Combine(GPU_DENOISING_IMAGE_SIZES,
|
||||
Values<MatDepth>(CV_8U),
|
||||
Values(21),
|
||||
@ -189,5 +228,3 @@ PERF_TEST_P(Sz_Depth_WinSz_BlockSz, GPU_FastNonLocalMeansColored,
|
||||
CPU_SANITY_CHECK(dst);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
@ -51,7 +51,7 @@ using namespace perf;
|
||||
|
||||
DEF_PARAM_TEST(Image_Threshold_NonMaxSupression, string, int, bool);
|
||||
|
||||
PERF_TEST_P(Image_Threshold_NonMaxSupression, FAST,
|
||||
PERF_TEST_P(Image_Threshold_NonMaxSupression, Features2D_FAST,
|
||||
Combine(Values<string>("gpu/perf/aloe.png"),
|
||||
Values(20),
|
||||
Bool()))
|
||||
@ -93,7 +93,7 @@ PERF_TEST_P(Image_Threshold_NonMaxSupression, FAST,
|
||||
|
||||
DEF_PARAM_TEST(Image_NFeatures, string, int);
|
||||
|
||||
PERF_TEST_P(Image_NFeatures, ORB,
|
||||
PERF_TEST_P(Image_NFeatures, Features2D_ORB,
|
||||
Combine(Values<string>("gpu/perf/aloe.png"),
|
||||
Values(4000)))
|
||||
{
|
||||
@ -145,7 +145,7 @@ PERF_TEST_P(Image_NFeatures, ORB,
|
||||
|
||||
DEF_PARAM_TEST(DescSize_Norm, int, NormType);
|
||||
|
||||
PERF_TEST_P(DescSize_Norm, BFMatch,
|
||||
PERF_TEST_P(DescSize_Norm, Features2D_BFMatch,
|
||||
Combine(Values(64, 128, 256),
|
||||
Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2), NormType(cv::NORM_HAMMING))))
|
||||
{
|
||||
@ -202,7 +202,7 @@ static void toOneRowMatches(const std::vector< std::vector<cv::DMatch> >& src, s
|
||||
|
||||
DEF_PARAM_TEST(DescSize_K_Norm, int, int, NormType);
|
||||
|
||||
PERF_TEST_P(DescSize_K_Norm, BFKnnMatch,
|
||||
PERF_TEST_P(DescSize_K_Norm, Features2D_BFKnnMatch,
|
||||
Combine(Values(64, 128, 256),
|
||||
Values(2, 3),
|
||||
Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2))))
|
||||
@ -257,7 +257,7 @@ PERF_TEST_P(DescSize_K_Norm, BFKnnMatch,
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// BFRadiusMatch
|
||||
|
||||
PERF_TEST_P(DescSize_Norm, BFRadiusMatch,
|
||||
PERF_TEST_P(DescSize_Norm, Features2D_BFRadiusMatch,
|
||||
Combine(Values(64, 128, 256),
|
||||
Values(NormType(cv::NORM_L1), NormType(cv::NORM_L2))))
|
||||
{
|
@ -51,7 +51,7 @@ using namespace perf;
|
||||
|
||||
DEF_PARAM_TEST(Sz_Type_KernelSz, cv::Size, MatType, int);
|
||||
|
||||
PERF_TEST_P(Sz_Type_KernelSz, Blur,
|
||||
PERF_TEST_P(Sz_Type_KernelSz, Filters_Blur,
|
||||
Combine(GPU_TYPICAL_MAT_SIZES,
|
||||
Values(CV_8UC1, CV_8UC4),
|
||||
Values(3, 5, 7)))
|
||||
@ -87,7 +87,7 @@ PERF_TEST_P(Sz_Type_KernelSz, Blur,
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Sobel
|
||||
|
||||
PERF_TEST_P(Sz_Type_KernelSz, Sobel, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1), Values(3, 5, 7, 9, 11, 13, 15)))
|
||||
PERF_TEST_P(Sz_Type_KernelSz, Filters_Sobel, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1), Values(3, 5, 7, 9, 11, 13, 15)))
|
||||
{
|
||||
declare.time(20.0);
|
||||
|
||||
@ -121,7 +121,7 @@ PERF_TEST_P(Sz_Type_KernelSz, Sobel, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8U
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Scharr
|
||||
|
||||
PERF_TEST_P(Sz_Type, Scharr, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1)))
|
||||
PERF_TEST_P(Sz_Type, Filters_Scharr, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1)))
|
||||
{
|
||||
declare.time(20.0);
|
||||
|
||||
@ -154,7 +154,7 @@ PERF_TEST_P(Sz_Type, Scharr, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// GaussianBlur
|
||||
|
||||
PERF_TEST_P(Sz_Type_KernelSz, GaussianBlur, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1), Values(3, 5, 7, 9, 11, 13, 15)))
|
||||
PERF_TEST_P(Sz_Type_KernelSz, Filters_GaussianBlur, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1), Values(3, 5, 7, 9, 11, 13, 15)))
|
||||
{
|
||||
declare.time(20.0);
|
||||
|
||||
@ -188,7 +188,7 @@ PERF_TEST_P(Sz_Type_KernelSz, GaussianBlur, Combine(GPU_TYPICAL_MAT_SIZES, Value
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Laplacian
|
||||
|
||||
PERF_TEST_P(Sz_Type_KernelSz, Laplacian, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(1, 3)))
|
||||
PERF_TEST_P(Sz_Type_KernelSz, Filters_Laplacian, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(1, 3)))
|
||||
{
|
||||
declare.time(20.0);
|
||||
|
||||
@ -221,7 +221,7 @@ PERF_TEST_P(Sz_Type_KernelSz, Laplacian, Combine(GPU_TYPICAL_MAT_SIZES, Values(C
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Erode
|
||||
|
||||
PERF_TEST_P(Sz_Type, Erode, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4)))
|
||||
PERF_TEST_P(Sz_Type, Filters_Erode, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4)))
|
||||
{
|
||||
declare.time(20.0);
|
||||
|
||||
@ -256,7 +256,7 @@ PERF_TEST_P(Sz_Type, Erode, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8U
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Dilate
|
||||
|
||||
PERF_TEST_P(Sz_Type, Dilate, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4)))
|
||||
PERF_TEST_P(Sz_Type, Filters_Dilate, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4)))
|
||||
{
|
||||
declare.time(20.0);
|
||||
|
||||
@ -295,7 +295,7 @@ CV_ENUM(MorphOp, MORPH_OPEN, MORPH_CLOSE, MORPH_GRADIENT, MORPH_TOPHAT, MORPH_BL
|
||||
|
||||
DEF_PARAM_TEST(Sz_Type_Op, cv::Size, MatType, MorphOp);
|
||||
|
||||
PERF_TEST_P(Sz_Type_Op, MorphologyEx, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4), MorphOp::all()))
|
||||
PERF_TEST_P(Sz_Type_Op, Filters_MorphologyEx, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4), MorphOp::all()))
|
||||
{
|
||||
declare.time(20.0);
|
||||
|
||||
@ -332,7 +332,7 @@ PERF_TEST_P(Sz_Type_Op, MorphologyEx, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8
|
||||
//////////////////////////////////////////////////////////////////////
|
||||
// Filter2D
|
||||
|
||||
PERF_TEST_P(Sz_Type_KernelSz, Filter2D, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(3, 5, 7, 9, 11, 13, 15)))
|
||||
PERF_TEST_P(Sz_Type_KernelSz, Filters_Filter2D, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(3, 5, 7, 9, 11, 13, 15)))
|
||||
{
|
||||
declare.time(20.0);
|
||||
|
1904
modules/gpu/perf/perf_imgproc.cpp
Normal file
1904
modules/gpu/perf/perf_imgproc.cpp
Normal file
File diff suppressed because it is too large
Load Diff
@ -51,12 +51,21 @@
|
||||
#ifndef __OPENCV_PERF_PRECOMP_HPP__
|
||||
#define __OPENCV_PERF_PRECOMP_HPP__
|
||||
|
||||
#include <cstdio>
|
||||
#include <iostream>
|
||||
|
||||
#include "opencv2/ts.hpp"
|
||||
#include "opencv2/ts/gpu_perf.hpp"
|
||||
|
||||
#include "opencv2/core.hpp"
|
||||
#include "opencv2/highgui.hpp"
|
||||
#include "opencv2/gpu.hpp"
|
||||
#include "opencv2/calib3d.hpp"
|
||||
#include "opencv2/objdetect.hpp"
|
||||
#include "opencv2/imgproc.hpp"
|
||||
#include "opencv2/video.hpp"
|
||||
#include "opencv2/photo.hpp"
|
||||
|
||||
#include "opencv2/core/gpu_private.hpp"
|
||||
|
||||
#ifdef GTEST_CREATE_SHARED_LIBRARY
|
||||
#error no modules except ts should have GTEST_CREATE_SHARED_LIBRARY defined
|
||||
|
1107
modules/gpu/perf/perf_video.cpp
Normal file
1107
modules/gpu/perf/perf_video.cpp
Normal file
File diff suppressed because it is too large
Load Diff
@ -1,4 +1,4 @@
|
||||
set(PERF4AU_REQUIRED_DEPS opencv_core opencv_imgproc opencv_highgui opencv_video opencv_legacy opencv_ml opencv_ts opencv_gpufilters opencv_gpuimgproc opencv_gpuoptflow)
|
||||
set(PERF4AU_REQUIRED_DEPS opencv_core opencv_imgproc opencv_highgui opencv_video opencv_legacy opencv_gpu opencv_ts)
|
||||
|
||||
ocv_check_dependencies(${PERF4AU_REQUIRED_DEPS})
|
||||
|
||||
@ -25,3 +25,4 @@ if(WIN32)
|
||||
set_target_properties(${the_target} PROPERTIES LINK_FLAGS "/NODEFAULTLIB:atlthunk.lib /NODEFAULTLIB:atlsd.lib /DEBUG")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
@ -40,15 +40,18 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "opencv2/ts.hpp"
|
||||
#include "opencv2/ts/gpu_perf.hpp"
|
||||
|
||||
#include "opencv2/gpuimgproc.hpp"
|
||||
#include "opencv2/gpuoptflow.hpp"
|
||||
#include <cstdio>
|
||||
|
||||
#ifdef HAVE_CVCONFIG_H
|
||||
#include "cvconfig.h"
|
||||
#endif
|
||||
#include "opencv2/core.hpp"
|
||||
#include "opencv2/gpu.hpp"
|
||||
#include "opencv2/highgui.hpp"
|
||||
#include "opencv2/video.hpp"
|
||||
#include "opencv2/legacy.hpp"
|
||||
#include "opencv2/ts.hpp"
|
||||
#include "opencv2/ts/gpu_perf.hpp"
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
|
565
modules/gpu/src/arithm.cpp
Normal file
565
modules/gpu/src/arithm.cpp
Normal file
@ -0,0 +1,565 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp"
|
||||
|
||||
using namespace cv;
|
||||
using namespace cv::gpu;
|
||||
|
||||
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
|
||||
|
||||
void cv::gpu::gemm(const GpuMat&, const GpuMat&, double, const GpuMat&, double, GpuMat&, int, Stream&) { throw_no_cuda(); }
|
||||
void cv::gpu::transpose(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
|
||||
void cv::gpu::flip(const GpuMat&, GpuMat&, int, Stream&) { throw_no_cuda(); }
|
||||
void cv::gpu::LUT(const GpuMat&, const Mat&, GpuMat&, Stream&) { throw_no_cuda(); }
|
||||
void cv::gpu::magnitude(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
|
||||
void cv::gpu::magnitudeSqr(const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
|
||||
void cv::gpu::magnitude(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
|
||||
void cv::gpu::magnitudeSqr(const GpuMat&, const GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
|
||||
void cv::gpu::phase(const GpuMat&, const GpuMat&, GpuMat&, bool, Stream&) { throw_no_cuda(); }
|
||||
void cv::gpu::cartToPolar(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool, Stream&) { throw_no_cuda(); }
|
||||
void cv::gpu::polarToCart(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool, Stream&) { throw_no_cuda(); }
|
||||
void cv::gpu::normalize(const GpuMat&, GpuMat&, double, double, int, int, const GpuMat&) { throw_no_cuda(); }
|
||||
void cv::gpu::normalize(const GpuMat&, GpuMat&, double, double, int, int, const GpuMat&, GpuMat&, GpuMat&) { throw_no_cuda(); }
|
||||
|
||||
#else /* !defined (HAVE_CUDA) */
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// gemm
|
||||
|
||||
void cv::gpu::gemm(const GpuMat& src1, const GpuMat& src2, double alpha, const GpuMat& src3, double beta, GpuMat& dst, int flags, Stream& stream)
|
||||
{
|
||||
#ifndef HAVE_CUBLAS
|
||||
(void)src1;
|
||||
(void)src2;
|
||||
(void)alpha;
|
||||
(void)src3;
|
||||
(void)beta;
|
||||
(void)dst;
|
||||
(void)flags;
|
||||
(void)stream;
|
||||
CV_Error(cv::Error::StsNotImplemented, "The library was build without CUBLAS");
|
||||
#else
|
||||
// CUBLAS works with column-major matrices
|
||||
|
||||
CV_Assert(src1.type() == CV_32FC1 || src1.type() == CV_32FC2 || src1.type() == CV_64FC1 || src1.type() == CV_64FC2);
|
||||
CV_Assert(src2.type() == src1.type() && (src3.empty() || src3.type() == src1.type()));
|
||||
|
||||
if (src1.depth() == CV_64F)
|
||||
{
|
||||
if (!deviceSupports(NATIVE_DOUBLE))
|
||||
CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double");
|
||||
}
|
||||
|
||||
bool tr1 = (flags & GEMM_1_T) != 0;
|
||||
bool tr2 = (flags & GEMM_2_T) != 0;
|
||||
bool tr3 = (flags & GEMM_3_T) != 0;
|
||||
|
||||
if (src1.type() == CV_64FC2)
|
||||
{
|
||||
if (tr1 || tr2 || tr3)
|
||||
CV_Error(cv::Error::StsNotImplemented, "transpose operation doesn't implemented for CV_64FC2 type");
|
||||
}
|
||||
|
||||
Size src1Size = tr1 ? Size(src1.rows, src1.cols) : src1.size();
|
||||
Size src2Size = tr2 ? Size(src2.rows, src2.cols) : src2.size();
|
||||
Size src3Size = tr3 ? Size(src3.rows, src3.cols) : src3.size();
|
||||
Size dstSize(src2Size.width, src1Size.height);
|
||||
|
||||
CV_Assert(src1Size.width == src2Size.height);
|
||||
CV_Assert(src3.empty() || src3Size == dstSize);
|
||||
|
||||
dst.create(dstSize, src1.type());
|
||||
|
||||
if (beta != 0)
|
||||
{
|
||||
if (src3.empty())
|
||||
{
|
||||
if (stream)
|
||||
stream.enqueueMemSet(dst, Scalar::all(0));
|
||||
else
|
||||
dst.setTo(Scalar::all(0));
|
||||
}
|
||||
else
|
||||
{
|
||||
if (tr3)
|
||||
{
|
||||
transpose(src3, dst, stream);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (stream)
|
||||
stream.enqueueCopy(src3, dst);
|
||||
else
|
||||
src3.copyTo(dst);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
cublasHandle_t handle;
|
||||
cublasSafeCall( cublasCreate_v2(&handle) );
|
||||
|
||||
cublasSafeCall( cublasSetStream_v2(handle, StreamAccessor::getStream(stream)) );
|
||||
|
||||
cublasSafeCall( cublasSetPointerMode_v2(handle, CUBLAS_POINTER_MODE_HOST) );
|
||||
|
||||
const float alphaf = static_cast<float>(alpha);
|
||||
const float betaf = static_cast<float>(beta);
|
||||
|
||||
const cuComplex alphacf = make_cuComplex(alphaf, 0);
|
||||
const cuComplex betacf = make_cuComplex(betaf, 0);
|
||||
|
||||
const cuDoubleComplex alphac = make_cuDoubleComplex(alpha, 0);
|
||||
const cuDoubleComplex betac = make_cuDoubleComplex(beta, 0);
|
||||
|
||||
cublasOperation_t transa = tr2 ? CUBLAS_OP_T : CUBLAS_OP_N;
|
||||
cublasOperation_t transb = tr1 ? CUBLAS_OP_T : CUBLAS_OP_N;
|
||||
|
||||
switch (src1.type())
|
||||
{
|
||||
case CV_32FC1:
|
||||
cublasSafeCall( cublasSgemm_v2(handle, transa, transb, tr2 ? src2.rows : src2.cols, tr1 ? src1.cols : src1.rows, tr2 ? src2.cols : src2.rows,
|
||||
&alphaf,
|
||||
src2.ptr<float>(), static_cast<int>(src2.step / sizeof(float)),
|
||||
src1.ptr<float>(), static_cast<int>(src1.step / sizeof(float)),
|
||||
&betaf,
|
||||
dst.ptr<float>(), static_cast<int>(dst.step / sizeof(float))) );
|
||||
break;
|
||||
|
||||
case CV_64FC1:
|
||||
cublasSafeCall( cublasDgemm_v2(handle, transa, transb, tr2 ? src2.rows : src2.cols, tr1 ? src1.cols : src1.rows, tr2 ? src2.cols : src2.rows,
|
||||
&alpha,
|
||||
src2.ptr<double>(), static_cast<int>(src2.step / sizeof(double)),
|
||||
src1.ptr<double>(), static_cast<int>(src1.step / sizeof(double)),
|
||||
&beta,
|
||||
dst.ptr<double>(), static_cast<int>(dst.step / sizeof(double))) );
|
||||
break;
|
||||
|
||||
case CV_32FC2:
|
||||
cublasSafeCall( cublasCgemm_v2(handle, transa, transb, tr2 ? src2.rows : src2.cols, tr1 ? src1.cols : src1.rows, tr2 ? src2.cols : src2.rows,
|
||||
&alphacf,
|
||||
src2.ptr<cuComplex>(), static_cast<int>(src2.step / sizeof(cuComplex)),
|
||||
src1.ptr<cuComplex>(), static_cast<int>(src1.step / sizeof(cuComplex)),
|
||||
&betacf,
|
||||
dst.ptr<cuComplex>(), static_cast<int>(dst.step / sizeof(cuComplex))) );
|
||||
break;
|
||||
|
||||
case CV_64FC2:
|
||||
cublasSafeCall( cublasZgemm_v2(handle, transa, transb, tr2 ? src2.rows : src2.cols, tr1 ? src1.cols : src1.rows, tr2 ? src2.cols : src2.rows,
|
||||
&alphac,
|
||||
src2.ptr<cuDoubleComplex>(), static_cast<int>(src2.step / sizeof(cuDoubleComplex)),
|
||||
src1.ptr<cuDoubleComplex>(), static_cast<int>(src1.step / sizeof(cuDoubleComplex)),
|
||||
&betac,
|
||||
dst.ptr<cuDoubleComplex>(), static_cast<int>(dst.step / sizeof(cuDoubleComplex))) );
|
||||
break;
|
||||
}
|
||||
|
||||
cublasSafeCall( cublasDestroy_v2(handle) );
|
||||
#endif
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// transpose
|
||||
|
||||
void cv::gpu::transpose(const GpuMat& src, GpuMat& dst, Stream& s)
|
||||
{
|
||||
CV_Assert(src.elemSize() == 1 || src.elemSize() == 4 || src.elemSize() == 8);
|
||||
|
||||
dst.create( src.cols, src.rows, src.type() );
|
||||
|
||||
cudaStream_t stream = StreamAccessor::getStream(s);
|
||||
|
||||
if (src.elemSize() == 1)
|
||||
{
|
||||
NppStreamHandler h(stream);
|
||||
|
||||
NppiSize sz;
|
||||
sz.width = src.cols;
|
||||
sz.height = src.rows;
|
||||
|
||||
nppSafeCall( nppiTranspose_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step),
|
||||
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz) );
|
||||
}
|
||||
else if (src.elemSize() == 4)
|
||||
{
|
||||
NppStStreamHandler h(stream);
|
||||
|
||||
NcvSize32u sz;
|
||||
sz.width = src.cols;
|
||||
sz.height = src.rows;
|
||||
|
||||
ncvSafeCall( nppiStTranspose_32u_C1R(const_cast<Ncv32u*>(src.ptr<Ncv32u>()), static_cast<int>(src.step),
|
||||
dst.ptr<Ncv32u>(), static_cast<int>(dst.step), sz) );
|
||||
}
|
||||
else // if (src.elemSize() == 8)
|
||||
{
|
||||
if (!deviceSupports(NATIVE_DOUBLE))
|
||||
CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double");
|
||||
|
||||
NppStStreamHandler h(stream);
|
||||
|
||||
NcvSize32u sz;
|
||||
sz.width = src.cols;
|
||||
sz.height = src.rows;
|
||||
|
||||
ncvSafeCall( nppiStTranspose_64u_C1R(const_cast<Ncv64u*>(src.ptr<Ncv64u>()), static_cast<int>(src.step),
|
||||
dst.ptr<Ncv64u>(), static_cast<int>(dst.step), sz) );
|
||||
}
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// flip
|
||||
|
||||
namespace
|
||||
{
|
||||
template<int DEPTH> struct NppTypeTraits;
|
||||
template<> struct NppTypeTraits<CV_8U> { typedef Npp8u npp_t; };
|
||||
template<> struct NppTypeTraits<CV_8S> { typedef Npp8s npp_t; };
|
||||
template<> struct NppTypeTraits<CV_16U> { typedef Npp16u npp_t; };
|
||||
template<> struct NppTypeTraits<CV_16S> { typedef Npp16s npp_t; };
|
||||
template<> struct NppTypeTraits<CV_32S> { typedef Npp32s npp_t; };
|
||||
template<> struct NppTypeTraits<CV_32F> { typedef Npp32f npp_t; };
|
||||
template<> struct NppTypeTraits<CV_64F> { typedef Npp64f npp_t; };
|
||||
|
||||
template <int DEPTH> struct NppMirrorFunc
|
||||
{
|
||||
typedef typename NppTypeTraits<DEPTH>::npp_t npp_t;
|
||||
|
||||
typedef NppStatus (*func_t)(const npp_t* pSrc, int nSrcStep, npp_t* pDst, int nDstStep, NppiSize oROI, NppiAxis flip);
|
||||
};
|
||||
|
||||
template <int DEPTH, typename NppMirrorFunc<DEPTH>::func_t func> struct NppMirror
|
||||
{
|
||||
typedef typename NppMirrorFunc<DEPTH>::npp_t npp_t;
|
||||
|
||||
static void call(const GpuMat& src, GpuMat& dst, int flipCode, cudaStream_t stream)
|
||||
{
|
||||
NppStreamHandler h(stream);
|
||||
|
||||
NppiSize sz;
|
||||
sz.width = src.cols;
|
||||
sz.height = src.rows;
|
||||
|
||||
nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step),
|
||||
dst.ptr<npp_t>(), static_cast<int>(dst.step), sz,
|
||||
(flipCode == 0 ? NPP_HORIZONTAL_AXIS : (flipCode > 0 ? NPP_VERTICAL_AXIS : NPP_BOTH_AXIS))) );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
void cv::gpu::flip(const GpuMat& src, GpuMat& dst, int flipCode, Stream& stream)
|
||||
{
|
||||
typedef void (*func_t)(const GpuMat& src, GpuMat& dst, int flipCode, cudaStream_t stream);
|
||||
static const func_t funcs[6][4] =
|
||||
{
|
||||
{NppMirror<CV_8U, nppiMirror_8u_C1R>::call, 0, NppMirror<CV_8U, nppiMirror_8u_C3R>::call, NppMirror<CV_8U, nppiMirror_8u_C4R>::call},
|
||||
{0,0,0,0},
|
||||
{NppMirror<CV_16U, nppiMirror_16u_C1R>::call, 0, NppMirror<CV_16U, nppiMirror_16u_C3R>::call, NppMirror<CV_16U, nppiMirror_16u_C4R>::call},
|
||||
{0,0,0,0},
|
||||
{NppMirror<CV_32S, nppiMirror_32s_C1R>::call, 0, NppMirror<CV_32S, nppiMirror_32s_C3R>::call, NppMirror<CV_32S, nppiMirror_32s_C4R>::call},
|
||||
{NppMirror<CV_32F, nppiMirror_32f_C1R>::call, 0, NppMirror<CV_32F, nppiMirror_32f_C3R>::call, NppMirror<CV_32F, nppiMirror_32f_C4R>::call}
|
||||
};
|
||||
|
||||
CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32S || src.depth() == CV_32F);
|
||||
CV_Assert(src.channels() == 1 || src.channels() == 3 || src.channels() == 4);
|
||||
|
||||
dst.create(src.size(), src.type());
|
||||
|
||||
funcs[src.depth()][src.channels() - 1](src, dst, flipCode, StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// LUT
|
||||
|
||||
void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst, Stream& s)
|
||||
{
|
||||
const int cn = src.channels();
|
||||
|
||||
CV_Assert( src.type() == CV_8UC1 || src.type() == CV_8UC3 );
|
||||
CV_Assert( lut.depth() == CV_8U );
|
||||
CV_Assert( lut.channels() == 1 || lut.channels() == cn );
|
||||
CV_Assert( lut.rows * lut.cols == 256 && lut.isContinuous() );
|
||||
|
||||
dst.create(src.size(), CV_MAKE_TYPE(lut.depth(), cn));
|
||||
|
||||
NppiSize sz;
|
||||
sz.height = src.rows;
|
||||
sz.width = src.cols;
|
||||
|
||||
Mat nppLut;
|
||||
lut.convertTo(nppLut, CV_32S);
|
||||
|
||||
int nValues3[] = {256, 256, 256};
|
||||
|
||||
Npp32s pLevels[256];
|
||||
for (int i = 0; i < 256; ++i)
|
||||
pLevels[i] = i;
|
||||
|
||||
const Npp32s* pLevels3[3];
|
||||
|
||||
#if (CUDA_VERSION <= 4020)
|
||||
pLevels3[0] = pLevels3[1] = pLevels3[2] = pLevels;
|
||||
#else
|
||||
GpuMat d_pLevels;
|
||||
d_pLevels.upload(Mat(1, 256, CV_32S, pLevels));
|
||||
pLevels3[0] = pLevels3[1] = pLevels3[2] = d_pLevels.ptr<Npp32s>();
|
||||
#endif
|
||||
|
||||
cudaStream_t stream = StreamAccessor::getStream(s);
|
||||
NppStreamHandler h(stream);
|
||||
|
||||
if (src.type() == CV_8UC1)
|
||||
{
|
||||
#if (CUDA_VERSION <= 4020)
|
||||
nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step),
|
||||
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, nppLut.ptr<Npp32s>(), pLevels, 256) );
|
||||
#else
|
||||
GpuMat d_nppLut(Mat(1, 256, CV_32S, nppLut.data));
|
||||
nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step),
|
||||
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, d_nppLut.ptr<Npp32s>(), d_pLevels.ptr<Npp32s>(), 256) );
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
const Npp32s* pValues3[3];
|
||||
|
||||
Mat nppLut3[3];
|
||||
if (nppLut.channels() == 1)
|
||||
{
|
||||
#if (CUDA_VERSION <= 4020)
|
||||
pValues3[0] = pValues3[1] = pValues3[2] = nppLut.ptr<Npp32s>();
|
||||
#else
|
||||
GpuMat d_nppLut(Mat(1, 256, CV_32S, nppLut.data));
|
||||
pValues3[0] = pValues3[1] = pValues3[2] = d_nppLut.ptr<Npp32s>();
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::split(nppLut, nppLut3);
|
||||
|
||||
#if (CUDA_VERSION <= 4020)
|
||||
pValues3[0] = nppLut3[0].ptr<Npp32s>();
|
||||
pValues3[1] = nppLut3[1].ptr<Npp32s>();
|
||||
pValues3[2] = nppLut3[2].ptr<Npp32s>();
|
||||
#else
|
||||
GpuMat d_nppLut0(Mat(1, 256, CV_32S, nppLut3[0].data));
|
||||
GpuMat d_nppLut1(Mat(1, 256, CV_32S, nppLut3[1].data));
|
||||
GpuMat d_nppLut2(Mat(1, 256, CV_32S, nppLut3[2].data));
|
||||
|
||||
pValues3[0] = d_nppLut0.ptr<Npp32s>();
|
||||
pValues3[1] = d_nppLut1.ptr<Npp32s>();
|
||||
pValues3[2] = d_nppLut2.ptr<Npp32s>();
|
||||
#endif
|
||||
}
|
||||
|
||||
nppSafeCall( nppiLUT_Linear_8u_C3R(src.ptr<Npp8u>(), static_cast<int>(src.step),
|
||||
dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz, pValues3, pLevels3, nValues3) );
|
||||
}
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// NPP magnitide
|
||||
|
||||
namespace
|
||||
{
|
||||
typedef NppStatus (*nppMagnitude_t)(const Npp32fc* pSrc, int nSrcStep, Npp32f* pDst, int nDstStep, NppiSize oSizeROI);
|
||||
|
||||
inline void npp_magnitude(const GpuMat& src, GpuMat& dst, nppMagnitude_t func, cudaStream_t stream)
|
||||
{
|
||||
CV_Assert(src.type() == CV_32FC2);
|
||||
|
||||
dst.create(src.size(), CV_32FC1);
|
||||
|
||||
NppiSize sz;
|
||||
sz.width = src.cols;
|
||||
sz.height = src.rows;
|
||||
|
||||
NppStreamHandler h(stream);
|
||||
|
||||
nppSafeCall( func(src.ptr<Npp32fc>(), static_cast<int>(src.step), dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz) );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
}
|
||||
|
||||
void cv::gpu::magnitude(const GpuMat& src, GpuMat& dst, Stream& stream)
|
||||
{
|
||||
npp_magnitude(src, dst, nppiMagnitude_32fc32f_C1R, StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
||||
void cv::gpu::magnitudeSqr(const GpuMat& src, GpuMat& dst, Stream& stream)
|
||||
{
|
||||
npp_magnitude(src, dst, nppiMagnitudeSqr_32fc32f_C1R, StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// Polar <-> Cart
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
namespace mathfunc
|
||||
{
|
||||
void cartToPolar_gpu(PtrStepSzf x, PtrStepSzf y, PtrStepSzf mag, bool magSqr, PtrStepSzf angle, bool angleInDegrees, cudaStream_t stream);
|
||||
void polarToCart_gpu(PtrStepSzf mag, PtrStepSzf angle, PtrStepSzf x, PtrStepSzf y, bool angleInDegrees, cudaStream_t stream);
|
||||
}
|
||||
}}}
|
||||
|
||||
namespace
|
||||
{
|
||||
inline void cartToPolar_caller(const GpuMat& x, const GpuMat& y, GpuMat* mag, bool magSqr, GpuMat* angle, bool angleInDegrees, cudaStream_t stream)
|
||||
{
|
||||
using namespace ::cv::gpu::cudev::mathfunc;
|
||||
|
||||
CV_Assert(x.size() == y.size() && x.type() == y.type());
|
||||
CV_Assert(x.depth() == CV_32F);
|
||||
|
||||
if (mag)
|
||||
mag->create(x.size(), x.type());
|
||||
if (angle)
|
||||
angle->create(x.size(), x.type());
|
||||
|
||||
GpuMat x1cn = x.reshape(1);
|
||||
GpuMat y1cn = y.reshape(1);
|
||||
GpuMat mag1cn = mag ? mag->reshape(1) : GpuMat();
|
||||
GpuMat angle1cn = angle ? angle->reshape(1) : GpuMat();
|
||||
|
||||
cartToPolar_gpu(x1cn, y1cn, mag1cn, magSqr, angle1cn, angleInDegrees, stream);
|
||||
}
|
||||
|
||||
inline void polarToCart_caller(const GpuMat& mag, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees, cudaStream_t stream)
|
||||
{
|
||||
using namespace ::cv::gpu::cudev::mathfunc;
|
||||
|
||||
CV_Assert((mag.empty() || mag.size() == angle.size()) && mag.type() == angle.type());
|
||||
CV_Assert(mag.depth() == CV_32F);
|
||||
|
||||
x.create(mag.size(), mag.type());
|
||||
y.create(mag.size(), mag.type());
|
||||
|
||||
GpuMat mag1cn = mag.reshape(1);
|
||||
GpuMat angle1cn = angle.reshape(1);
|
||||
GpuMat x1cn = x.reshape(1);
|
||||
GpuMat y1cn = y.reshape(1);
|
||||
|
||||
polarToCart_gpu(mag1cn, angle1cn, x1cn, y1cn, angleInDegrees, stream);
|
||||
}
|
||||
}
|
||||
|
||||
void cv::gpu::magnitude(const GpuMat& x, const GpuMat& y, GpuMat& dst, Stream& stream)
|
||||
{
|
||||
cartToPolar_caller(x, y, &dst, false, 0, false, StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
||||
void cv::gpu::magnitudeSqr(const GpuMat& x, const GpuMat& y, GpuMat& dst, Stream& stream)
|
||||
{
|
||||
cartToPolar_caller(x, y, &dst, true, 0, false, StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
||||
void cv::gpu::phase(const GpuMat& x, const GpuMat& y, GpuMat& angle, bool angleInDegrees, Stream& stream)
|
||||
{
|
||||
cartToPolar_caller(x, y, 0, false, &angle, angleInDegrees, StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
||||
void cv::gpu::cartToPolar(const GpuMat& x, const GpuMat& y, GpuMat& mag, GpuMat& angle, bool angleInDegrees, Stream& stream)
|
||||
{
|
||||
cartToPolar_caller(x, y, &mag, false, &angle, angleInDegrees, StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
||||
void cv::gpu::polarToCart(const GpuMat& magnitude, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees, Stream& stream)
|
||||
{
|
||||
polarToCart_caller(magnitude, angle, x, y, angleInDegrees, StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// normalize
|
||||
|
||||
void cv::gpu::normalize(const GpuMat& src, GpuMat& dst, double a, double b, int norm_type, int dtype, const GpuMat& mask)
|
||||
{
|
||||
GpuMat norm_buf;
|
||||
GpuMat cvt_buf;
|
||||
normalize(src, dst, a, b, norm_type, dtype, mask, norm_buf, cvt_buf);
|
||||
}
|
||||
|
||||
void cv::gpu::normalize(const GpuMat& src, GpuMat& dst, double a, double b, int norm_type, int dtype, const GpuMat& mask, GpuMat& norm_buf, GpuMat& cvt_buf)
|
||||
{
|
||||
double scale = 1, shift = 0;
|
||||
if (norm_type == NORM_MINMAX)
|
||||
{
|
||||
double smin = 0, smax = 0;
|
||||
double dmin = std::min(a, b), dmax = std::max(a, b);
|
||||
minMax(src, &smin, &smax, mask, norm_buf);
|
||||
scale = (dmax - dmin) * (smax - smin > std::numeric_limits<double>::epsilon() ? 1.0 / (smax - smin) : 0.0);
|
||||
shift = dmin - smin * scale;
|
||||
}
|
||||
else if (norm_type == NORM_L2 || norm_type == NORM_L1 || norm_type == NORM_INF)
|
||||
{
|
||||
scale = norm(src, norm_type, mask, norm_buf);
|
||||
scale = scale > std::numeric_limits<double>::epsilon() ? a / scale : 0.0;
|
||||
shift = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
CV_Error(cv::Error::StsBadArg, "Unknown/unsupported norm type");
|
||||
}
|
||||
|
||||
if (mask.empty())
|
||||
{
|
||||
src.convertTo(dst, dtype, scale, shift);
|
||||
}
|
||||
else
|
||||
{
|
||||
src.convertTo(cvt_buf, dtype, scale, shift);
|
||||
cvt_buf.copyTo(dst, mask);
|
||||
}
|
||||
}
|
||||
|
||||
#endif /* !defined (HAVE_CUDA) */
|
@ -51,9 +51,6 @@ void cv::gpu::blendLinear(const GpuMat&, const GpuMat&, const GpuMat&, const Gpu
|
||||
|
||||
#else
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// blendLinear
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
namespace blend
|
@ -48,7 +48,9 @@ using namespace cv::gpu;
|
||||
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
|
||||
|
||||
void cv::gpu::transformPoints(const GpuMat&, const Mat&, const Mat&, GpuMat&, Stream&) { throw_no_cuda(); }
|
||||
|
||||
void cv::gpu::projectPoints(const GpuMat&, const Mat&, const Mat&, const Mat&, const Mat&, GpuMat&, Stream&) { throw_no_cuda(); }
|
||||
|
||||
void cv::gpu::solvePnPRansac(const Mat&, const Mat&, const Mat&, const Mat&, Mat&, Mat&, bool, int, float, int, std::vector<int>*) { throw_no_cuda(); }
|
||||
|
||||
#else
|
||||
@ -148,7 +150,7 @@ namespace
|
||||
}
|
||||
|
||||
// Computes rotation, translation pair for small subsets if the input data
|
||||
class TransformHypothesesGenerator : public cv::ParallelLoopBody
|
||||
class TransformHypothesesGenerator
|
||||
{
|
||||
public:
|
||||
TransformHypothesesGenerator(const Mat& object_, const Mat& image_, const Mat& dist_coef_,
|
||||
@ -158,7 +160,7 @@ namespace
|
||||
num_points(num_points_), subset_size(subset_size_), rot_matrices(rot_matrices_),
|
||||
transl_vectors(transl_vectors_) {}
|
||||
|
||||
void operator()(const Range& range) const
|
||||
void operator()(const BlockedRange& range) const
|
||||
{
|
||||
// Input data for generation of the current hypothesis
|
||||
std::vector<int> subset_indices(subset_size);
|
||||
@ -170,7 +172,7 @@ namespace
|
||||
Mat rot_mat(3, 3, CV_64F);
|
||||
Mat transl_vec(1, 3, CV_64F);
|
||||
|
||||
for (int iter = range.start; iter < range.end; ++iter)
|
||||
for (int iter = range.begin(); iter < range.end(); ++iter)
|
||||
{
|
||||
selectRandom(subset_size, num_points, subset_indices);
|
||||
for (int i = 0; i < subset_size; ++i)
|
||||
@ -236,7 +238,7 @@ void cv::gpu::solvePnPRansac(const Mat& object, const Mat& image, const Mat& cam
|
||||
// Generate set of hypotheses using small subsets of the input data
|
||||
TransformHypothesesGenerator body(object, image_normalized, empty_dist_coef, eye_camera_mat,
|
||||
num_points, subset_size, rot_matrices, transl_vectors);
|
||||
parallel_for_(Range(0, num_iters), body);
|
||||
parallel_for(BlockedRange(0, num_iters), body);
|
||||
|
||||
// Compute scores (i.e. number of inliers) for each hypothesis
|
||||
GpuMat d_object(object);
|
||||
@ -250,7 +252,7 @@ void cv::gpu::solvePnPRansac(const Mat& object, const Mat& image, const Mat& cam
|
||||
// Find the best hypothesis index
|
||||
Point best_idx;
|
||||
double best_score;
|
||||
gpu::minMaxLoc(d_hypothesis_scores, NULL, &best_score, NULL, &best_idx);
|
||||
minMaxLoc(d_hypothesis_scores, NULL, &best_score, NULL, &best_idx);
|
||||
int num_inliers = static_cast<int>(best_score);
|
||||
|
||||
// Extract the best hypothesis data
|
||||
@ -288,3 +290,5 @@ void cv::gpu::solvePnPRansac(const Mat& object, const Mat& image, const Mat& cam
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
@ -41,6 +41,8 @@
|
||||
//M*/
|
||||
|
||||
#include "precomp.hpp"
|
||||
#include <vector>
|
||||
#include <iostream>
|
||||
#include "opencv2/objdetect/objdetect_c.h"
|
||||
|
||||
using namespace cv;
|
||||
@ -73,37 +75,6 @@ public:
|
||||
virtual bool read(const String& classifierAsXml) = 0;
|
||||
};
|
||||
|
||||
#ifndef HAVE_OPENCV_GPULEGACY
|
||||
|
||||
struct cv::gpu::CascadeClassifier_GPU::HaarCascade : cv::gpu::CascadeClassifier_GPU::CascadeClassifierImpl
|
||||
{
|
||||
public:
|
||||
HaarCascade()
|
||||
{
|
||||
throw_no_cuda();
|
||||
}
|
||||
|
||||
unsigned int process(const GpuMat&, GpuMat&, float, int, bool, bool, cv::Size, cv::Size)
|
||||
{
|
||||
throw_no_cuda();
|
||||
return 0;
|
||||
}
|
||||
|
||||
cv::Size getClassifierCvSize() const
|
||||
{
|
||||
throw_no_cuda();
|
||||
return cv::Size();
|
||||
}
|
||||
|
||||
bool read(const String&)
|
||||
{
|
||||
throw_no_cuda();
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
#else
|
||||
|
||||
struct cv::gpu::CascadeClassifier_GPU::HaarCascade : cv::gpu::CascadeClassifier_GPU::CascadeClassifierImpl
|
||||
{
|
||||
public:
|
||||
@ -313,8 +284,6 @@ private:
|
||||
virtual ~HaarCascade(){}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
cv::Size operator -(const cv::Size& a, const cv::Size& b)
|
||||
{
|
||||
return cv::Size(a.width - b.width, a.height - b.height);
|
||||
@ -508,8 +477,6 @@ private:
|
||||
resuzeBuffer.create(frame, CV_8UC1);
|
||||
|
||||
integral.create(frame.height + 1, integralFactor * (frame.width + 1), CV_32SC1);
|
||||
|
||||
#ifdef HAVE_OPENCV_GPULEGACY
|
||||
NcvSize32u roiSize;
|
||||
roiSize.width = frame.width;
|
||||
roiSize.height = frame.height;
|
||||
@ -520,7 +487,6 @@ private:
|
||||
Ncv32u bufSize;
|
||||
ncvSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );
|
||||
integralBuffer.create(1, bufSize, CV_8UC1);
|
||||
#endif
|
||||
|
||||
candidates.create(1 , frame.width >> 1, CV_32SC4);
|
||||
}
|
||||
@ -756,3 +722,240 @@ bool cv::gpu::CascadeClassifier_GPU::load(const String& filename)
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
#if defined (HAVE_CUDA)
|
||||
|
||||
struct RectConvert
|
||||
{
|
||||
Rect operator()(const NcvRect32u& nr) const { return Rect(nr.x, nr.y, nr.width, nr.height); }
|
||||
NcvRect32u operator()(const Rect& nr) const
|
||||
{
|
||||
NcvRect32u rect;
|
||||
rect.x = nr.x;
|
||||
rect.y = nr.y;
|
||||
rect.width = nr.width;
|
||||
rect.height = nr.height;
|
||||
return rect;
|
||||
}
|
||||
};
|
||||
|
||||
void groupRectangles(std::vector<NcvRect32u> &hypotheses, int groupThreshold, double eps, std::vector<Ncv32u> *weights)
|
||||
{
|
||||
std::vector<Rect> rects(hypotheses.size());
|
||||
std::transform(hypotheses.begin(), hypotheses.end(), rects.begin(), RectConvert());
|
||||
|
||||
if (weights)
|
||||
{
|
||||
std::vector<int> weights_int;
|
||||
weights_int.assign(weights->begin(), weights->end());
|
||||
cv::groupRectangles(rects, weights_int, groupThreshold, eps);
|
||||
}
|
||||
else
|
||||
{
|
||||
cv::groupRectangles(rects, groupThreshold, eps);
|
||||
}
|
||||
std::transform(rects.begin(), rects.end(), hypotheses.begin(), RectConvert());
|
||||
hypotheses.resize(rects.size());
|
||||
}
|
||||
|
||||
NCVStatus loadFromXML(const String &filename,
|
||||
HaarClassifierCascadeDescriptor &haar,
|
||||
std::vector<HaarStage64> &haarStages,
|
||||
std::vector<HaarClassifierNode128> &haarClassifierNodes,
|
||||
std::vector<HaarFeature64> &haarFeatures)
|
||||
{
|
||||
NCVStatus ncvStat;
|
||||
|
||||
haar.NumStages = 0;
|
||||
haar.NumClassifierRootNodes = 0;
|
||||
haar.NumClassifierTotalNodes = 0;
|
||||
haar.NumFeatures = 0;
|
||||
haar.ClassifierSize.width = 0;
|
||||
haar.ClassifierSize.height = 0;
|
||||
haar.bHasStumpsOnly = true;
|
||||
haar.bNeedsTiltedII = false;
|
||||
Ncv32u curMaxTreeDepth;
|
||||
|
||||
std::vector<char> xmlFileCont;
|
||||
|
||||
std::vector<HaarClassifierNode128> h_TmpClassifierNotRootNodes;
|
||||
haarStages.resize(0);
|
||||
haarClassifierNodes.resize(0);
|
||||
haarFeatures.resize(0);
|
||||
|
||||
Ptr<CvHaarClassifierCascade> oldCascade = (CvHaarClassifierCascade*)cvLoad(filename.c_str(), 0, 0, 0);
|
||||
if (oldCascade.empty())
|
||||
{
|
||||
return NCV_HAAR_XML_LOADING_EXCEPTION;
|
||||
}
|
||||
|
||||
haar.ClassifierSize.width = oldCascade->orig_window_size.width;
|
||||
haar.ClassifierSize.height = oldCascade->orig_window_size.height;
|
||||
|
||||
int stagesCound = oldCascade->count;
|
||||
for(int s = 0; s < stagesCound; ++s) // by stages
|
||||
{
|
||||
HaarStage64 curStage;
|
||||
curStage.setStartClassifierRootNodeOffset(static_cast<Ncv32u>(haarClassifierNodes.size()));
|
||||
|
||||
curStage.setStageThreshold(oldCascade->stage_classifier[s].threshold);
|
||||
|
||||
int treesCount = oldCascade->stage_classifier[s].count;
|
||||
for(int t = 0; t < treesCount; ++t) // by trees
|
||||
{
|
||||
Ncv32u nodeId = 0;
|
||||
CvHaarClassifier* tree = &oldCascade->stage_classifier[s].classifier[t];
|
||||
|
||||
int nodesCount = tree->count;
|
||||
for(int n = 0; n < nodesCount; ++n) //by features
|
||||
{
|
||||
CvHaarFeature* feature = &tree->haar_feature[n];
|
||||
|
||||
HaarClassifierNode128 curNode;
|
||||
curNode.setThreshold(tree->threshold[n]);
|
||||
|
||||
NcvBool bIsLeftNodeLeaf = false;
|
||||
NcvBool bIsRightNodeLeaf = false;
|
||||
|
||||
HaarClassifierNodeDescriptor32 nodeLeft;
|
||||
if ( tree->left[n] <= 0 )
|
||||
{
|
||||
Ncv32f leftVal = tree->alpha[-tree->left[n]];
|
||||
ncvStat = nodeLeft.create(leftVal);
|
||||
ncvAssertReturn(ncvStat == NCV_SUCCESS, ncvStat);
|
||||
bIsLeftNodeLeaf = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
Ncv32u leftNodeOffset = tree->left[n];
|
||||
nodeLeft.create((Ncv32u)(h_TmpClassifierNotRootNodes.size() + leftNodeOffset - 1));
|
||||
haar.bHasStumpsOnly = false;
|
||||
}
|
||||
curNode.setLeftNodeDesc(nodeLeft);
|
||||
|
||||
HaarClassifierNodeDescriptor32 nodeRight;
|
||||
if ( tree->right[n] <= 0 )
|
||||
{
|
||||
Ncv32f rightVal = tree->alpha[-tree->right[n]];
|
||||
ncvStat = nodeRight.create(rightVal);
|
||||
ncvAssertReturn(ncvStat == NCV_SUCCESS, ncvStat);
|
||||
bIsRightNodeLeaf = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
Ncv32u rightNodeOffset = tree->right[n];
|
||||
nodeRight.create((Ncv32u)(h_TmpClassifierNotRootNodes.size() + rightNodeOffset - 1));
|
||||
haar.bHasStumpsOnly = false;
|
||||
}
|
||||
curNode.setRightNodeDesc(nodeRight);
|
||||
|
||||
Ncv32u tiltedVal = feature->tilted;
|
||||
haar.bNeedsTiltedII = (tiltedVal != 0);
|
||||
|
||||
Ncv32u featureId = 0;
|
||||
for(int l = 0; l < CV_HAAR_FEATURE_MAX; ++l) //by rects
|
||||
{
|
||||
Ncv32u rectX = feature->rect[l].r.x;
|
||||
Ncv32u rectY = feature->rect[l].r.y;
|
||||
Ncv32u rectWidth = feature->rect[l].r.width;
|
||||
Ncv32u rectHeight = feature->rect[l].r.height;
|
||||
|
||||
Ncv32f rectWeight = feature->rect[l].weight;
|
||||
|
||||
if (rectWeight == 0/* && rectX == 0 &&rectY == 0 && rectWidth == 0 && rectHeight == 0*/)
|
||||
break;
|
||||
|
||||
HaarFeature64 curFeature;
|
||||
ncvStat = curFeature.setRect(rectX, rectY, rectWidth, rectHeight, haar.ClassifierSize.width, haar.ClassifierSize.height);
|
||||
curFeature.setWeight(rectWeight);
|
||||
ncvAssertReturn(NCV_SUCCESS == ncvStat, ncvStat);
|
||||
haarFeatures.push_back(curFeature);
|
||||
|
||||
featureId++;
|
||||
}
|
||||
|
||||
HaarFeatureDescriptor32 tmpFeatureDesc;
|
||||
ncvStat = tmpFeatureDesc.create(haar.bNeedsTiltedII, bIsLeftNodeLeaf, bIsRightNodeLeaf,
|
||||
featureId, static_cast<Ncv32u>(haarFeatures.size()) - featureId);
|
||||
ncvAssertReturn(NCV_SUCCESS == ncvStat, ncvStat);
|
||||
curNode.setFeatureDesc(tmpFeatureDesc);
|
||||
|
||||
if (!nodeId)
|
||||
{
|
||||
//root node
|
||||
haarClassifierNodes.push_back(curNode);
|
||||
curMaxTreeDepth = 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
//other node
|
||||
h_TmpClassifierNotRootNodes.push_back(curNode);
|
||||
curMaxTreeDepth++;
|
||||
}
|
||||
|
||||
nodeId++;
|
||||
}
|
||||
}
|
||||
|
||||
curStage.setNumClassifierRootNodes(treesCount);
|
||||
haarStages.push_back(curStage);
|
||||
}
|
||||
|
||||
//fill in cascade stats
|
||||
haar.NumStages = static_cast<Ncv32u>(haarStages.size());
|
||||
haar.NumClassifierRootNodes = static_cast<Ncv32u>(haarClassifierNodes.size());
|
||||
haar.NumClassifierTotalNodes = static_cast<Ncv32u>(haar.NumClassifierRootNodes + h_TmpClassifierNotRootNodes.size());
|
||||
haar.NumFeatures = static_cast<Ncv32u>(haarFeatures.size());
|
||||
|
||||
//merge root and leaf nodes in one classifiers array
|
||||
Ncv32u offsetRoot = static_cast<Ncv32u>(haarClassifierNodes.size());
|
||||
for (Ncv32u i=0; i<haarClassifierNodes.size(); i++)
|
||||
{
|
||||
HaarFeatureDescriptor32 featureDesc = haarClassifierNodes[i].getFeatureDesc();
|
||||
|
||||
HaarClassifierNodeDescriptor32 nodeLeft = haarClassifierNodes[i].getLeftNodeDesc();
|
||||
if (!featureDesc.isLeftNodeLeaf())
|
||||
{
|
||||
Ncv32u newOffset = nodeLeft.getNextNodeOffset() + offsetRoot;
|
||||
nodeLeft.create(newOffset);
|
||||
}
|
||||
haarClassifierNodes[i].setLeftNodeDesc(nodeLeft);
|
||||
|
||||
HaarClassifierNodeDescriptor32 nodeRight = haarClassifierNodes[i].getRightNodeDesc();
|
||||
if (!featureDesc.isRightNodeLeaf())
|
||||
{
|
||||
Ncv32u newOffset = nodeRight.getNextNodeOffset() + offsetRoot;
|
||||
nodeRight.create(newOffset);
|
||||
}
|
||||
haarClassifierNodes[i].setRightNodeDesc(nodeRight);
|
||||
}
|
||||
|
||||
for (Ncv32u i=0; i<h_TmpClassifierNotRootNodes.size(); i++)
|
||||
{
|
||||
HaarFeatureDescriptor32 featureDesc = h_TmpClassifierNotRootNodes[i].getFeatureDesc();
|
||||
|
||||
HaarClassifierNodeDescriptor32 nodeLeft = h_TmpClassifierNotRootNodes[i].getLeftNodeDesc();
|
||||
if (!featureDesc.isLeftNodeLeaf())
|
||||
{
|
||||
Ncv32u newOffset = nodeLeft.getNextNodeOffset() + offsetRoot;
|
||||
nodeLeft.create(newOffset);
|
||||
}
|
||||
h_TmpClassifierNotRootNodes[i].setLeftNodeDesc(nodeLeft);
|
||||
|
||||
HaarClassifierNodeDescriptor32 nodeRight = h_TmpClassifierNotRootNodes[i].getRightNodeDesc();
|
||||
if (!featureDesc.isRightNodeLeaf())
|
||||
{
|
||||
Ncv32u newOffset = nodeRight.getNextNodeOffset() + offsetRoot;
|
||||
nodeRight.create(newOffset);
|
||||
}
|
||||
h_TmpClassifierNotRootNodes[i].setRightNodeDesc(nodeRight);
|
||||
|
||||
haarClassifierNodes.push_back(h_TmpClassifierNotRootNodes[i]);
|
||||
}
|
||||
|
||||
return NCV_SUCCESS;
|
||||
}
|
||||
|
||||
#endif /* HAVE_CUDA */
|
||||
|
@ -48,16 +48,10 @@ using namespace cv::gpu;
|
||||
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
|
||||
|
||||
void cv::gpu::cvtColor(const GpuMat&, GpuMat&, int, int, Stream&) { throw_no_cuda(); }
|
||||
|
||||
void cv::gpu::demosaicing(const GpuMat&, GpuMat&, int, int, Stream&) { throw_no_cuda(); }
|
||||
|
||||
void cv::gpu::swapChannels(GpuMat&, const int[], Stream&) { throw_no_cuda(); }
|
||||
|
||||
void cv::gpu::gammaCorrection(const GpuMat&, GpuMat&, bool, Stream&) { throw_no_cuda(); }
|
||||
|
||||
void cv::gpu::alphaComp(const GpuMat&, const GpuMat&, GpuMat&, int, Stream&) { throw_no_cuda(); }
|
||||
|
||||
|
||||
#else /* !defined (HAVE_CUDA) */
|
||||
|
||||
#include "cvt_color_internal.h"
|
||||
@ -1587,7 +1581,7 @@ namespace
|
||||
(void)src;
|
||||
(void)dst;
|
||||
(void)st;
|
||||
CV_Error( cv::Error::StsBadFlag, "Unknown/unsupported color conversion code" );
|
||||
CV_Error( CV_StsBadFlag, "Unknown/unsupported color conversion code" );
|
||||
#else
|
||||
CV_Assert(src.type() == CV_8UC4 || src.type() == CV_16UC4);
|
||||
|
||||
@ -1682,9 +1676,6 @@ namespace
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// cvtColor
|
||||
|
||||
void cv::gpu::cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn, Stream& stream)
|
||||
{
|
||||
typedef void (*func_t)(const GpuMat& src, GpuMat& dst, int dcn, Stream& stream);
|
||||
@ -1868,9 +1859,6 @@ void cv::gpu::cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn, Stream
|
||||
func(src, dst, dcn, stream);
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// demosaicing
|
||||
|
||||
void cv::gpu::demosaicing(const GpuMat& src, GpuMat& dst, int code, int dcn, Stream& stream)
|
||||
{
|
||||
const int depth = src.depth();
|
||||
@ -1939,9 +1927,6 @@ void cv::gpu::demosaicing(const GpuMat& src, GpuMat& dst, int code, int dcn, Str
|
||||
}
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// swapChannels
|
||||
|
||||
void cv::gpu::swapChannels(GpuMat& image, const int dstOrder[4], Stream& s)
|
||||
{
|
||||
CV_Assert(image.type() == CV_8UC4);
|
||||
@ -1960,9 +1945,6 @@ void cv::gpu::swapChannels(GpuMat& image, const int dstOrder[4], Stream& s)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// gammaCorrection
|
||||
|
||||
void cv::gpu::gammaCorrection(const GpuMat& src, GpuMat& dst, bool forward, Stream& stream)
|
||||
{
|
||||
#if (CUDA_VERSION < 5000)
|
||||
@ -2004,77 +1986,4 @@ void cv::gpu::gammaCorrection(const GpuMat& src, GpuMat& dst, bool forward, Stre
|
||||
#endif
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////////////
|
||||
// alphaComp
|
||||
|
||||
namespace
|
||||
{
|
||||
template <int DEPTH> struct NppAlphaCompFunc
|
||||
{
|
||||
typedef typename NPPTypeTraits<DEPTH>::npp_type npp_t;
|
||||
|
||||
typedef NppStatus (*func_t)(const npp_t* pSrc1, int nSrc1Step, const npp_t* pSrc2, int nSrc2Step, npp_t* pDst, int nDstStep, NppiSize oSizeROI, NppiAlphaOp eAlphaOp);
|
||||
};
|
||||
|
||||
template <int DEPTH, typename NppAlphaCompFunc<DEPTH>::func_t func> struct NppAlphaComp
|
||||
{
|
||||
typedef typename NPPTypeTraits<DEPTH>::npp_type npp_t;
|
||||
|
||||
static void call(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, NppiAlphaOp eAlphaOp, cudaStream_t stream)
|
||||
{
|
||||
NppStreamHandler h(stream);
|
||||
|
||||
NppiSize oSizeROI;
|
||||
oSizeROI.width = img1.cols;
|
||||
oSizeROI.height = img2.rows;
|
||||
|
||||
nppSafeCall( func(img1.ptr<npp_t>(), static_cast<int>(img1.step), img2.ptr<npp_t>(), static_cast<int>(img2.step),
|
||||
dst.ptr<npp_t>(), static_cast<int>(dst.step), oSizeROI, eAlphaOp) );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
void cv::gpu::alphaComp(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, int alpha_op, Stream& stream)
|
||||
{
|
||||
static const NppiAlphaOp npp_alpha_ops[] = {
|
||||
NPPI_OP_ALPHA_OVER,
|
||||
NPPI_OP_ALPHA_IN,
|
||||
NPPI_OP_ALPHA_OUT,
|
||||
NPPI_OP_ALPHA_ATOP,
|
||||
NPPI_OP_ALPHA_XOR,
|
||||
NPPI_OP_ALPHA_PLUS,
|
||||
NPPI_OP_ALPHA_OVER_PREMUL,
|
||||
NPPI_OP_ALPHA_IN_PREMUL,
|
||||
NPPI_OP_ALPHA_OUT_PREMUL,
|
||||
NPPI_OP_ALPHA_ATOP_PREMUL,
|
||||
NPPI_OP_ALPHA_XOR_PREMUL,
|
||||
NPPI_OP_ALPHA_PLUS_PREMUL,
|
||||
NPPI_OP_ALPHA_PREMUL
|
||||
};
|
||||
|
||||
typedef void (*func_t)(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, NppiAlphaOp eAlphaOp, cudaStream_t stream);
|
||||
|
||||
static const func_t funcs[] =
|
||||
{
|
||||
NppAlphaComp<CV_8U, nppiAlphaComp_8u_AC4R>::call,
|
||||
0,
|
||||
NppAlphaComp<CV_16U, nppiAlphaComp_16u_AC4R>::call,
|
||||
0,
|
||||
NppAlphaComp<CV_32S, nppiAlphaComp_32s_AC4R>::call,
|
||||
NppAlphaComp<CV_32F, nppiAlphaComp_32f_AC4R>::call
|
||||
};
|
||||
|
||||
CV_Assert( img1.type() == CV_8UC4 || img1.type() == CV_16UC4 || img1.type() == CV_32SC4 || img1.type() == CV_32FC4 );
|
||||
CV_Assert( img1.size() == img2.size() && img1.type() == img2.type() );
|
||||
|
||||
dst.create(img1.size(), img1.type());
|
||||
|
||||
const func_t func = funcs[img1.depth()];
|
||||
|
||||
func(img1, img2, dst, npp_alpha_ops[alpha_op], StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
||||
#endif /* !defined (HAVE_CUDA) */
|
201
modules/gpu/src/cuda/NV12ToARGB.cu
Normal file
201
modules/gpu/src/cuda/NV12ToARGB.cu
Normal file
@ -0,0 +1,201 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
/*
|
||||
* NV12ToARGB color space conversion CUDA kernel
|
||||
*
|
||||
* This sample uses CUDA to perform a simple NV12 (YUV 4:2:0 planar)
|
||||
* source and converts to output in ARGB format
|
||||
*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev {
|
||||
namespace video_decoding
|
||||
{
|
||||
__constant__ uint constAlpha = ((uint)0xff << 24);
|
||||
|
||||
__constant__ float constHueColorSpaceMat[9];
|
||||
|
||||
void loadHueCSC(float hueCSC[9])
|
||||
{
|
||||
cudaSafeCall( cudaMemcpyToSymbol(constHueColorSpaceMat, hueCSC, 9 * sizeof(float)) );
|
||||
}
|
||||
|
||||
__device__ void YUV2RGB(const uint* yuvi, float* red, float* green, float* blue)
|
||||
{
|
||||
float luma, chromaCb, chromaCr;
|
||||
|
||||
// Prepare for hue adjustment
|
||||
luma = (float)yuvi[0];
|
||||
chromaCb = (float)((int)yuvi[1] - 512.0f);
|
||||
chromaCr = (float)((int)yuvi[2] - 512.0f);
|
||||
|
||||
// Convert YUV To RGB with hue adjustment
|
||||
*red = (luma * constHueColorSpaceMat[0]) +
|
||||
(chromaCb * constHueColorSpaceMat[1]) +
|
||||
(chromaCr * constHueColorSpaceMat[2]);
|
||||
|
||||
*green = (luma * constHueColorSpaceMat[3]) +
|
||||
(chromaCb * constHueColorSpaceMat[4]) +
|
||||
(chromaCr * constHueColorSpaceMat[5]);
|
||||
|
||||
*blue = (luma * constHueColorSpaceMat[6]) +
|
||||
(chromaCb * constHueColorSpaceMat[7]) +
|
||||
(chromaCr * constHueColorSpaceMat[8]);
|
||||
}
|
||||
|
||||
__device__ uint RGBAPACK_10bit(float red, float green, float blue, uint alpha)
|
||||
{
|
||||
uint ARGBpixel = 0;
|
||||
|
||||
// Clamp final 10 bit results
|
||||
red = ::fmin(::fmax(red, 0.0f), 1023.f);
|
||||
green = ::fmin(::fmax(green, 0.0f), 1023.f);
|
||||
blue = ::fmin(::fmax(blue, 0.0f), 1023.f);
|
||||
|
||||
// Convert to 8 bit unsigned integers per color component
|
||||
ARGBpixel = (((uint)blue >> 2) |
|
||||
(((uint)green >> 2) << 8) |
|
||||
(((uint)red >> 2) << 16) |
|
||||
(uint)alpha);
|
||||
|
||||
return ARGBpixel;
|
||||
}
|
||||
|
||||
// CUDA kernel for outputing the final ARGB output from NV12
|
||||
|
||||
#define COLOR_COMPONENT_BIT_SIZE 10
|
||||
#define COLOR_COMPONENT_MASK 0x3FF
|
||||
|
||||
__global__ void NV12ToARGB(uchar* srcImage, size_t nSourcePitch,
|
||||
uint* dstImage, size_t nDestPitch,
|
||||
uint width, uint height)
|
||||
{
|
||||
// Pad borders with duplicate pixels, and we multiply by 2 because we process 2 pixels per thread
|
||||
const int x = blockIdx.x * (blockDim.x << 1) + (threadIdx.x << 1);
|
||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
if (x >= width || y >= height)
|
||||
return;
|
||||
|
||||
// Read 2 Luma components at a time, so we don't waste processing since CbCr are decimated this way.
|
||||
// if we move to texture we could read 4 luminance values
|
||||
|
||||
uint yuv101010Pel[2];
|
||||
|
||||
yuv101010Pel[0] = (srcImage[y * nSourcePitch + x ]) << 2;
|
||||
yuv101010Pel[1] = (srcImage[y * nSourcePitch + x + 1]) << 2;
|
||||
|
||||
const size_t chromaOffset = nSourcePitch * height;
|
||||
|
||||
const int y_chroma = y >> 1;
|
||||
|
||||
if (y & 1) // odd scanline ?
|
||||
{
|
||||
uint chromaCb = srcImage[chromaOffset + y_chroma * nSourcePitch + x ];
|
||||
uint chromaCr = srcImage[chromaOffset + y_chroma * nSourcePitch + x + 1];
|
||||
|
||||
if (y_chroma < ((height >> 1) - 1)) // interpolate chroma vertically
|
||||
{
|
||||
chromaCb = (chromaCb + srcImage[chromaOffset + (y_chroma + 1) * nSourcePitch + x ] + 1) >> 1;
|
||||
chromaCr = (chromaCr + srcImage[chromaOffset + (y_chroma + 1) * nSourcePitch + x + 1] + 1) >> 1;
|
||||
}
|
||||
|
||||
yuv101010Pel[0] |= (chromaCb << ( COLOR_COMPONENT_BIT_SIZE + 2));
|
||||
yuv101010Pel[0] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
|
||||
|
||||
yuv101010Pel[1] |= (chromaCb << ( COLOR_COMPONENT_BIT_SIZE + 2));
|
||||
yuv101010Pel[1] |= (chromaCr << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
|
||||
}
|
||||
else
|
||||
{
|
||||
yuv101010Pel[0] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x ] << ( COLOR_COMPONENT_BIT_SIZE + 2));
|
||||
yuv101010Pel[0] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
|
||||
|
||||
yuv101010Pel[1] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x ] << ( COLOR_COMPONENT_BIT_SIZE + 2));
|
||||
yuv101010Pel[1] |= ((uint)srcImage[chromaOffset + y_chroma * nSourcePitch + x + 1] << ((COLOR_COMPONENT_BIT_SIZE << 1) + 2));
|
||||
}
|
||||
|
||||
// this steps performs the color conversion
|
||||
uint yuvi[6];
|
||||
float red[2], green[2], blue[2];
|
||||
|
||||
yuvi[0] = (yuv101010Pel[0] & COLOR_COMPONENT_MASK );
|
||||
yuvi[1] = ((yuv101010Pel[0] >> COLOR_COMPONENT_BIT_SIZE) & COLOR_COMPONENT_MASK);
|
||||
yuvi[2] = ((yuv101010Pel[0] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK);
|
||||
|
||||
yuvi[3] = (yuv101010Pel[1] & COLOR_COMPONENT_MASK );
|
||||
yuvi[4] = ((yuv101010Pel[1] >> COLOR_COMPONENT_BIT_SIZE) & COLOR_COMPONENT_MASK);
|
||||
yuvi[5] = ((yuv101010Pel[1] >> (COLOR_COMPONENT_BIT_SIZE << 1)) & COLOR_COMPONENT_MASK);
|
||||
|
||||
// YUV to RGB Transformation conversion
|
||||
YUV2RGB(&yuvi[0], &red[0], &green[0], &blue[0]);
|
||||
YUV2RGB(&yuvi[3], &red[1], &green[1], &blue[1]);
|
||||
|
||||
// Clamp the results to RGBA
|
||||
|
||||
const size_t dstImagePitch = nDestPitch >> 2;
|
||||
|
||||
dstImage[y * dstImagePitch + x ] = RGBAPACK_10bit(red[0], green[0], blue[0], constAlpha);
|
||||
dstImage[y * dstImagePitch + x + 1 ] = RGBAPACK_10bit(red[1], green[1], blue[1], constAlpha);
|
||||
}
|
||||
|
||||
void NV12ToARGB_gpu(const PtrStepb decodedFrame, PtrStepSz<uint> interopFrame, cudaStream_t stream)
|
||||
{
|
||||
dim3 block(32, 8);
|
||||
dim3 grid(divUp(interopFrame.cols, 2 * block.x), divUp(interopFrame.rows, block.y));
|
||||
|
||||
NV12ToARGB<<<grid, block, 0, stream>>>(decodedFrame.data, decodedFrame.step, interopFrame.data, interopFrame.step,
|
||||
interopFrame.cols, interopFrame.rows);
|
||||
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
}
|
||||
}}}
|
||||
|
||||
#endif /* CUDA_DISABLER */
|
@ -150,11 +150,11 @@ namespace cv { namespace gpu { namespace cudev
|
||||
|
||||
static caller_t funcs[] =
|
||||
{
|
||||
bilateral_caller<T, BrdConstant>,
|
||||
bilateral_caller<T, BrdReflect101>,
|
||||
bilateral_caller<T, BrdReplicate>,
|
||||
bilateral_caller<T, BrdConstant>,
|
||||
bilateral_caller<T, BrdReflect>,
|
||||
bilateral_caller<T, BrdWrap>,
|
||||
bilateral_caller<T, BrdReflect101>
|
||||
};
|
||||
funcs[borderMode](src, dst, kernel_size, gauss_spatial_coeff, gauss_color_coeff, stream);
|
||||
}
|
@ -94,8 +94,8 @@ namespace canny
|
||||
texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_src(false, cudaFilterModePoint, cudaAddressModeClamp);
|
||||
struct SrcTex
|
||||
{
|
||||
int xoff;
|
||||
int yoff;
|
||||
const int xoff;
|
||||
const int yoff;
|
||||
__host__ SrcTex(int _xoff, int _yoff) : xoff(_xoff), yoff(_yoff) {}
|
||||
|
||||
__device__ __forceinline__ int operator ()(int y, int x) const
|
@ -42,7 +42,7 @@
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "column_filter.hpp"
|
||||
#include "column_filter.h"
|
||||
|
||||
namespace filter
|
||||
{
|
@ -42,7 +42,7 @@
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "column_filter.hpp"
|
||||
#include "column_filter.h"
|
||||
|
||||
namespace filter
|
||||
{
|
@ -42,7 +42,7 @@
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "column_filter.hpp"
|
||||
#include "column_filter.h"
|
||||
|
||||
namespace filter
|
||||
{
|
@ -42,7 +42,7 @@
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "column_filter.hpp"
|
||||
#include "column_filter.h"
|
||||
|
||||
namespace filter
|
||||
{
|
@ -42,7 +42,7 @@
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "column_filter.hpp"
|
||||
#include "column_filter.h"
|
||||
|
||||
namespace filter
|
||||
{
|
@ -42,7 +42,7 @@
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "column_filter.hpp"
|
||||
#include "column_filter.h"
|
||||
|
||||
namespace filter
|
||||
{
|
@ -42,7 +42,7 @@
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "column_filter.hpp"
|
||||
#include "column_filter.h"
|
||||
|
||||
namespace filter
|
||||
{
|
@ -42,7 +42,7 @@
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "column_filter.hpp"
|
||||
#include "column_filter.h"
|
||||
|
||||
namespace filter
|
||||
{
|
@ -42,7 +42,7 @@
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "column_filter.hpp"
|
||||
#include "column_filter.h"
|
||||
|
||||
namespace filter
|
||||
{
|
@ -42,7 +42,7 @@
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "column_filter.hpp"
|
||||
#include "column_filter.h"
|
||||
|
||||
namespace filter
|
||||
{
|
@ -42,7 +42,7 @@
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "column_filter.hpp"
|
||||
#include "column_filter.h"
|
||||
|
||||
namespace filter
|
||||
{
|
@ -42,7 +42,7 @@
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "column_filter.hpp"
|
||||
#include "column_filter.h"
|
||||
|
||||
namespace filter
|
||||
{
|
@ -42,7 +42,7 @@
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "column_filter.hpp"
|
||||
#include "column_filter.h"
|
||||
|
||||
namespace filter
|
||||
{
|
@ -42,7 +42,7 @@
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "column_filter.hpp"
|
||||
#include "column_filter.h"
|
||||
|
||||
namespace filter
|
||||
{
|
@ -42,7 +42,7 @@
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "column_filter.hpp"
|
||||
#include "column_filter.h"
|
||||
|
||||
namespace filter
|
||||
{
|
@ -187,38 +187,38 @@ namespace filter
|
||||
{
|
||||
{
|
||||
0,
|
||||
column_filter::caller< 1, T, D, BrdColConstant>,
|
||||
column_filter::caller< 2, T, D, BrdColConstant>,
|
||||
column_filter::caller< 3, T, D, BrdColConstant>,
|
||||
column_filter::caller< 4, T, D, BrdColConstant>,
|
||||
column_filter::caller< 5, T, D, BrdColConstant>,
|
||||
column_filter::caller< 6, T, D, BrdColConstant>,
|
||||
column_filter::caller< 7, T, D, BrdColConstant>,
|
||||
column_filter::caller< 8, T, D, BrdColConstant>,
|
||||
column_filter::caller< 9, T, D, BrdColConstant>,
|
||||
column_filter::caller<10, T, D, BrdColConstant>,
|
||||
column_filter::caller<11, T, D, BrdColConstant>,
|
||||
column_filter::caller<12, T, D, BrdColConstant>,
|
||||
column_filter::caller<13, T, D, BrdColConstant>,
|
||||
column_filter::caller<14, T, D, BrdColConstant>,
|
||||
column_filter::caller<15, T, D, BrdColConstant>,
|
||||
column_filter::caller<16, T, D, BrdColConstant>,
|
||||
column_filter::caller<17, T, D, BrdColConstant>,
|
||||
column_filter::caller<18, T, D, BrdColConstant>,
|
||||
column_filter::caller<19, T, D, BrdColConstant>,
|
||||
column_filter::caller<20, T, D, BrdColConstant>,
|
||||
column_filter::caller<21, T, D, BrdColConstant>,
|
||||
column_filter::caller<22, T, D, BrdColConstant>,
|
||||
column_filter::caller<23, T, D, BrdColConstant>,
|
||||
column_filter::caller<24, T, D, BrdColConstant>,
|
||||
column_filter::caller<25, T, D, BrdColConstant>,
|
||||
column_filter::caller<26, T, D, BrdColConstant>,
|
||||
column_filter::caller<27, T, D, BrdColConstant>,
|
||||
column_filter::caller<28, T, D, BrdColConstant>,
|
||||
column_filter::caller<29, T, D, BrdColConstant>,
|
||||
column_filter::caller<30, T, D, BrdColConstant>,
|
||||
column_filter::caller<31, T, D, BrdColConstant>,
|
||||
column_filter::caller<32, T, D, BrdColConstant>
|
||||
column_filter::caller< 1, T, D, BrdColReflect101>,
|
||||
column_filter::caller< 2, T, D, BrdColReflect101>,
|
||||
column_filter::caller< 3, T, D, BrdColReflect101>,
|
||||
column_filter::caller< 4, T, D, BrdColReflect101>,
|
||||
column_filter::caller< 5, T, D, BrdColReflect101>,
|
||||
column_filter::caller< 6, T, D, BrdColReflect101>,
|
||||
column_filter::caller< 7, T, D, BrdColReflect101>,
|
||||
column_filter::caller< 8, T, D, BrdColReflect101>,
|
||||
column_filter::caller< 9, T, D, BrdColReflect101>,
|
||||
column_filter::caller<10, T, D, BrdColReflect101>,
|
||||
column_filter::caller<11, T, D, BrdColReflect101>,
|
||||
column_filter::caller<12, T, D, BrdColReflect101>,
|
||||
column_filter::caller<13, T, D, BrdColReflect101>,
|
||||
column_filter::caller<14, T, D, BrdColReflect101>,
|
||||
column_filter::caller<15, T, D, BrdColReflect101>,
|
||||
column_filter::caller<16, T, D, BrdColReflect101>,
|
||||
column_filter::caller<17, T, D, BrdColReflect101>,
|
||||
column_filter::caller<18, T, D, BrdColReflect101>,
|
||||
column_filter::caller<19, T, D, BrdColReflect101>,
|
||||
column_filter::caller<20, T, D, BrdColReflect101>,
|
||||
column_filter::caller<21, T, D, BrdColReflect101>,
|
||||
column_filter::caller<22, T, D, BrdColReflect101>,
|
||||
column_filter::caller<23, T, D, BrdColReflect101>,
|
||||
column_filter::caller<24, T, D, BrdColReflect101>,
|
||||
column_filter::caller<25, T, D, BrdColReflect101>,
|
||||
column_filter::caller<26, T, D, BrdColReflect101>,
|
||||
column_filter::caller<27, T, D, BrdColReflect101>,
|
||||
column_filter::caller<28, T, D, BrdColReflect101>,
|
||||
column_filter::caller<29, T, D, BrdColReflect101>,
|
||||
column_filter::caller<30, T, D, BrdColReflect101>,
|
||||
column_filter::caller<31, T, D, BrdColReflect101>,
|
||||
column_filter::caller<32, T, D, BrdColReflect101>
|
||||
},
|
||||
{
|
||||
0,
|
||||
@ -255,6 +255,41 @@ namespace filter
|
||||
column_filter::caller<31, T, D, BrdColReplicate>,
|
||||
column_filter::caller<32, T, D, BrdColReplicate>
|
||||
},
|
||||
{
|
||||
0,
|
||||
column_filter::caller< 1, T, D, BrdColConstant>,
|
||||
column_filter::caller< 2, T, D, BrdColConstant>,
|
||||
column_filter::caller< 3, T, D, BrdColConstant>,
|
||||
column_filter::caller< 4, T, D, BrdColConstant>,
|
||||
column_filter::caller< 5, T, D, BrdColConstant>,
|
||||
column_filter::caller< 6, T, D, BrdColConstant>,
|
||||
column_filter::caller< 7, T, D, BrdColConstant>,
|
||||
column_filter::caller< 8, T, D, BrdColConstant>,
|
||||
column_filter::caller< 9, T, D, BrdColConstant>,
|
||||
column_filter::caller<10, T, D, BrdColConstant>,
|
||||
column_filter::caller<11, T, D, BrdColConstant>,
|
||||
column_filter::caller<12, T, D, BrdColConstant>,
|
||||
column_filter::caller<13, T, D, BrdColConstant>,
|
||||
column_filter::caller<14, T, D, BrdColConstant>,
|
||||
column_filter::caller<15, T, D, BrdColConstant>,
|
||||
column_filter::caller<16, T, D, BrdColConstant>,
|
||||
column_filter::caller<17, T, D, BrdColConstant>,
|
||||
column_filter::caller<18, T, D, BrdColConstant>,
|
||||
column_filter::caller<19, T, D, BrdColConstant>,
|
||||
column_filter::caller<20, T, D, BrdColConstant>,
|
||||
column_filter::caller<21, T, D, BrdColConstant>,
|
||||
column_filter::caller<22, T, D, BrdColConstant>,
|
||||
column_filter::caller<23, T, D, BrdColConstant>,
|
||||
column_filter::caller<24, T, D, BrdColConstant>,
|
||||
column_filter::caller<25, T, D, BrdColConstant>,
|
||||
column_filter::caller<26, T, D, BrdColConstant>,
|
||||
column_filter::caller<27, T, D, BrdColConstant>,
|
||||
column_filter::caller<28, T, D, BrdColConstant>,
|
||||
column_filter::caller<29, T, D, BrdColConstant>,
|
||||
column_filter::caller<30, T, D, BrdColConstant>,
|
||||
column_filter::caller<31, T, D, BrdColConstant>,
|
||||
column_filter::caller<32, T, D, BrdColConstant>
|
||||
},
|
||||
{
|
||||
0,
|
||||
column_filter::caller< 1, T, D, BrdColReflect>,
|
||||
@ -324,41 +359,6 @@ namespace filter
|
||||
column_filter::caller<30, T, D, BrdColWrap>,
|
||||
column_filter::caller<31, T, D, BrdColWrap>,
|
||||
column_filter::caller<32, T, D, BrdColWrap>
|
||||
},
|
||||
{
|
||||
0,
|
||||
column_filter::caller< 1, T, D, BrdColReflect101>,
|
||||
column_filter::caller< 2, T, D, BrdColReflect101>,
|
||||
column_filter::caller< 3, T, D, BrdColReflect101>,
|
||||
column_filter::caller< 4, T, D, BrdColReflect101>,
|
||||
column_filter::caller< 5, T, D, BrdColReflect101>,
|
||||
column_filter::caller< 6, T, D, BrdColReflect101>,
|
||||
column_filter::caller< 7, T, D, BrdColReflect101>,
|
||||
column_filter::caller< 8, T, D, BrdColReflect101>,
|
||||
column_filter::caller< 9, T, D, BrdColReflect101>,
|
||||
column_filter::caller<10, T, D, BrdColReflect101>,
|
||||
column_filter::caller<11, T, D, BrdColReflect101>,
|
||||
column_filter::caller<12, T, D, BrdColReflect101>,
|
||||
column_filter::caller<13, T, D, BrdColReflect101>,
|
||||
column_filter::caller<14, T, D, BrdColReflect101>,
|
||||
column_filter::caller<15, T, D, BrdColReflect101>,
|
||||
column_filter::caller<16, T, D, BrdColReflect101>,
|
||||
column_filter::caller<17, T, D, BrdColReflect101>,
|
||||
column_filter::caller<18, T, D, BrdColReflect101>,
|
||||
column_filter::caller<19, T, D, BrdColReflect101>,
|
||||
column_filter::caller<20, T, D, BrdColReflect101>,
|
||||
column_filter::caller<21, T, D, BrdColReflect101>,
|
||||
column_filter::caller<22, T, D, BrdColReflect101>,
|
||||
column_filter::caller<23, T, D, BrdColReflect101>,
|
||||
column_filter::caller<24, T, D, BrdColReflect101>,
|
||||
column_filter::caller<25, T, D, BrdColReflect101>,
|
||||
column_filter::caller<26, T, D, BrdColReflect101>,
|
||||
column_filter::caller<27, T, D, BrdColReflect101>,
|
||||
column_filter::caller<28, T, D, BrdColReflect101>,
|
||||
column_filter::caller<29, T, D, BrdColReflect101>,
|
||||
column_filter::caller<30, T, D, BrdColReflect101>,
|
||||
column_filter::caller<31, T, D, BrdColReflect101>,
|
||||
column_filter::caller<32, T, D, BrdColReflect101>
|
||||
}
|
||||
};
|
||||
|
@ -86,11 +86,11 @@ namespace cv { namespace gpu { namespace cudev
|
||||
|
||||
static const caller_t callers[5] =
|
||||
{
|
||||
CopyMakeBorderDispatcher<BrdConstant, vec_type>::call,
|
||||
CopyMakeBorderDispatcher<BrdReflect101, vec_type>::call,
|
||||
CopyMakeBorderDispatcher<BrdReplicate, vec_type>::call,
|
||||
CopyMakeBorderDispatcher<BrdConstant, vec_type>::call,
|
||||
CopyMakeBorderDispatcher<BrdReflect, vec_type>::call,
|
||||
CopyMakeBorderDispatcher<BrdWrap, vec_type>::call,
|
||||
CopyMakeBorderDispatcher<BrdReflect101, vec_type>::call
|
||||
CopyMakeBorderDispatcher<BrdWrap, vec_type>::call
|
||||
};
|
||||
|
||||
callers[borderMode](PtrStepSz<vec_type>(src), PtrStepSz<vec_type>(dst), top, left, borderValue, stream);
|
2636
modules/gpu/src/cuda/element_operations.cu
Normal file
2636
modules/gpu/src/cuda/element_operations.cu
Normal file
File diff suppressed because it is too large
Load Diff
@ -48,7 +48,7 @@
|
||||
#include "opencv2/core/cuda/utility.hpp"
|
||||
#include "opencv2/core/cuda/reduce.hpp"
|
||||
#include "opencv2/core/cuda/functional.hpp"
|
||||
#include "fgd.hpp"
|
||||
#include "fgd_bgfg_common.hpp"
|
||||
|
||||
using namespace cv::gpu;
|
||||
using namespace cv::gpu::cudev;
|
1008
modules/gpu/src/cuda/imgproc.cu
Normal file
1008
modules/gpu/src/cuda/imgproc.cu
Normal file
File diff suppressed because it is too large
Load Diff
@ -40,34 +40,34 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __THREAD_WRAPPERS_H__
|
||||
#define __THREAD_WRAPPERS_H__
|
||||
#ifndef __OPENCV_internal_shared_HPP__
|
||||
#define __OPENCV_internal_shared_HPP__
|
||||
|
||||
#include "opencv2/core.hpp"
|
||||
#include <cuda_runtime.h>
|
||||
#include <npp.h>
|
||||
#include "NPP_staging.hpp"
|
||||
#include "opencv2/gpu/devmem2d.hpp"
|
||||
#include "safe_call.hpp"
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
|
||||
namespace cv { namespace gpu { namespace detail {
|
||||
|
||||
class Thread
|
||||
namespace cv { namespace gpu
|
||||
{
|
||||
public:
|
||||
typedef void (*Func)(void* userData);
|
||||
class NppStStreamHandler
|
||||
{
|
||||
public:
|
||||
inline explicit NppStStreamHandler(cudaStream_t newStream = 0)
|
||||
{
|
||||
oldStream = nppStSetActiveCUDAstream(newStream);
|
||||
}
|
||||
|
||||
explicit Thread(Func func, void* userData = 0);
|
||||
inline ~NppStStreamHandler()
|
||||
{
|
||||
nppStSetActiveCUDAstream(oldStream);
|
||||
}
|
||||
|
||||
void wait();
|
||||
private:
|
||||
cudaStream_t oldStream;
|
||||
};
|
||||
}}
|
||||
|
||||
static void sleep(int ms);
|
||||
|
||||
class Impl;
|
||||
|
||||
private:
|
||||
cv::Ptr<Impl> impl_;
|
||||
};
|
||||
|
||||
}}}
|
||||
|
||||
namespace cv {
|
||||
template <> void Ptr<cv::gpu::detail::Thread::Impl>::delete_obj();
|
||||
}
|
||||
|
||||
#endif // __THREAD_WRAPPERS_H__
|
||||
#endif /* __OPENCV_internal_shared_HPP__ */
|
1366
modules/gpu/src/cuda/matrix_reductions.cu
Normal file
1366
modules/gpu/src/cuda/matrix_reductions.cu
Normal file
File diff suppressed because it is too large
Load Diff
@ -40,6 +40,8 @@
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/vec_traits.hpp"
|
||||
#include "opencv2/core/cuda/vec_math.hpp"
|
||||
@ -159,11 +161,11 @@ namespace cv { namespace gpu { namespace cudev
|
||||
|
||||
static func_t funcs[] =
|
||||
{
|
||||
nlm_caller<T, BrdConstant>,
|
||||
nlm_caller<T, BrdReflect101>,
|
||||
nlm_caller<T, BrdReplicate>,
|
||||
nlm_caller<T, BrdConstant>,
|
||||
nlm_caller<T, BrdReflect>,
|
||||
nlm_caller<T, BrdWrap>,
|
||||
nlm_caller<T, BrdReflect101>
|
||||
};
|
||||
funcs[borderMode](src, dst, search_radius, block_radius, h, stream);
|
||||
}
|
||||
@ -562,3 +564,6 @@ namespace cv { namespace gpu { namespace cudev
|
||||
}
|
||||
}
|
||||
}}}
|
||||
|
||||
|
||||
#endif /* CUDA_DISABLER */
|
@ -50,6 +50,125 @@
|
||||
using namespace cv::gpu;
|
||||
using namespace cv::gpu::cudev;
|
||||
|
||||
namespace optflowbm
|
||||
{
|
||||
texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_prev(false, cudaFilterModePoint, cudaAddressModeClamp);
|
||||
texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_curr(false, cudaFilterModePoint, cudaAddressModeClamp);
|
||||
|
||||
__device__ int cmpBlocks(int X1, int Y1, int X2, int Y2, int2 blockSize)
|
||||
{
|
||||
int s = 0;
|
||||
|
||||
for (int y = 0; y < blockSize.y; ++y)
|
||||
{
|
||||
for (int x = 0; x < blockSize.x; ++x)
|
||||
s += ::abs(tex2D(tex_prev, X1 + x, Y1 + y) - tex2D(tex_curr, X2 + x, Y2 + y));
|
||||
}
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
__global__ void calcOptFlowBM(PtrStepSzf velx, PtrStepf vely, const int2 blockSize, const int2 shiftSize, const bool usePrevious,
|
||||
const int maxX, const int maxY, const int acceptLevel, const int escapeLevel,
|
||||
const short2* ss, const int ssCount)
|
||||
{
|
||||
const int j = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
const int i = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
if (i >= velx.rows || j >= velx.cols)
|
||||
return;
|
||||
|
||||
const int X1 = j * shiftSize.x;
|
||||
const int Y1 = i * shiftSize.y;
|
||||
|
||||
const int offX = usePrevious ? __float2int_rn(velx(i, j)) : 0;
|
||||
const int offY = usePrevious ? __float2int_rn(vely(i, j)) : 0;
|
||||
|
||||
int X2 = X1 + offX;
|
||||
int Y2 = Y1 + offY;
|
||||
|
||||
int dist = numeric_limits<int>::max();
|
||||
|
||||
if (0 <= X2 && X2 <= maxX && 0 <= Y2 && Y2 <= maxY)
|
||||
dist = cmpBlocks(X1, Y1, X2, Y2, blockSize);
|
||||
|
||||
int countMin = 1;
|
||||
int sumx = offX;
|
||||
int sumy = offY;
|
||||
|
||||
if (dist > acceptLevel)
|
||||
{
|
||||
// do brute-force search
|
||||
for (int k = 0; k < ssCount; ++k)
|
||||
{
|
||||
const short2 ssVal = ss[k];
|
||||
|
||||
const int dx = offX + ssVal.x;
|
||||
const int dy = offY + ssVal.y;
|
||||
|
||||
X2 = X1 + dx;
|
||||
Y2 = Y1 + dy;
|
||||
|
||||
if (0 <= X2 && X2 <= maxX && 0 <= Y2 && Y2 <= maxY)
|
||||
{
|
||||
const int tmpDist = cmpBlocks(X1, Y1, X2, Y2, blockSize);
|
||||
if (tmpDist < acceptLevel)
|
||||
{
|
||||
sumx = dx;
|
||||
sumy = dy;
|
||||
countMin = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
if (tmpDist < dist)
|
||||
{
|
||||
dist = tmpDist;
|
||||
sumx = dx;
|
||||
sumy = dy;
|
||||
countMin = 1;
|
||||
}
|
||||
else if (tmpDist == dist)
|
||||
{
|
||||
sumx += dx;
|
||||
sumy += dy;
|
||||
countMin++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (dist > escapeLevel)
|
||||
{
|
||||
sumx = offX;
|
||||
sumy = offY;
|
||||
countMin = 1;
|
||||
}
|
||||
}
|
||||
|
||||
velx(i, j) = static_cast<float>(sumx) / countMin;
|
||||
vely(i, j) = static_cast<float>(sumy) / countMin;
|
||||
}
|
||||
|
||||
void calc(PtrStepSzb prev, PtrStepSzb curr, PtrStepSzf velx, PtrStepSzf vely, int2 blockSize, int2 shiftSize, bool usePrevious,
|
||||
int maxX, int maxY, int acceptLevel, int escapeLevel, const short2* ss, int ssCount, cudaStream_t stream)
|
||||
{
|
||||
bindTexture(&tex_prev, prev);
|
||||
bindTexture(&tex_curr, curr);
|
||||
|
||||
const dim3 block(32, 8);
|
||||
const dim3 grid(divUp(velx.cols, block.x), divUp(vely.rows, block.y));
|
||||
|
||||
calcOptFlowBM<<<grid, block, 0, stream>>>(velx, vely, blockSize, shiftSize, usePrevious,
|
||||
maxX, maxY, acceptLevel, escapeLevel, ss, ssCount);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
}
|
||||
|
||||
/////////////////////////////////////////////////////////
|
||||
// Fast approximate version
|
||||
|
||||
namespace optflowbm_fast
|
||||
{
|
||||
enum
|
@ -525,11 +525,8 @@ namespace cv { namespace gpu { namespace cudev { namespace optflow_farneback
|
||||
|
||||
static const caller_t callers[] =
|
||||
{
|
||||
0 /*gaussianBlurCaller<BrdConstant<float> >*/,
|
||||
gaussianBlurCaller<BrdReflect101<float> >,
|
||||
gaussianBlurCaller<BrdReplicate<float> >,
|
||||
0 /*gaussianBlurCaller<BrdReflect<float> >*/,
|
||||
0 /*gaussianBlurCaller<BrdWrap<float> >*/,
|
||||
gaussianBlurCaller<BrdReflect101<float> >
|
||||
};
|
||||
|
||||
callers[borderMode](src, ksizeHalf, dst, stream);
|
||||
@ -623,11 +620,8 @@ namespace cv { namespace gpu { namespace cudev { namespace optflow_farneback
|
||||
|
||||
static const caller_t callers[] =
|
||||
{
|
||||
0 /*gaussianBlur5Caller<BrdConstant<float>,256>*/,
|
||||
gaussianBlur5Caller<BrdReflect101<float>,256>,
|
||||
gaussianBlur5Caller<BrdReplicate<float>,256>,
|
||||
0 /*gaussianBlur5Caller<BrdReflect<float>,256>*/,
|
||||
0 /*gaussianBlur5Caller<BrdWrap<float>,256>*/,
|
||||
gaussianBlur5Caller<BrdReflect101<float>,256>
|
||||
};
|
||||
|
||||
callers[borderMode](src, ksizeHalf, dst, stream);
|
||||
@ -640,11 +634,8 @@ namespace cv { namespace gpu { namespace cudev { namespace optflow_farneback
|
||||
|
||||
static const caller_t callers[] =
|
||||
{
|
||||
0 /*gaussianBlur5Caller<BrdConstant<float>,128>*/,
|
||||
gaussianBlur5Caller<BrdReflect101<float>,128>,
|
||||
gaussianBlur5Caller<BrdReplicate<float>,128>,
|
||||
0 /*gaussianBlur5Caller<BrdReflect<float>,128>*/,
|
||||
0 /*gaussianBlur5Caller<BrdWrap<float>,128>*/,
|
||||
gaussianBlur5Caller<BrdReflect101<float>,128>
|
||||
};
|
||||
|
||||
callers[borderMode](src, ksizeHalf, dst, stream);
|
@ -212,25 +212,25 @@ namespace cv { namespace gpu { namespace cudev
|
||||
static const caller_t callers[3][5] =
|
||||
{
|
||||
{
|
||||
RemapDispatcher<PointFilter, BrdConstant, T>::call,
|
||||
RemapDispatcher<PointFilter, BrdReflect101, T>::call,
|
||||
RemapDispatcher<PointFilter, BrdReplicate, T>::call,
|
||||
RemapDispatcher<PointFilter, BrdConstant, T>::call,
|
||||
RemapDispatcher<PointFilter, BrdReflect, T>::call,
|
||||
RemapDispatcher<PointFilter, BrdWrap, T>::call,
|
||||
RemapDispatcher<PointFilter, BrdReflect101, T>::call
|
||||
RemapDispatcher<PointFilter, BrdWrap, T>::call
|
||||
},
|
||||
{
|
||||
RemapDispatcher<LinearFilter, BrdConstant, T>::call,
|
||||
RemapDispatcher<LinearFilter, BrdReflect101, T>::call,
|
||||
RemapDispatcher<LinearFilter, BrdReplicate, T>::call,
|
||||
RemapDispatcher<LinearFilter, BrdConstant, T>::call,
|
||||
RemapDispatcher<LinearFilter, BrdReflect, T>::call,
|
||||
RemapDispatcher<LinearFilter, BrdWrap, T>::call,
|
||||
RemapDispatcher<LinearFilter, BrdReflect101, T>::call
|
||||
RemapDispatcher<LinearFilter, BrdWrap, T>::call
|
||||
},
|
||||
{
|
||||
RemapDispatcher<CubicFilter, BrdConstant, T>::call,
|
||||
RemapDispatcher<CubicFilter, BrdReflect101, T>::call,
|
||||
RemapDispatcher<CubicFilter, BrdReplicate, T>::call,
|
||||
RemapDispatcher<CubicFilter, BrdConstant, T>::call,
|
||||
RemapDispatcher<CubicFilter, BrdReflect, T>::call,
|
||||
RemapDispatcher<CubicFilter, BrdWrap, T>::call,
|
||||
RemapDispatcher<CubicFilter, BrdReflect101, T>::call
|
||||
RemapDispatcher<CubicFilter, BrdWrap, T>::call
|
||||
}
|
||||
};
|
||||
|
175
modules/gpu/src/cuda/rgb_to_yv12.cu
Normal file
175
modules/gpu/src/cuda/rgb_to_yv12.cu
Normal file
@ -0,0 +1,175 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or implied warranties, including, but not limited to, the implied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "opencv2/core/cuda/common.hpp"
|
||||
#include "opencv2/core/cuda/vec_traits.hpp"
|
||||
|
||||
namespace cv { namespace gpu { namespace cudev
|
||||
{
|
||||
namespace video_encoding
|
||||
{
|
||||
__device__ __forceinline__ void rgbtoy(const uchar b, const uchar g, const uchar r, uchar& y)
|
||||
{
|
||||
y = static_cast<uchar>(((int)(30 * r) + (int)(59 * g) + (int)(11 * b)) / 100);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ void rgbtoyuv(const uchar b, const uchar g, const uchar r, uchar& y, uchar& u, uchar& v)
|
||||
{
|
||||
rgbtoy(b, g, r, y);
|
||||
u = static_cast<uchar>(((int)(-17 * r) - (int)(33 * g) + (int)(50 * b) + 12800) / 100);
|
||||
v = static_cast<uchar>(((int)(50 * r) - (int)(42 * g) - (int)(8 * b) + 12800) / 100);
|
||||
}
|
||||
|
||||
__global__ void Gray_to_YV12(const PtrStepSzb src, PtrStepb dst)
|
||||
{
|
||||
const int x = (blockIdx.x * blockDim.x + threadIdx.x) * 2;
|
||||
const int y = (blockIdx.y * blockDim.y + threadIdx.y) * 2;
|
||||
|
||||
if (x + 1 >= src.cols || y + 1 >= src.rows)
|
||||
return;
|
||||
|
||||
// get pointers to the data
|
||||
const size_t planeSize = src.rows * dst.step;
|
||||
PtrStepb y_plane(dst.data, dst.step);
|
||||
PtrStepb u_plane(y_plane.data + planeSize, dst.step / 2);
|
||||
PtrStepb v_plane(u_plane.data + (planeSize / 4), dst.step / 2);
|
||||
|
||||
uchar pix;
|
||||
uchar y_val, u_val, v_val;
|
||||
|
||||
pix = src(y, x);
|
||||
rgbtoy(pix, pix, pix, y_val);
|
||||
y_plane(y, x) = y_val;
|
||||
|
||||
pix = src(y, x + 1);
|
||||
rgbtoy(pix, pix, pix, y_val);
|
||||
y_plane(y, x + 1) = y_val;
|
||||
|
||||
pix = src(y + 1, x);
|
||||
rgbtoy(pix, pix, pix, y_val);
|
||||
y_plane(y + 1, x) = y_val;
|
||||
|
||||
pix = src(y + 1, x + 1);
|
||||
rgbtoyuv(pix, pix, pix, y_val, u_val, v_val);
|
||||
y_plane(y + 1, x + 1) = y_val;
|
||||
u_plane(y / 2, x / 2) = u_val;
|
||||
v_plane(y / 2, x / 2) = v_val;
|
||||
}
|
||||
|
||||
// Converts a packed BGR(A) image (T = uchar3 or uchar4; .x=B, .y=G, .z=R as
// used below) to a planar YV12-layout buffer. Each thread owns a 2x2 pixel
// quad: four luma samples plus one chroma pair sampled from the quad's
// bottom-right pixel. Threads whose quad exceeds src are dropped.
template <typename T>
__global__ void BGR_to_YV12(const PtrStepSz<T> src, PtrStepb dst)
{
    const int x = (blockIdx.x * blockDim.x + threadIdx.x) * 2;
    const int y = (blockIdx.y * blockDim.y + threadIdx.y) * 2;

    if (x + 1 >= src.cols || y + 1 >= src.rows)
        return;

    // Plane layout inside dst: full-resolution luma, then two quarter-size
    // chroma planes at half the row stride.
    // NOTE(review): the first chroma plane receives the 'u' output of
    // rgbtoyuv — YV12 proper stores V before U; verify against the consumer.
    const size_t planeSize = src.rows * dst.step;

    PtrStepb y_plane(dst.data, dst.step);
    PtrStepb u_plane(y_plane.data + planeSize, dst.step / 2);
    PtrStepb v_plane(u_plane.data + (planeSize / 4), dst.step / 2);

    // Luma for the three quad pixels that carry no chroma.
    for (int dy = 0; dy < 2; ++dy)
    {
        for (int dx = 0; dx < 2; ++dx)
        {
            if (dy == 1 && dx == 1)
                continue; // bottom-right handled below together with chroma

            const T pix = src(y + dy, x + dx);
            uchar luma;
            rgbtoy(pix.z, pix.y, pix.x, luma);
            y_plane(y + dy, x + dx) = luma;
        }
    }

    // Bottom-right pixel supplies both its own luma and the quad's chroma.
    const T pix = src(y + 1, x + 1);
    uchar luma, u_val, v_val;
    rgbtoyuv(pix.z, pix.y, pix.x, luma, u_val, v_val);

    y_plane(y + 1, x + 1) = luma;
    u_plane(y / 2, x / 2) = u_val;
    v_plane(y / 2, x / 2) = v_val;
}
|
||||
|
||||
// Host wrapper: launches Gray_to_YV12 over the whole image and blocks until
// it finishes. Grid is sized for one thread per 2x2 pixel quad.
void Gray_to_YV12_caller(const PtrStepSzb src, PtrStepb dst)
{
    const dim3 threads(32, 8);
    const dim3 blocks(divUp(src.cols, threads.x * 2), divUp(src.rows, threads.y * 2));

    Gray_to_YV12<<<blocks, threads>>>(src, dst);
    cudaSafeCall( cudaGetLastError() );      // catch launch-configuration errors

    cudaSafeCall( cudaDeviceSynchronize() ); // surface asynchronous execution errors
}
|
||||
// Host wrapper: launches BGR_to_YV12 for a cn-channel uchar image (cn = 3
// or 4, mapped to uchar3/uchar4) and blocks until it finishes. Grid is
// sized for one thread per 2x2 pixel quad.
template <int cn>
void BGR_to_YV12_caller(const PtrStepSzb src, PtrStepb dst)
{
    typedef typename TypeVec<uchar, cn>::vec_type src_t;

    const dim3 threads(32, 8);
    const dim3 blocks(divUp(src.cols, threads.x * 2), divUp(src.rows, threads.y * 2));

    BGR_to_YV12<<<blocks, threads>>>(static_cast< PtrStepSz<src_t> >(src), dst);
    cudaSafeCall( cudaGetLastError() );      // catch launch-configuration errors

    cudaSafeCall( cudaDeviceSynchronize() ); // surface asynchronous execution errors
}
|
||||
|
||||
// Dispatches the YV12 conversion by channel count:
//   cn == 1 -> grayscale path, cn == 3/4 -> BGR(A) path.
// cn == 0 and cn == 2 have no conversion (null table entries).
void YV12_gpu(const PtrStepSzb src, int cn, PtrStepSzb dst)
{
    typedef void (*func_t)(const PtrStepSzb src, PtrStepb dst);

    // Indexed directly by channel count; slots 0 and 2 are unsupported.
    static const func_t funcs[] =
    {
        0, Gray_to_YV12_caller, 0, BGR_to_YV12_caller<3>, BGR_to_YV12_caller<4>
    };

    // Fix: the original indexed the table blindly — cn == 0 or 2 called a
    // null function pointer and cn outside [0, 4] read past the array.
    if (cn < 0 || cn >= (int)(sizeof(funcs) / sizeof(funcs[0])) || funcs[cn] == 0)
        return; // unsupported channel count: nothing to convert

    funcs[cn](src, dst);
}
|
||||
}
|
||||
}}}
|
||||
|
||||
#endif /* CUDA_DISABLER */
|
@ -42,7 +42,7 @@
|
||||
|
||||
#if !defined CUDA_DISABLER
|
||||
|
||||
#include "row_filter.hpp"
|
||||
#include "row_filter.h"
|
||||
|
||||
namespace filter
|
||||
{
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user