Merge pull request #964 from jet47:cuda-5.5-support
commit 173442bb2e
File diff suppressed because it is too large
@@ -60,6 +60,8 @@
# include "opencv2/core/stream_accessor.hpp"
+ # include "opencv2/core/cuda/common.hpp"

+ # define NPP_VERSION (NPP_VERSION_MAJOR * 1000 + NPP_VERSION_MINOR * 100 + NPP_VERSION_BUILD)

# define CUDART_MINIMUM_REQUIRED_VERSION 4020

# if (CUDART_VERSION < CUDART_MINIMUM_REQUIRED_VERSION)
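The NPP_VERSION macro added above packs the three NPP version components into one comparable integer, which is what the "#if NPP_VERSION < 5500" guards in the next hunk test against. A minimal sketch of the arithmetic, with illustrative values standing in for an NPP 5.5.0 toolkit (the real components come from the NPP headers):

// Illustrative values only; a real NPP 5.5.0 install defines these in its own headers.
#define NPP_VERSION_MAJOR 5
#define NPP_VERSION_MINOR 5
#define NPP_VERSION_BUILD 0

#define NPP_VERSION (NPP_VERSION_MAJOR * 1000 + NPP_VERSION_MINOR * 100 + NPP_VERSION_BUILD)

// NPP_VERSION evaluates to 5 * 1000 + 5 * 100 + 0 = 5500, so "#if NPP_VERSION < 5500"
// keeps the pre-5.5 error-code names only for older toolkits.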
@@ -1547,48 +1547,90 @@ namespace

const ErrorEntry npp_errors [] =
{
error_entry( NPP_NOT_SUPPORTED_MODE_ERROR ),
error_entry( NPP_ROUND_MODE_NOT_SUPPORTED_ERROR ),
error_entry( NPP_RESIZE_NO_OPERATION_ERROR ),

#if defined (_MSC_VER)
#if defined (_MSC_VER)
error_entry( NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY ),
#endif
#endif

#if NPP_VERSION < 5500
error_entry( NPP_BAD_ARG_ERROR ),
error_entry( NPP_LUT_NUMBER_OF_LEVELS_ERROR ),
error_entry( NPP_TEXTURE_BIND_ERROR ),
error_entry( NPP_COEFF_ERROR ),
error_entry( NPP_RECT_ERROR ),
error_entry( NPP_QUAD_ERROR ),
error_entry( NPP_MEMFREE_ERR ),
error_entry( NPP_MEMSET_ERR ),
error_entry( NPP_MEM_ALLOC_ERR ),
error_entry( NPP_HISTO_NUMBER_OF_LEVELS_ERROR ),
error_entry( NPP_MIRROR_FLIP_ERR ),
error_entry( NPP_INVALID_INPUT ),
error_entry( NPP_POINTER_ERROR ),
error_entry( NPP_WARNING ),
error_entry( NPP_ODD_ROI_WARNING ),
#else
error_entry( NPP_INVALID_HOST_POINTER_ERROR ),
error_entry( NPP_INVALID_DEVICE_POINTER_ERROR ),
error_entry( NPP_LUT_PALETTE_BITSIZE_ERROR ),
error_entry( NPP_ZC_MODE_NOT_SUPPORTED_ERROR ),
error_entry( NPP_MEMFREE_ERROR ),
error_entry( NPP_MEMSET_ERROR ),
error_entry( NPP_QUALITY_INDEX_ERROR ),
error_entry( NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR ),
error_entry( NPP_CHANNEL_ORDER_ERROR ),
error_entry( NPP_ZERO_MASK_VALUE_ERROR ),
error_entry( NPP_QUADRANGLE_ERROR ),
error_entry( NPP_RECTANGLE_ERROR ),
error_entry( NPP_COEFFICIENT_ERROR ),
error_entry( NPP_NUMBER_OF_CHANNELS_ERROR ),
error_entry( NPP_COI_ERROR ),
error_entry( NPP_DIVISOR_ERROR ),
error_entry( NPP_CHANNEL_ERROR ),
error_entry( NPP_STRIDE_ERROR ),
error_entry( NPP_ANCHOR_ERROR ),
error_entry( NPP_MASK_SIZE_ERROR ),
error_entry( NPP_MIRROR_FLIP_ERROR ),
error_entry( NPP_MOMENT_00_ZERO_ERROR ),
error_entry( NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR ),
error_entry( NPP_THRESHOLD_ERROR ),
error_entry( NPP_CONTEXT_MATCH_ERROR ),
error_entry( NPP_FFT_FLAG_ERROR ),
error_entry( NPP_FFT_ORDER_ERROR ),
error_entry( NPP_SCALE_RANGE_ERROR ),
error_entry( NPP_DATA_TYPE_ERROR ),
error_entry( NPP_OUT_OFF_RANGE_ERROR ),
error_entry( NPP_DIVIDE_BY_ZERO_ERROR ),
error_entry( NPP_MEMORY_ALLOCATION_ERR ),
error_entry( NPP_RANGE_ERROR ),
error_entry( NPP_BAD_ARGUMENT_ERROR ),
error_entry( NPP_NO_MEMORY_ERROR ),
error_entry( NPP_ERROR_RESERVED ),
error_entry( NPP_NO_OPERATION_WARNING ),
error_entry( NPP_DIVIDE_BY_ZERO_WARNING ),
error_entry( NPP_WRONG_INTERSECTION_ROI_WARNING ),
#endif

error_entry( NPP_NOT_SUPPORTED_MODE_ERROR ),
error_entry( NPP_ROUND_MODE_NOT_SUPPORTED_ERROR ),
error_entry( NPP_RESIZE_NO_OPERATION_ERROR ),
error_entry( NPP_LUT_NUMBER_OF_LEVELS_ERROR ),
error_entry( NPP_TEXTURE_BIND_ERROR ),
error_entry( NPP_WRONG_INTERSECTION_ROI_ERROR ),
error_entry( NPP_NOT_EVEN_STEP_ERROR ),
error_entry( NPP_INTERPOLATION_ERROR ),
error_entry( NPP_RESIZE_FACTOR_ERROR ),
error_entry( NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR ),
error_entry( NPP_MEMFREE_ERR ),
error_entry( NPP_MEMSET_ERR ),
error_entry( NPP_MEMCPY_ERROR ),
error_entry( NPP_MEM_ALLOC_ERR ),
error_entry( NPP_HISTO_NUMBER_OF_LEVELS_ERROR ),
error_entry( NPP_MIRROR_FLIP_ERR ),
error_entry( NPP_INVALID_INPUT ),
error_entry( NPP_ALIGNMENT_ERROR ),
error_entry( NPP_STEP_ERROR ),
error_entry( NPP_SIZE_ERROR ),
error_entry( NPP_POINTER_ERROR ),
error_entry( NPP_NULL_POINTER_ERROR ),
error_entry( NPP_CUDA_KERNEL_EXECUTION_ERROR ),
error_entry( NPP_NOT_IMPLEMENTED_ERROR ),
error_entry( NPP_ERROR ),
error_entry( NPP_NO_ERROR ),
error_entry( NPP_SUCCESS ),
error_entry( NPP_WARNING ),
error_entry( NPP_WRONG_INTERSECTION_QUAD_WARNING ),
error_entry( NPP_MISALIGNED_DST_ROI_WARNING ),
error_entry( NPP_AFFINE_QUAD_INCORRECT_WARNING ),
error_entry( NPP_DOUBLE_SIZE_WARNING ),
error_entry( NPP_ODD_ROI_WARNING )
error_entry( NPP_DOUBLE_SIZE_WARNING )
};

const size_t npp_error_num = sizeof(npp_errors) / sizeof(npp_errors[0]);
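For context, the table above feeds a simple status-code-to-string lookup. A hedged sketch of that pattern follows; the ErrorEntry layout and the error_entry macro are written here from the usual OpenCV convention, not copied from this diff:

#include <cstddef>

// Assumed layout: each entry pairs an NPP status code with its symbolic name.
struct ErrorEntry
{
    int code;
    const char* str;
};

// The error_entry macro stringizes the enumerator so the table stays in sync with the name.
#define error_entry(entry) { entry, #entry }

// Linear lookup used when an nppSafeCall-style wrapper reports a failure.
inline const char* getErrorString(int code, const ErrorEntry* errors, size_t n)
{
    for (size_t i = 0; i < n; ++i)
        if (errors[i].code == code)
            return errors[i].str;
    return "Unknown error code";
}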
@@ -153,7 +153,7 @@ namespace cv { namespace gpu { namespace cudev

template<typename I> __device__ __forceinline__ bool operator() (const I& a, const I& b) const
{
- I d = a - b;
+ I d = saturate_cast<I>(a - b);
return lo.x <= d.x && d.x <= hi.x &&
lo.y <= d.y && d.y <= hi.y &&
lo.z <= d.z && d.z <= hi.z;

@@ -169,7 +169,7 @@ namespace cv { namespace gpu { namespace cudev

template<typename I> __device__ __forceinline__ bool operator() (const I& a, const I& b) const
{
- I d = a - b;
+ I d = saturate_cast<I>(a - b);
return lo.x <= d.x && d.x <= hi.x &&
lo.y <= d.y && d.y <= hi.y &&
lo.z <= d.z && d.z <= hi.z &&
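The explicit saturate_cast matters because, for unsigned channel types, the raw difference a - b can wrap around, and subtracting vector types may produce a wider intermediate type; the cast clamps the result back into the value range of I. A hedged scalar illustration in plain C++ (the kernels above use the cudev vector overloads instead):

#include <algorithm>
#include <cstdio>

// Simplified scalar stand-in for saturate_cast<unsigned char>(int).
static unsigned char saturate_to_uchar(int v)
{
    return static_cast<unsigned char>(std::min(std::max(v, 0), 255));
}

int main()
{
    unsigned char a = 10, b = 20;
    int raw = a - b;                                         // -10 after integer promotion
    unsigned char wrapped = static_cast<unsigned char>(raw); // 246: modular wrap-around
    unsigned char clamped = saturate_to_uchar(raw);          // 0: clamped to the uchar range
    std::printf("raw=%d wrapped=%u clamped=%u\n", raw, wrapped, clamped);
    return 0;
}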
@@ -62,8 +62,8 @@ namespace arithm
return vabsdiff4(a, b);
}

- __device__ __forceinline__ VAbsDiff4() {}
- __device__ __forceinline__ VAbsDiff4(const VAbsDiff4& other) {}
+ __host__ __device__ __forceinline__ VAbsDiff4() {}
+ __host__ __device__ __forceinline__ VAbsDiff4(const VAbsDiff4&) {}
};

struct VAbsDiff2 : binary_function<uint, uint, uint>

@@ -73,8 +73,8 @@ namespace arithm
return vabsdiff2(a, b);
}

- __device__ __forceinline__ VAbsDiff2() {}
- __device__ __forceinline__ VAbsDiff2(const VAbsDiff2& other) {}
+ __host__ __device__ __forceinline__ VAbsDiff2() {}
+ __host__ __device__ __forceinline__ VAbsDiff2(const VAbsDiff2&) {}
};

__device__ __forceinline__ int _abs(int a)

@@ -97,8 +97,8 @@ namespace arithm
return saturate_cast<T>(_abs(a - b));
}

- __device__ __forceinline__ AbsDiffMat() {}
- __device__ __forceinline__ AbsDiffMat(const AbsDiffMat& other) {}
+ __host__ __device__ __forceinline__ AbsDiffMat() {}
+ __host__ __device__ __forceinline__ AbsDiffMat(const AbsDiffMat&) {}
};
}
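The constructor changes repeated throughout these functors follow one pattern: the explicitly defined default and copy constructors gain __host__ (and the scalar functors' converting constructors below gain __host__ explicit), presumably so the functor objects can be constructed and copied in host code when they are passed by value into kernel launches, while staying usable on the device. A hedged, simplified sketch of such a functor (illustrative names, not the actual OpenCV definitions):

#include <cuda_runtime.h>

// Illustrative functor: built on the host, invoked per element on the device.
struct ScaleBy
{
    float scale;

    // Host-only converting constructor: the object is created in host code.
    __host__ explicit ScaleBy(float scale_) : scale(scale_) {}

    // Host+device default/copy constructors: copying can happen on both sides
    // when the functor is passed by value to a kernel.
    __host__ __device__ __forceinline__ ScaleBy() : scale(1.0f) {}
    __host__ __device__ __forceinline__ ScaleBy(const ScaleBy& other) : scale(other.scale) {}

    // The call operator itself only ever runs on the device.
    __device__ __forceinline__ float operator ()(float x) const
    {
        return x * scale;
    }
};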
@@ -59,7 +59,7 @@ namespace arithm
{
S val;

- explicit AbsDiffScalar(S val_) : val(val_) {}
+ __host__ explicit AbsDiffScalar(S val_) : val(val_) {}

__device__ __forceinline__ T operator ()(T a) const
{

@@ -62,8 +62,8 @@ namespace arithm
return vadd4(a, b);
}

- __device__ __forceinline__ VAdd4() {}
- __device__ __forceinline__ VAdd4(const VAdd4& other) {}
+ __host__ __device__ __forceinline__ VAdd4() {}
+ __host__ __device__ __forceinline__ VAdd4(const VAdd4&) {}
};

struct VAdd2 : binary_function<uint, uint, uint>

@@ -73,8 +73,8 @@ namespace arithm
return vadd2(a, b);
}

- __device__ __forceinline__ VAdd2() {}
- __device__ __forceinline__ VAdd2(const VAdd2& other) {}
+ __host__ __device__ __forceinline__ VAdd2() {}
+ __host__ __device__ __forceinline__ VAdd2(const VAdd2&) {}
};

template <typename T, typename D> struct AddMat : binary_function<T, T, D>

@@ -84,8 +84,8 @@ namespace arithm
return saturate_cast<D>(a + b);
}

- __device__ __forceinline__ AddMat() {}
- __device__ __forceinline__ AddMat(const AddMat& other) {}
+ __host__ __device__ __forceinline__ AddMat() {}
+ __host__ __device__ __forceinline__ AddMat(const AddMat&) {}
};
}

@@ -59,7 +59,7 @@ namespace arithm
{
S val;

- explicit AddScalar(S val_) : val(val_) {}
+ __host__ explicit AddScalar(S val_) : val(val_) {}

__device__ __forceinline__ D operator ()(T a) const
{

@@ -74,7 +74,7 @@ namespace arithm
float beta;
float gamma;

- AddWeighted_(double alpha_, double beta_, double gamma_) : alpha(static_cast<float>(alpha_)), beta(static_cast<float>(beta_)), gamma(static_cast<float>(gamma_)) {}
+ __host__ AddWeighted_(double alpha_, double beta_, double gamma_) : alpha(static_cast<float>(alpha_)), beta(static_cast<float>(beta_)), gamma(static_cast<float>(gamma_)) {}

__device__ __forceinline__ D operator ()(T1 a, T2 b) const
{

@@ -87,7 +87,7 @@ namespace arithm
double beta;
double gamma;

- AddWeighted_(double alpha_, double beta_, double gamma_) : alpha(alpha_), beta(beta_), gamma(gamma_) {}
+ __host__ AddWeighted_(double alpha_, double beta_, double gamma_) : alpha(alpha_), beta(beta_), gamma(gamma_) {}

__device__ __forceinline__ D operator ()(T1 a, T2 b) const
{

@@ -62,8 +62,8 @@ namespace arithm
return vcmpeq4(a, b);
}

- __device__ __forceinline__ VCmpEq4() {}
- __device__ __forceinline__ VCmpEq4(const VCmpEq4& other) {}
+ __host__ __device__ __forceinline__ VCmpEq4() {}
+ __host__ __device__ __forceinline__ VCmpEq4(const VCmpEq4&) {}
};
struct VCmpNe4 : binary_function<uint, uint, uint>
{

@@ -72,8 +72,8 @@ namespace arithm
return vcmpne4(a, b);
}

- __device__ __forceinline__ VCmpNe4() {}
- __device__ __forceinline__ VCmpNe4(const VCmpNe4& other) {}
+ __host__ __device__ __forceinline__ VCmpNe4() {}
+ __host__ __device__ __forceinline__ VCmpNe4(const VCmpNe4&) {}
};
struct VCmpLt4 : binary_function<uint, uint, uint>
{

@@ -82,8 +82,8 @@ namespace arithm
return vcmplt4(a, b);
}

- __device__ __forceinline__ VCmpLt4() {}
- __device__ __forceinline__ VCmpLt4(const VCmpLt4& other) {}
+ __host__ __device__ __forceinline__ VCmpLt4() {}
+ __host__ __device__ __forceinline__ VCmpLt4(const VCmpLt4&) {}
};
struct VCmpLe4 : binary_function<uint, uint, uint>
{

@@ -92,8 +92,8 @@ namespace arithm
return vcmple4(a, b);
}

- __device__ __forceinline__ VCmpLe4() {}
- __device__ __forceinline__ VCmpLe4(const VCmpLe4& other) {}
+ __host__ __device__ __forceinline__ VCmpLe4() {}
+ __host__ __device__ __forceinline__ VCmpLe4(const VCmpLe4&) {}
};

template <class Op, typename T>
@@ -45,6 +45,7 @@
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/reduce.hpp"
#include "opencv2/core/cuda/emulation.hpp"

@@ -59,7 +59,7 @@ namespace arithm
{
S val;

- explicit DivInv(S val_) : val(val_) {}
+ __host__ explicit DivInv(S val_) : val(val_) {}

__device__ __forceinline__ D operator ()(T a) const
{

@@ -91,8 +91,8 @@ namespace arithm
return b != 0 ? saturate_cast<D>(a / b) : 0;
}

- __device__ __forceinline__ Div() {}
- __device__ __forceinline__ Div(const Div& other) {}
+ __host__ __device__ __forceinline__ Div() {}
+ __host__ __device__ __forceinline__ Div(const Div&) {}
};
template <typename T> struct Div<T, float> : binary_function<T, T, float>
{

@@ -101,8 +101,8 @@ namespace arithm
return b != 0 ? static_cast<float>(a) / b : 0;
}

- __device__ __forceinline__ Div() {}
- __device__ __forceinline__ Div(const Div& other) {}
+ __host__ __device__ __forceinline__ Div() {}
+ __host__ __device__ __forceinline__ Div(const Div&) {}
};
template <typename T> struct Div<T, double> : binary_function<T, T, double>
{

@@ -111,15 +111,15 @@ namespace arithm
return b != 0 ? static_cast<double>(a) / b : 0;
}

- __device__ __forceinline__ Div() {}
- __device__ __forceinline__ Div(const Div& other) {}
+ __host__ __device__ __forceinline__ Div() {}
+ __host__ __device__ __forceinline__ Div(const Div&) {}
};

template <typename T, typename S, typename D> struct DivScale : binary_function<T, T, D>
{
S scale;

- explicit DivScale(S scale_) : scale(scale_) {}
+ __host__ explicit DivScale(S scale_) : scale(scale_) {}

__device__ __forceinline__ D operator ()(T a, T b) const
{

@@ -59,7 +59,7 @@ namespace arithm
{
S val;

- explicit DivScalar(S val_) : val(val_) {}
+ __host__ explicit DivScalar(S val_) : val(val_) {}

__device__ __forceinline__ D operator ()(T a) const
{

@@ -94,8 +94,8 @@ namespace arithm
return saturate_cast<T>(x * x);
}

- __device__ __forceinline__ Sqr() {}
- __device__ __forceinline__ Sqr(const Sqr& other) {}
+ __host__ __device__ __forceinline__ Sqr() {}
+ __host__ __device__ __forceinline__ Sqr(const Sqr&) {}
};
}

@@ -190,8 +190,8 @@ namespace arithm
return saturate_cast<T>(f(x));
}

- __device__ __forceinline__ Exp() {}
- __device__ __forceinline__ Exp(const Exp& other) {}
+ __host__ __device__ __forceinline__ Exp() {}
+ __host__ __device__ __forceinline__ Exp(const Exp&) {}
};
}

@@ -228,7 +228,7 @@ namespace arithm
{
float power;

- PowOp(double power_) : power(static_cast<float>(power_)) {}
+ __host__ explicit PowOp(double power_) : power(static_cast<float>(power_)) {}

__device__ __forceinline__ T operator()(T e) const
{

@@ -239,7 +239,7 @@ namespace arithm
{
float power;

- PowOp(double power_) : power(static_cast<float>(power_)) {}
+ __host__ explicit PowOp(double power_) : power(static_cast<float>(power_)) {}

__device__ __forceinline__ T operator()(T e) const
{

@@ -255,7 +255,7 @@ namespace arithm
{
float power;

- PowOp(double power_) : power(static_cast<float>(power_)) {}
+ __host__ explicit PowOp(double power_) : power(static_cast<float>(power_)) {}

__device__ __forceinline__ float operator()(float e) const
{

@@ -266,7 +266,7 @@ namespace arithm
{
double power;

- PowOp(double power_) : power(power_) {}
+ __host__ explicit PowOp(double power_) : power(power_) {}

__device__ __forceinline__ double operator()(double e) const
{
@@ -45,6 +45,7 @@
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/reduce.hpp"
#include "opencv2/core/cuda/emulation.hpp"
#include "opencv2/core/cuda/limits.hpp"

@@ -65,8 +65,8 @@ namespace arithm
return vmin4(a, b);
}

- __device__ __forceinline__ VMin4() {}
- __device__ __forceinline__ VMin4(const VMin4& other) {}
+ __host__ __device__ __forceinline__ VMin4() {}
+ __host__ __device__ __forceinline__ VMin4(const VMin4&) {}
};

struct VMin2 : binary_function<uint, uint, uint>

@@ -76,8 +76,8 @@ namespace arithm
return vmin2(a, b);
}

- __device__ __forceinline__ VMin2() {}
- __device__ __forceinline__ VMin2(const VMin2& other) {}
+ __host__ __device__ __forceinline__ VMin2() {}
+ __host__ __device__ __forceinline__ VMin2(const VMin2&) {}
};
}

@@ -151,8 +151,8 @@ namespace arithm
return vmax4(a, b);
}

- __device__ __forceinline__ VMax4() {}
- __device__ __forceinline__ VMax4(const VMax4& other) {}
+ __host__ __device__ __forceinline__ VMax4() {}
+ __host__ __device__ __forceinline__ VMax4(const VMax4&) {}
};

struct VMax2 : binary_function<uint, uint, uint>

@@ -162,8 +162,8 @@ namespace arithm
return vmax2(a, b);
}

- __device__ __forceinline__ VMax2() {}
- __device__ __forceinline__ VMax2(const VMax2& other) {}
+ __host__ __device__ __forceinline__ VMax2() {}
+ __host__ __device__ __forceinline__ VMax2(const VMax2&) {}
};
}

@@ -45,6 +45,7 @@
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/reduce.hpp"
#include "opencv2/core/cuda/emulation.hpp"
#include "opencv2/core/cuda/limits.hpp"

@@ -69,8 +69,8 @@ namespace arithm
return res;
}

- __device__ __forceinline__ Mul_8uc4_32f() {}
- __device__ __forceinline__ Mul_8uc4_32f(const Mul_8uc4_32f& other) {}
+ __host__ __device__ __forceinline__ Mul_8uc4_32f() {}
+ __host__ __device__ __forceinline__ Mul_8uc4_32f(const Mul_8uc4_32f&) {}
};

struct Mul_16sc4_32f : binary_function<short4, float, short4>

@@ -81,8 +81,8 @@ namespace arithm
saturate_cast<short>(a.z * b), saturate_cast<short>(a.w * b));
}

- __device__ __forceinline__ Mul_16sc4_32f() {}
- __device__ __forceinline__ Mul_16sc4_32f(const Mul_16sc4_32f& other) {}
+ __host__ __device__ __forceinline__ Mul_16sc4_32f() {}
+ __host__ __device__ __forceinline__ Mul_16sc4_32f(const Mul_16sc4_32f&) {}
};

template <typename T, typename D> struct Mul : binary_function<T, T, D>

@@ -92,15 +92,15 @@ namespace arithm
return saturate_cast<D>(a * b);
}

- __device__ __forceinline__ Mul() {}
- __device__ __forceinline__ Mul(const Mul& other) {}
+ __host__ __device__ __forceinline__ Mul() {}
+ __host__ __device__ __forceinline__ Mul(const Mul&) {}
};

template <typename T, typename S, typename D> struct MulScale : binary_function<T, T, D>
{
S scale;

- explicit MulScale(S scale_) : scale(scale_) {}
+ __host__ explicit MulScale(S scale_) : scale(scale_) {}

__device__ __forceinline__ D operator ()(T a, T b) const
{

@@ -59,7 +59,7 @@ namespace arithm
{
S val;

- explicit MulScalar(S val_) : val(val_) {}
+ __host__ explicit MulScalar(S val_) : val(val_) {}

__device__ __forceinline__ D operator ()(T a) const
{

@@ -46,6 +46,7 @@
#include "opencv2/core/cuda/saturate_cast.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/reduce.hpp"
#include "opencv2/core/cuda/limits.hpp"
@@ -76,8 +77,8 @@ namespace reduce
return r;
}

- __device__ __forceinline__ Sum() {}
- __device__ __forceinline__ Sum(const Sum&) {}
+ __host__ __device__ __forceinline__ Sum() {}
+ __host__ __device__ __forceinline__ Sum(const Sum&) {}
};

struct Avg

@@ -100,8 +101,8 @@ namespace reduce
return r / sz;
}

- __device__ __forceinline__ Avg() {}
- __device__ __forceinline__ Avg(const Avg&) {}
+ __host__ __device__ __forceinline__ Avg() {}
+ __host__ __device__ __forceinline__ Avg(const Avg&) {}
};

struct Min

@@ -125,8 +126,8 @@ namespace reduce
return r;
}

- __device__ __forceinline__ Min() {}
- __device__ __forceinline__ Min(const Min&) {}
+ __host__ __device__ __forceinline__ Min() {}
+ __host__ __device__ __forceinline__ Min(const Min&) {}
};

struct Max

@@ -150,8 +151,8 @@ namespace reduce
return r;
}

- __device__ __forceinline__ Max() {}
- __device__ __forceinline__ Max(const Max&) {}
+ __host__ __device__ __forceinline__ Max() {}
+ __host__ __device__ __forceinline__ Max(const Max&) {}
};

///////////////////////////////////////////////////////////

@@ -62,8 +62,8 @@ namespace arithm
return vsub4(a, b);
}

- __device__ __forceinline__ VSub4() {}
- __device__ __forceinline__ VSub4(const VSub4& other) {}
+ __host__ __device__ __forceinline__ VSub4() {}
+ __host__ __device__ __forceinline__ VSub4(const VSub4&) {}
};

struct VSub2 : binary_function<uint, uint, uint>

@@ -73,8 +73,8 @@ namespace arithm
return vsub2(a, b);
}

- __device__ __forceinline__ VSub2() {}
- __device__ __forceinline__ VSub2(const VSub2& other) {}
+ __host__ __device__ __forceinline__ VSub2() {}
+ __host__ __device__ __forceinline__ VSub2(const VSub2&) {}
};

template <typename T, typename D> struct SubMat : binary_function<T, T, D>

@@ -84,8 +84,8 @@ namespace arithm
return saturate_cast<D>(a - b);
}

- __device__ __forceinline__ SubMat() {}
- __device__ __forceinline__ SubMat(const SubMat& other) {}
+ __host__ __device__ __forceinline__ SubMat() {}
+ __host__ __device__ __forceinline__ SubMat(const SubMat&) {}
};
}
@@ -59,7 +59,7 @@ namespace arithm
{
S val;

- explicit SubScalar(S val_) : val(val_) {}
+ __host__ explicit SubScalar(S val_) : val(val_) {}

__device__ __forceinline__ D operator ()(T a) const
{

@@ -45,6 +45,7 @@
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/reduce.hpp"
#include "opencv2/core/cuda/emulation.hpp"
#include "opencv2/core/cuda/utility.hpp"
@@ -72,7 +72,7 @@ PERF_TEST_P(Sz_Type_KernelSz, Blur,

TEST_CYCLE() cv::gpu::blur(d_src, dst, cv::Size(ksize, ksize));

- GPU_SANITY_CHECK(dst);
+ GPU_SANITY_CHECK(dst, 1);
}
else
{
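The GPU_SANITY_CHECK changes in these perf tests add an explicit tolerance, so the recorded result may differ slightly from the stored reference (useful when a new CUDA/NPP release perturbs low-order bits). A hedged sketch of what such a tolerance-based comparison amounts to (the real macro belongs to the OpenCV perf framework; this is only an illustration):

#include <opencv2/core/core.hpp>
#include <cassert>

// Illustrative stand-in: pass if the largest per-element difference is within eps.
static void sanityCheck(const cv::Mat& actual, const cv::Mat& reference, double eps)
{
    const double maxAbsDiff = cv::norm(actual, reference, cv::NORM_INF);
    assert(maxAbsDiff <= eps); // e.g. eps = 1 tolerates off-by-one pixel values
    (void)maxAbsDiff;
}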
@@ -48,6 +48,7 @@
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/emulation.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/limits.hpp"
#include "opencv2/core/cuda/dynamic_smem.hpp"

@@ -811,7 +812,7 @@ namespace cv { namespace gpu { namespace cudev

const int ind = ::atomicAdd(r_sizes + n, 1);
if (ind < maxSize)
- r_table(n, ind) = p - templCenter;
+ r_table(n, ind) = saturate_cast<short2>(p - templCenter);
}

void buildRTable_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,

@@ -855,7 +856,7 @@ namespace cv { namespace gpu { namespace cudev

for (int j = 0; j < r_row_size; ++j)
{
- short2 c = p - r_row[j];
+ int2 c = p - r_row[j];

c.x = __float2int_rn(c.x * idp);
c.y = __float2int_rn(c.y * idp);
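Both Hough changes deal with the type of a vector difference: subtracting the small integer vectors appears to yield a wider result, which is either kept as int2 or explicitly saturated back to short2 before being stored in the R-table. A hedged illustration of the clamping step (saturate_cast<short2> is the real cudev helper; this function is only a stand-in to show the effect):

#include <cuda_runtime.h>
#include <climits>

// Illustrative stand-in for saturate_cast<short2>(int2): clamp each
// coordinate into the short range before storing it in a short2 table entry.
__host__ __device__ __forceinline__ short2 clampToShort2(int2 v)
{
    short2 r;
    r.x = (short)(v.x < SHRT_MIN ? SHRT_MIN : (v.x > SHRT_MAX ? SHRT_MAX : v.x));
    r.y = (short)(v.y < SHRT_MIN ? SHRT_MIN : (v.y > SHRT_MAX ? SHRT_MAX : v.y));
    return r;
}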
@@ -84,7 +84,7 @@ PERF_TEST_P(ImagePair, InterpolateFrames,

TEST_CYCLE() cv::gpu::interpolateFrames(d_frame0, d_frame1, d_fu, d_fv, d_bu, d_bv, 0.5f, newFrame, d_buf);

- GPU_SANITY_CHECK(newFrame);
+ GPU_SANITY_CHECK(newFrame, 1e-4);
}
else
{

@@ -123,7 +123,7 @@ PERF_TEST_P(ImagePair, CreateOpticalFlowNeedleMap,

TEST_CYCLE() cv::gpu::createOpticalFlowNeedleMap(u, v, vertex, colors);

- GPU_SANITY_CHECK(vertex);
+ GPU_SANITY_CHECK(vertex, 1e-6);
GPU_SANITY_CHECK(colors);
}
else

@@ -161,8 +161,8 @@ PERF_TEST_P(ImagePair, BroxOpticalFlow,

TEST_CYCLE() d_flow(d_frame0, d_frame1, u, v);

- GPU_SANITY_CHECK(u);
- GPU_SANITY_CHECK(v);
+ GPU_SANITY_CHECK(u, 1e-1);
+ GPU_SANITY_CHECK(v, 1e-1);
}
else
{
@@ -103,8 +103,8 @@ GPU_TEST_P(BroxOpticalFlow, Regression)
for (int i = 0; i < v_gold.rows; ++i)
f.read(v_gold.ptr<char>(i), v_gold.cols * sizeof(float));

- EXPECT_MAT_NEAR(u_gold, u, 0);
- EXPECT_MAT_NEAR(v_gold, v, 0);
+ EXPECT_MAT_SIMILAR(u_gold, u, 1e-3);
+ EXPECT_MAT_SIMILAR(v_gold, v, 1e-3);
#else
std::ofstream f(fname.c_str(), std::ios_base::binary);
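The regression test switches from an exact comparison (EXPECT_MAT_NEAR with a zero tolerance) to a similarity check, which bounds a normalized difference instead of requiring bit-exact output across CUDA releases. A hedged sketch of one such relative measure (an illustration of the idea, not the exact helper behind EXPECT_MAT_SIMILAR):

#include <opencv2/core/core.hpp>

// Relative L2 difference: 0 means identical; a value below a small threshold
// such as 1e-3 means the flow fields agree to within rounding noise.
static double relativeDiff(const cv::Mat& gold, const cv::Mat& actual)
{
    const double denom = cv::norm(gold, cv::NORM_L2);
    const double diff  = cv::norm(gold, actual, cv::NORM_L2);
    return denom > 0.0 ? diff / denom : diff;
}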