[+] CUDA path for NCVImagePyramid
This commit is contained in:
parent
4136855ba8
commit
54337fd513
@ -331,6 +331,7 @@ enum
|
||||
NCV_HAAR_XML_LOADING_EXCEPTION,
|
||||
|
||||
NCV_NOIMPL_HAAR_TILTED_FEATURES,
|
||||
NCV_NOT_IMPLEMENTED,
|
||||
|
||||
NCV_WARNING_HAAR_DETECTIONS_VECTOR_OVERFLOW,
|
||||
|
||||
|
@ -45,6 +45,15 @@
|
||||
#include "NCV.hpp"
|
||||
|
||||
|
||||
template <class T>
|
||||
static void swap(T &p1, T &p2)
|
||||
{
|
||||
T tmp = p1;
|
||||
p1 = p2;
|
||||
p2 = tmp;
|
||||
}
|
||||
|
||||
|
||||
template<typename T>
|
||||
static T divUp(T a, T b)
|
||||
{
|
||||
|
@ -46,25 +46,25 @@
|
||||
#include <float.h>
|
||||
#include "NCV.hpp"
|
||||
|
||||
template<typename TBase> inline TBase _pixMaxVal();
|
||||
template<> static inline Ncv8u _pixMaxVal<Ncv8u>() {return UCHAR_MAX;}
|
||||
template<> static inline Ncv16u _pixMaxVal<Ncv16u>() {return USHRT_MAX;}
|
||||
template<> static inline Ncv32u _pixMaxVal<Ncv32u>() {return UINT_MAX;}
|
||||
template<> static inline Ncv8s _pixMaxVal<Ncv8s>() {return CHAR_MAX;}
|
||||
template<> static inline Ncv16s _pixMaxVal<Ncv16s>() {return SHRT_MAX;}
|
||||
template<> static inline Ncv32s _pixMaxVal<Ncv32s>() {return INT_MAX;}
|
||||
template<> static inline Ncv32f _pixMaxVal<Ncv32f>() {return FLT_MAX;}
|
||||
template<> static inline Ncv64f _pixMaxVal<Ncv64f>() {return DBL_MAX;}
|
||||
template<typename TBase> inline __host__ __device__ TBase _pixMaxVal();
|
||||
template<> static inline __host__ __device__ Ncv8u _pixMaxVal<Ncv8u>() {return UCHAR_MAX;}
|
||||
template<> static inline __host__ __device__ Ncv16u _pixMaxVal<Ncv16u>() {return USHRT_MAX;}
|
||||
template<> static inline __host__ __device__ Ncv32u _pixMaxVal<Ncv32u>() {return UINT_MAX;}
|
||||
template<> static inline __host__ __device__ Ncv8s _pixMaxVal<Ncv8s>() {return CHAR_MAX;}
|
||||
template<> static inline __host__ __device__ Ncv16s _pixMaxVal<Ncv16s>() {return SHRT_MAX;}
|
||||
template<> static inline __host__ __device__ Ncv32s _pixMaxVal<Ncv32s>() {return INT_MAX;}
|
||||
template<> static inline __host__ __device__ Ncv32f _pixMaxVal<Ncv32f>() {return FLT_MAX;}
|
||||
template<> static inline __host__ __device__ Ncv64f _pixMaxVal<Ncv64f>() {return DBL_MAX;}
|
||||
|
||||
template<typename TBase> inline TBase _pixMinVal();
|
||||
template<> static inline Ncv8u _pixMinVal<Ncv8u>() {return 0;}
|
||||
template<> static inline Ncv16u _pixMinVal<Ncv16u>() {return 0;}
|
||||
template<> static inline Ncv32u _pixMinVal<Ncv32u>() {return 0;}
|
||||
template<> static inline Ncv8s _pixMinVal<Ncv8s>() {return CHAR_MIN;}
|
||||
template<> static inline Ncv16s _pixMinVal<Ncv16s>() {return SHRT_MIN;}
|
||||
template<> static inline Ncv32s _pixMinVal<Ncv32s>() {return INT_MIN;}
|
||||
template<> static inline Ncv32f _pixMinVal<Ncv32f>() {return FLT_MIN;}
|
||||
template<> static inline Ncv64f _pixMinVal<Ncv64f>() {return DBL_MIN;}
|
||||
template<typename TBase> inline __host__ __device__ TBase _pixMinVal();
|
||||
template<> static inline __host__ __device__ Ncv8u _pixMinVal<Ncv8u>() {return 0;}
|
||||
template<> static inline __host__ __device__ Ncv16u _pixMinVal<Ncv16u>() {return 0;}
|
||||
template<> static inline __host__ __device__ Ncv32u _pixMinVal<Ncv32u>() {return 0;}
|
||||
template<> static inline __host__ __device__ Ncv8s _pixMinVal<Ncv8s>() {return CHAR_MIN;}
|
||||
template<> static inline __host__ __device__ Ncv16s _pixMinVal<Ncv16s>() {return SHRT_MIN;}
|
||||
template<> static inline __host__ __device__ Ncv32s _pixMinVal<Ncv32s>() {return INT_MIN;}
|
||||
template<> static inline __host__ __device__ Ncv32f _pixMinVal<Ncv32f>() {return FLT_MIN;}
|
||||
template<> static inline __host__ __device__ Ncv64f _pixMinVal<Ncv64f>() {return DBL_MIN;}
|
||||
|
||||
template<typename Tvec> struct TConvVec2Base;
|
||||
template<> struct TConvVec2Base<uchar1> {typedef Ncv8u TBase;};
|
||||
@ -103,55 +103,55 @@ template<> struct TConvBase2Vec<Ncv64f, 3> {typedef double3 TVec;};
|
||||
template<> struct TConvBase2Vec<Ncv64f, 4> {typedef double4 TVec;};
|
||||
|
||||
//TODO: consider using CUDA intrinsics to avoid branching
|
||||
template<typename Tin> static inline void _TDemoteClampZ(Tin &a, Ncv8u &out) {out = (Ncv8u)CLAMP_0_255(a);};
|
||||
template<typename Tin> static inline void _TDemoteClampZ(Tin &a, Ncv16u &out) {out = (Ncv16u)CLAMP(a, 0, USHRT_MAX);}
|
||||
template<typename Tin> static inline void _TDemoteClampZ(Tin &a, Ncv32u &out) {out = (Ncv32u)CLAMP(a, 0, UINT_MAX);}
|
||||
template<typename Tin> static inline void _TDemoteClampZ(Tin &a, Ncv32f &out) {out = (Ncv32f)a;}
|
||||
template<typename Tin> static inline __host__ __device__ void _TDemoteClampZ(Tin &a, Ncv8u &out) {out = (Ncv8u)CLAMP_0_255(a);};
|
||||
template<typename Tin> static inline __host__ __device__ void _TDemoteClampZ(Tin &a, Ncv16u &out) {out = (Ncv16u)CLAMP(a, 0, USHRT_MAX);}
|
||||
template<typename Tin> static inline __host__ __device__ void _TDemoteClampZ(Tin &a, Ncv32u &out) {out = (Ncv32u)CLAMP(a, 0, UINT_MAX);}
|
||||
template<typename Tin> static inline __host__ __device__ void _TDemoteClampZ(Tin &a, Ncv32f &out) {out = (Ncv32f)a;}
|
||||
|
||||
//TODO: consider using CUDA intrinsics to avoid branching
|
||||
template<typename Tin> static inline void _TDemoteClampNN(Tin &a, Ncv8u &out) {out = (Ncv8u)CLAMP_0_255(a+0.5f);}
|
||||
template<typename Tin> static inline void _TDemoteClampNN(Tin &a, Ncv16u &out) {out = (Ncv16u)CLAMP(a+0.5f, 0, USHRT_MAX);}
|
||||
template<typename Tin> static inline void _TDemoteClampNN(Tin &a, Ncv32u &out) {out = (Ncv32u)CLAMP(a+0.5f, 0, UINT_MAX);}
|
||||
template<typename Tin> static inline void _TDemoteClampNN(Tin &a, Ncv32f &out) {out = (Ncv32f)a;}
|
||||
template<typename Tin> static inline __host__ __device__ void _TDemoteClampNN(Tin &a, Ncv8u &out) {out = (Ncv8u)CLAMP_0_255(a+0.5f);}
|
||||
template<typename Tin> static inline __host__ __device__ void _TDemoteClampNN(Tin &a, Ncv16u &out) {out = (Ncv16u)CLAMP(a+0.5f, 0, USHRT_MAX);}
|
||||
template<typename Tin> static inline __host__ __device__ void _TDemoteClampNN(Tin &a, Ncv32u &out) {out = (Ncv32u)CLAMP(a+0.5f, 0, UINT_MAX);}
|
||||
template<typename Tin> static inline __host__ __device__ void _TDemoteClampNN(Tin &a, Ncv32f &out) {out = (Ncv32f)a;}
|
||||
|
||||
template<typename Tout> inline Tout _pixMakeZero();
|
||||
template<> static inline uchar1 _pixMakeZero<uchar1>() {return make_uchar1(0);}
|
||||
template<> static inline uchar3 _pixMakeZero<uchar3>() {return make_uchar3(0,0,0);}
|
||||
template<> static inline uchar4 _pixMakeZero<uchar4>() {return make_uchar4(0,0,0,0);}
|
||||
template<> static inline ushort1 _pixMakeZero<ushort1>() {return make_ushort1(0);}
|
||||
template<> static inline ushort3 _pixMakeZero<ushort3>() {return make_ushort3(0,0,0);}
|
||||
template<> static inline ushort4 _pixMakeZero<ushort4>() {return make_ushort4(0,0,0,0);}
|
||||
template<> static inline uint1 _pixMakeZero<uint1>() {return make_uint1(0);}
|
||||
template<> static inline uint3 _pixMakeZero<uint3>() {return make_uint3(0,0,0);}
|
||||
template<> static inline uint4 _pixMakeZero<uint4>() {return make_uint4(0,0,0,0);}
|
||||
template<> static inline float1 _pixMakeZero<float1>() {return make_float1(0.f);}
|
||||
template<> static inline float3 _pixMakeZero<float3>() {return make_float3(0.f,0.f,0.f);}
|
||||
template<> static inline float4 _pixMakeZero<float4>() {return make_float4(0.f,0.f,0.f,0.f);}
|
||||
template<> static inline double1 _pixMakeZero<double1>() {return make_double1(0.);}
|
||||
template<> static inline double3 _pixMakeZero<double3>() {return make_double3(0.,0.,0.);}
|
||||
template<> static inline double4 _pixMakeZero<double4>() {return make_double4(0.,0.,0.,0.);}
|
||||
template<> static inline __host__ __device__ uchar1 _pixMakeZero<uchar1>() {return make_uchar1(0);}
|
||||
template<> static inline __host__ __device__ uchar3 _pixMakeZero<uchar3>() {return make_uchar3(0,0,0);}
|
||||
template<> static inline __host__ __device__ uchar4 _pixMakeZero<uchar4>() {return make_uchar4(0,0,0,0);}
|
||||
template<> static inline __host__ __device__ ushort1 _pixMakeZero<ushort1>() {return make_ushort1(0);}
|
||||
template<> static inline __host__ __device__ ushort3 _pixMakeZero<ushort3>() {return make_ushort3(0,0,0);}
|
||||
template<> static inline __host__ __device__ ushort4 _pixMakeZero<ushort4>() {return make_ushort4(0,0,0,0);}
|
||||
template<> static inline __host__ __device__ uint1 _pixMakeZero<uint1>() {return make_uint1(0);}
|
||||
template<> static inline __host__ __device__ uint3 _pixMakeZero<uint3>() {return make_uint3(0,0,0);}
|
||||
template<> static inline __host__ __device__ uint4 _pixMakeZero<uint4>() {return make_uint4(0,0,0,0);}
|
||||
template<> static inline __host__ __device__ float1 _pixMakeZero<float1>() {return make_float1(0.f);}
|
||||
template<> static inline __host__ __device__ float3 _pixMakeZero<float3>() {return make_float3(0.f,0.f,0.f);}
|
||||
template<> static inline __host__ __device__ float4 _pixMakeZero<float4>() {return make_float4(0.f,0.f,0.f,0.f);}
|
||||
template<> static inline __host__ __device__ double1 _pixMakeZero<double1>() {return make_double1(0.);}
|
||||
template<> static inline __host__ __device__ double3 _pixMakeZero<double3>() {return make_double3(0.,0.,0.);}
|
||||
template<> static inline __host__ __device__ double4 _pixMakeZero<double4>() {return make_double4(0.,0.,0.,0.);}
|
||||
|
||||
static inline uchar1 _pixMake(Ncv8u x) {return make_uchar1(x);}
|
||||
static inline uchar3 _pixMake(Ncv8u x, Ncv8u y, Ncv8u z) {return make_uchar3(x,y,z);}
|
||||
static inline uchar4 _pixMake(Ncv8u x, Ncv8u y, Ncv8u z, Ncv8u w) {return make_uchar4(x,y,z,w);}
|
||||
static inline ushort1 _pixMake(Ncv16u x) {return make_ushort1(x);}
|
||||
static inline ushort3 _pixMake(Ncv16u x, Ncv16u y, Ncv16u z) {return make_ushort3(x,y,z);}
|
||||
static inline ushort4 _pixMake(Ncv16u x, Ncv16u y, Ncv16u z, Ncv16u w) {return make_ushort4(x,y,z,w);}
|
||||
static inline uint1 _pixMake(Ncv32u x) {return make_uint1(x);}
|
||||
static inline uint3 _pixMake(Ncv32u x, Ncv32u y, Ncv32u z) {return make_uint3(x,y,z);}
|
||||
static inline uint4 _pixMake(Ncv32u x, Ncv32u y, Ncv32u z, Ncv32u w) {return make_uint4(x,y,z,w);}
|
||||
static inline float1 _pixMake(Ncv32f x) {return make_float1(x);}
|
||||
static inline float3 _pixMake(Ncv32f x, Ncv32f y, Ncv32f z) {return make_float3(x,y,z);}
|
||||
static inline float4 _pixMake(Ncv32f x, Ncv32f y, Ncv32f z, Ncv32f w) {return make_float4(x,y,z,w);}
|
||||
static inline double1 _pixMake(Ncv64f x) {return make_double1(x);}
|
||||
static inline double3 _pixMake(Ncv64f x, Ncv64f y, Ncv64f z) {return make_double3(x,y,z);}
|
||||
static inline double4 _pixMake(Ncv64f x, Ncv64f y, Ncv64f z, Ncv64f w) {return make_double4(x,y,z,w);}
|
||||
static inline __host__ __device__ uchar1 _pixMake(Ncv8u x) {return make_uchar1(x);}
|
||||
static inline __host__ __device__ uchar3 _pixMake(Ncv8u x, Ncv8u y, Ncv8u z) {return make_uchar3(x,y,z);}
|
||||
static inline __host__ __device__ uchar4 _pixMake(Ncv8u x, Ncv8u y, Ncv8u z, Ncv8u w) {return make_uchar4(x,y,z,w);}
|
||||
static inline __host__ __device__ ushort1 _pixMake(Ncv16u x) {return make_ushort1(x);}
|
||||
static inline __host__ __device__ ushort3 _pixMake(Ncv16u x, Ncv16u y, Ncv16u z) {return make_ushort3(x,y,z);}
|
||||
static inline __host__ __device__ ushort4 _pixMake(Ncv16u x, Ncv16u y, Ncv16u z, Ncv16u w) {return make_ushort4(x,y,z,w);}
|
||||
static inline __host__ __device__ uint1 _pixMake(Ncv32u x) {return make_uint1(x);}
|
||||
static inline __host__ __device__ uint3 _pixMake(Ncv32u x, Ncv32u y, Ncv32u z) {return make_uint3(x,y,z);}
|
||||
static inline __host__ __device__ uint4 _pixMake(Ncv32u x, Ncv32u y, Ncv32u z, Ncv32u w) {return make_uint4(x,y,z,w);}
|
||||
static inline __host__ __device__ float1 _pixMake(Ncv32f x) {return make_float1(x);}
|
||||
static inline __host__ __device__ float3 _pixMake(Ncv32f x, Ncv32f y, Ncv32f z) {return make_float3(x,y,z);}
|
||||
static inline __host__ __device__ float4 _pixMake(Ncv32f x, Ncv32f y, Ncv32f z, Ncv32f w) {return make_float4(x,y,z,w);}
|
||||
static inline __host__ __device__ double1 _pixMake(Ncv64f x) {return make_double1(x);}
|
||||
static inline __host__ __device__ double3 _pixMake(Ncv64f x, Ncv64f y, Ncv64f z) {return make_double3(x,y,z);}
|
||||
static inline __host__ __device__ double4 _pixMake(Ncv64f x, Ncv64f y, Ncv64f z, Ncv64f w) {return make_double4(x,y,z,w);}
|
||||
|
||||
|
||||
template<typename Tin, typename Tout, Ncv32u CN> struct __pixDemoteClampZ_CN {static Tout _pixDemoteClampZ_CN(Tin &pix);};
|
||||
template<typename Tin, typename Tout, Ncv32u CN> struct __pixDemoteClampZ_CN {static __host__ __device__ Tout _pixDemoteClampZ_CN(Tin &pix);};
|
||||
|
||||
template<typename Tin, typename Tout> struct __pixDemoteClampZ_CN<Tin, Tout, 1> {
|
||||
static Tout _pixDemoteClampZ_CN(Tin &pix)
|
||||
static __host__ __device__ Tout _pixDemoteClampZ_CN(Tin &pix)
|
||||
{
|
||||
Tout out;
|
||||
_TDemoteClampZ(pix.x, out.x);
|
||||
@ -159,7 +159,7 @@ static Tout _pixDemoteClampZ_CN(Tin &pix)
|
||||
}};
|
||||
|
||||
template<typename Tin, typename Tout> struct __pixDemoteClampZ_CN<Tin, Tout, 3> {
|
||||
static Tout _pixDemoteClampZ_CN(Tin &pix)
|
||||
static __host__ __device__ Tout _pixDemoteClampZ_CN(Tin &pix)
|
||||
{
|
||||
Tout out;
|
||||
_TDemoteClampZ(pix.x, out.x);
|
||||
@ -169,7 +169,7 @@ static Tout _pixDemoteClampZ_CN(Tin &pix)
|
||||
}};
|
||||
|
||||
template<typename Tin, typename Tout> struct __pixDemoteClampZ_CN<Tin, Tout, 4> {
|
||||
static Tout _pixDemoteClampZ_CN(Tin &pix)
|
||||
static __host__ __device__ Tout _pixDemoteClampZ_CN(Tin &pix)
|
||||
{
|
||||
Tout out;
|
||||
_TDemoteClampZ(pix.x, out.x);
|
||||
@ -179,16 +179,16 @@ static Tout _pixDemoteClampZ_CN(Tin &pix)
|
||||
return out;
|
||||
}};
|
||||
|
||||
template<typename Tin, typename Tout> static inline Tout _pixDemoteClampZ(Tin &pix)
|
||||
template<typename Tin, typename Tout> static inline __host__ __device__ Tout _pixDemoteClampZ(Tin &pix)
|
||||
{
|
||||
return __pixDemoteClampZ_CN<Tin, Tout, NC(Tin)>::_pixDemoteClampZ_CN(pix);
|
||||
}
|
||||
|
||||
|
||||
template<typename Tin, typename Tout, Ncv32u CN> struct __pixDemoteClampNN_CN {static Tout _pixDemoteClampNN_CN(Tin &pix);};
|
||||
template<typename Tin, typename Tout, Ncv32u CN> struct __pixDemoteClampNN_CN {static __host__ __device__ Tout _pixDemoteClampNN_CN(Tin &pix);};
|
||||
|
||||
template<typename Tin, typename Tout> struct __pixDemoteClampNN_CN<Tin, Tout, 1> {
|
||||
static Tout _pixDemoteClampNN_CN(Tin &pix)
|
||||
static __host__ __device__ Tout _pixDemoteClampNN_CN(Tin &pix)
|
||||
{
|
||||
Tout out;
|
||||
_TDemoteClampNN(pix.x, out.x);
|
||||
@ -196,7 +196,7 @@ static Tout _pixDemoteClampNN_CN(Tin &pix)
|
||||
}};
|
||||
|
||||
template<typename Tin, typename Tout> struct __pixDemoteClampNN_CN<Tin, Tout, 3> {
|
||||
static Tout _pixDemoteClampNN_CN(Tin &pix)
|
||||
static __host__ __device__ Tout _pixDemoteClampNN_CN(Tin &pix)
|
||||
{
|
||||
Tout out;
|
||||
_TDemoteClampNN(pix.x, out.x);
|
||||
@ -206,7 +206,7 @@ static Tout _pixDemoteClampNN_CN(Tin &pix)
|
||||
}};
|
||||
|
||||
template<typename Tin, typename Tout> struct __pixDemoteClampNN_CN<Tin, Tout, 4> {
|
||||
static Tout _pixDemoteClampNN_CN(Tin &pix)
|
||||
static __host__ __device__ Tout _pixDemoteClampNN_CN(Tin &pix)
|
||||
{
|
||||
Tout out;
|
||||
_TDemoteClampNN(pix.x, out.x);
|
||||
@ -216,16 +216,16 @@ static Tout _pixDemoteClampNN_CN(Tin &pix)
|
||||
return out;
|
||||
}};
|
||||
|
||||
template<typename Tin, typename Tout> static inline Tout _pixDemoteClampNN(Tin &pix)
|
||||
template<typename Tin, typename Tout> static inline __host__ __device__ Tout _pixDemoteClampNN(Tin &pix)
|
||||
{
|
||||
return __pixDemoteClampNN_CN<Tin, Tout, NC(Tin)>::_pixDemoteClampNN_CN(pix);
|
||||
}
|
||||
|
||||
|
||||
template<typename Tin, typename Tout, typename Tw, Ncv32u CN> struct __pixScale_CN {static Tout _pixScale_CN(Tin &pix, Tw w);};
|
||||
template<typename Tin, typename Tout, typename Tw, Ncv32u CN> struct __pixScale_CN {static __host__ __device__ Tout _pixScale_CN(Tin &pix, Tw w);};
|
||||
|
||||
template<typename Tin, typename Tout, typename Tw> struct __pixScale_CN<Tin, Tout, Tw, 1> {
|
||||
static Tout _pixScale_CN(Tin &pix, Tw w)
|
||||
static __host__ __device__ Tout _pixScale_CN(Tin &pix, Tw w)
|
||||
{
|
||||
Tout out;
|
||||
typedef typename TConvVec2Base<Tout>::TBase TBout;
|
||||
@ -234,7 +234,7 @@ static Tout _pixScale_CN(Tin &pix, Tw w)
|
||||
}};
|
||||
|
||||
template<typename Tin, typename Tout, typename Tw> struct __pixScale_CN<Tin, Tout, Tw, 3> {
|
||||
static Tout _pixScale_CN(Tin &pix, Tw w)
|
||||
static __host__ __device__ Tout _pixScale_CN(Tin &pix, Tw w)
|
||||
{
|
||||
Tout out;
|
||||
typedef typename TConvVec2Base<Tout>::TBase TBout;
|
||||
@ -245,7 +245,7 @@ static Tout _pixScale_CN(Tin &pix, Tw w)
|
||||
}};
|
||||
|
||||
template<typename Tin, typename Tout, typename Tw> struct __pixScale_CN<Tin, Tout, Tw, 4> {
|
||||
static Tout _pixScale_CN(Tin &pix, Tw w)
|
||||
static __host__ __device__ Tout _pixScale_CN(Tin &pix, Tw w)
|
||||
{
|
||||
Tout out;
|
||||
typedef typename TConvVec2Base<Tout>::TBase TBout;
|
||||
@ -256,16 +256,16 @@ static Tout _pixScale_CN(Tin &pix, Tw w)
|
||||
return out;
|
||||
}};
|
||||
|
||||
template<typename Tin, typename Tout, typename Tw> static Tout _pixScale(Tin &pix, Tw w)
|
||||
template<typename Tin, typename Tout, typename Tw> static __host__ __device__ Tout _pixScale(Tin &pix, Tw w)
|
||||
{
|
||||
return __pixScale_CN<Tin, Tout, Tw, NC(Tin)>::_pixScale_CN(pix, w);
|
||||
}
|
||||
|
||||
|
||||
template<typename Tin, typename Tout, Ncv32u CN> struct __pixAdd_CN {static Tout _pixAdd_CN(Tout &pix1, Tin &pix2);};
|
||||
template<typename Tin, typename Tout, Ncv32u CN> struct __pixAdd_CN {static __host__ __device__ Tout _pixAdd_CN(Tout &pix1, Tin &pix2);};
|
||||
|
||||
template<typename Tin, typename Tout> struct __pixAdd_CN<Tin, Tout, 1> {
|
||||
static Tout _pixAdd_CN(Tout &pix1, Tin &pix2)
|
||||
static __host__ __device__ Tout _pixAdd_CN(Tout &pix1, Tin &pix2)
|
||||
{
|
||||
Tout out;
|
||||
out.x = pix1.x + pix2.x;
|
||||
@ -273,7 +273,7 @@ static Tout _pixAdd_CN(Tout &pix1, Tin &pix2)
|
||||
}};
|
||||
|
||||
template<typename Tin, typename Tout> struct __pixAdd_CN<Tin, Tout, 3> {
|
||||
static Tout _pixAdd_CN(Tout &pix1, Tin &pix2)
|
||||
static __host__ __device__ Tout _pixAdd_CN(Tout &pix1, Tin &pix2)
|
||||
{
|
||||
Tout out;
|
||||
out.x = pix1.x + pix2.x;
|
||||
@ -283,7 +283,7 @@ static Tout _pixAdd_CN(Tout &pix1, Tin &pix2)
|
||||
}};
|
||||
|
||||
template<typename Tin, typename Tout> struct __pixAdd_CN<Tin, Tout, 4> {
|
||||
static Tout _pixAdd_CN(Tout &pix1, Tin &pix2)
|
||||
static __host__ __device__ Tout _pixAdd_CN(Tout &pix1, Tin &pix2)
|
||||
{
|
||||
Tout out;
|
||||
out.x = pix1.x + pix2.x;
|
||||
@ -293,33 +293,33 @@ static Tout _pixAdd_CN(Tout &pix1, Tin &pix2)
|
||||
return out;
|
||||
}};
|
||||
|
||||
template<typename Tin, typename Tout> static Tout _pixAdd(Tout &pix1, Tin &pix2)
|
||||
template<typename Tin, typename Tout> static __host__ __device__ Tout _pixAdd(Tout &pix1, Tin &pix2)
|
||||
{
|
||||
return __pixAdd_CN<Tin, Tout, NC(Tin)>::_pixAdd_CN(pix1, pix2);
|
||||
}
|
||||
|
||||
|
||||
template<typename Tin, typename Tout, Ncv32u CN> struct __pixDist_CN {static Tout _pixDist_CN(Tin &pix1, Tin &pix2);};
|
||||
template<typename Tin, typename Tout, Ncv32u CN> struct __pixDist_CN {static __host__ __device__ Tout _pixDist_CN(Tin &pix1, Tin &pix2);};
|
||||
|
||||
template<typename Tin, typename Tout> struct __pixDist_CN<Tin, Tout, 1> {
|
||||
static Tout _pixDist_CN(Tin &pix1, Tin &pix2)
|
||||
static __host__ __device__ Tout _pixDist_CN(Tin &pix1, Tin &pix2)
|
||||
{
|
||||
return Tout(SQR(pix1.x - pix2.x));
|
||||
}};
|
||||
|
||||
template<typename Tin, typename Tout> struct __pixDist_CN<Tin, Tout, 3> {
|
||||
static Tout _pixDist_CN(Tin &pix1, Tin &pix2)
|
||||
static __host__ __device__ Tout _pixDist_CN(Tin &pix1, Tin &pix2)
|
||||
{
|
||||
return Tout(SQR(pix1.x - pix2.x) + SQR(pix1.y - pix2.y) + SQR(pix1.z - pix2.z));
|
||||
}};
|
||||
|
||||
template<typename Tin, typename Tout> struct __pixDist_CN<Tin, Tout, 4> {
|
||||
static Tout _pixDist_CN(Tin &pix1, Tin &pix2)
|
||||
static __host__ __device__ Tout _pixDist_CN(Tin &pix1, Tin &pix2)
|
||||
{
|
||||
return Tout(SQR(pix1.x - pix2.x) + SQR(pix1.y - pix2.y) + SQR(pix1.z - pix2.z) + SQR(pix1.w - pix2.w));
|
||||
}};
|
||||
|
||||
template<typename Tin, typename Tout> static Tout _pixDist(Tin &pix1, Tin &pix2)
|
||||
template<typename Tin, typename Tout> static __host__ __device__ Tout _pixDist(Tin &pix1, Tin &pix2)
|
||||
{
|
||||
return __pixDist_CN<Tin, Tout, NC(Tin)>::_pixDist_CN(pix1, pix2);
|
||||
}
|
||||
|
@ -43,15 +43,16 @@
|
||||
#include <cuda_runtime.h>
|
||||
#include <stdio.h>
|
||||
#include "NCV.hpp"
|
||||
#include "NCVAlg.hpp"
|
||||
#include "NCVPyramid.hpp"
|
||||
#include "NCVPixelOperations.hpp"
|
||||
|
||||
#ifdef _WIN32
|
||||
#ifdef _WIN32
|
||||
|
||||
template<typename T, Ncv32u CN> struct __average4_CN {static T _average4_CN(const T &p00, const T &p01, const T &p10, const T &p11);};
|
||||
template<typename T, Ncv32u CN> struct __average4_CN {static __host__ __device__ T _average4_CN(const T &p00, const T &p01, const T &p10, const T &p11);};
|
||||
|
||||
template<typename T> struct __average4_CN<T, 1> {
|
||||
static T _average4_CN(const T &p00, const T &p01, const T &p10, const T &p11)
|
||||
static __host__ __device__ T _average4_CN(const T &p00, const T &p01, const T &p10, const T &p11)
|
||||
{
|
||||
T out;
|
||||
out.x = ((Ncv32s)p00.x + p01.x + p10.x + p11.x + 2) / 4;
|
||||
@ -59,7 +60,7 @@ static T _average4_CN(const T &p00, const T &p01, const T &p10, const T &p11)
|
||||
}};
|
||||
|
||||
template<> struct __average4_CN<float1, 1> {
|
||||
static float1 _average4_CN(const float1 &p00, const float1 &p01, const float1 &p10, const float1 &p11)
|
||||
static __host__ __device__ float1 _average4_CN(const float1 &p00, const float1 &p01, const float1 &p10, const float1 &p11)
|
||||
{
|
||||
float1 out;
|
||||
out.x = (p00.x + p01.x + p10.x + p11.x) / 4;
|
||||
@ -67,7 +68,7 @@ static float1 _average4_CN(const float1 &p00, const float1 &p01, const float1 &p
|
||||
}};
|
||||
|
||||
template<> struct __average4_CN<double1, 1> {
|
||||
static double1 _average4_CN(const double1 &p00, const double1 &p01, const double1 &p10, const double1 &p11)
|
||||
static __host__ __device__ double1 _average4_CN(const double1 &p00, const double1 &p01, const double1 &p10, const double1 &p11)
|
||||
{
|
||||
double1 out;
|
||||
out.x = (p00.x + p01.x + p10.x + p11.x) / 4;
|
||||
@ -75,7 +76,7 @@ static double1 _average4_CN(const double1 &p00, const double1 &p01, const double
|
||||
}};
|
||||
|
||||
template<typename T> struct __average4_CN<T, 3> {
|
||||
static T _average4_CN(const T &p00, const T &p01, const T &p10, const T &p11)
|
||||
static __host__ __device__ T _average4_CN(const T &p00, const T &p01, const T &p10, const T &p11)
|
||||
{
|
||||
T out;
|
||||
out.x = ((Ncv32s)p00.x + p01.x + p10.x + p11.x + 2) / 4;
|
||||
@ -85,7 +86,7 @@ static T _average4_CN(const T &p00, const T &p01, const T &p10, const T &p11)
|
||||
}};
|
||||
|
||||
template<> struct __average4_CN<float3, 3> {
|
||||
static float3 _average4_CN(const float3 &p00, const float3 &p01, const float3 &p10, const float3 &p11)
|
||||
static __host__ __device__ float3 _average4_CN(const float3 &p00, const float3 &p01, const float3 &p10, const float3 &p11)
|
||||
{
|
||||
float3 out;
|
||||
out.x = (p00.x + p01.x + p10.x + p11.x) / 4;
|
||||
@ -95,7 +96,7 @@ static float3 _average4_CN(const float3 &p00, const float3 &p01, const float3 &p
|
||||
}};
|
||||
|
||||
template<> struct __average4_CN<double3, 3> {
|
||||
static double3 _average4_CN(const double3 &p00, const double3 &p01, const double3 &p10, const double3 &p11)
|
||||
static __host__ __device__ double3 _average4_CN(const double3 &p00, const double3 &p01, const double3 &p10, const double3 &p11)
|
||||
{
|
||||
double3 out;
|
||||
out.x = (p00.x + p01.x + p10.x + p11.x) / 4;
|
||||
@ -105,7 +106,7 @@ static double3 _average4_CN(const double3 &p00, const double3 &p01, const double
|
||||
}};
|
||||
|
||||
template<typename T> struct __average4_CN<T, 4> {
|
||||
static T _average4_CN(const T &p00, const T &p01, const T &p10, const T &p11)
|
||||
static __host__ __device__ T _average4_CN(const T &p00, const T &p01, const T &p10, const T &p11)
|
||||
{
|
||||
T out;
|
||||
out.x = ((Ncv32s)p00.x + p01.x + p10.x + p11.x + 2) / 4;
|
||||
@ -116,7 +117,7 @@ static T _average4_CN(const T &p00, const T &p01, const T &p10, const T &p11)
|
||||
}};
|
||||
|
||||
template<> struct __average4_CN<float4, 4> {
|
||||
static float4 _average4_CN(const float4 &p00, const float4 &p01, const float4 &p10, const float4 &p11)
|
||||
static __host__ __device__ float4 _average4_CN(const float4 &p00, const float4 &p01, const float4 &p10, const float4 &p11)
|
||||
{
|
||||
float4 out;
|
||||
out.x = (p00.x + p01.x + p10.x + p11.x) / 4;
|
||||
@ -127,7 +128,7 @@ static float4 _average4_CN(const float4 &p00, const float4 &p01, const float4 &p
|
||||
}};
|
||||
|
||||
template<> struct __average4_CN<double4, 4> {
|
||||
static double4 _average4_CN(const double4 &p00, const double4 &p01, const double4 &p10, const double4 &p11)
|
||||
static __host__ __device__ double4 _average4_CN(const double4 &p00, const double4 &p01, const double4 &p10, const double4 &p11)
|
||||
{
|
||||
double4 out;
|
||||
out.x = (p00.x + p01.x + p10.x + p11.x) / 4;
|
||||
@ -137,23 +138,23 @@ static double4 _average4_CN(const double4 &p00, const double4 &p01, const double
|
||||
return out;
|
||||
}};
|
||||
|
||||
template<typename T> static T _average4(const T &p00, const T &p01, const T &p10, const T &p11)
|
||||
template<typename T> static __host__ __device__ T _average4(const T &p00, const T &p01, const T &p10, const T &p11)
|
||||
{
|
||||
return __average4_CN<T, NC(T)>::_average4_CN(p00, p01, p10, p11);
|
||||
}
|
||||
|
||||
|
||||
template<typename Tin, typename Tout, Ncv32u CN> struct __lerp_CN {static Tout _lerp_CN(const Tin &a, const Tin &b, Ncv32f d);};
|
||||
template<typename Tin, typename Tout, Ncv32u CN> struct __lerp_CN {static __host__ __device__ Tout _lerp_CN(const Tin &a, const Tin &b, Ncv32f d);};
|
||||
|
||||
template<typename Tin, typename Tout> struct __lerp_CN<Tin, Tout, 1> {
|
||||
static Tout _lerp_CN(const Tin &a, const Tin &b, Ncv32f d)
|
||||
static __host__ __device__ Tout _lerp_CN(const Tin &a, const Tin &b, Ncv32f d)
|
||||
{
|
||||
typedef typename TConvVec2Base<Tout>::TBase TB;
|
||||
return _pixMake(TB(b.x * d + a.x * (1 - d)));
|
||||
}};
|
||||
|
||||
template<typename Tin, typename Tout> struct __lerp_CN<Tin, Tout, 3> {
|
||||
static Tout _lerp_CN(const Tin &a, const Tin &b, Ncv32f d)
|
||||
static __host__ __device__ Tout _lerp_CN(const Tin &a, const Tin &b, Ncv32f d)
|
||||
{
|
||||
typedef typename TConvVec2Base<Tout>::TBase TB;
|
||||
return _pixMake(TB(b.x * d + a.x * (1 - d)),
|
||||
@ -162,7 +163,7 @@ static Tout _lerp_CN(const Tin &a, const Tin &b, Ncv32f d)
|
||||
}};
|
||||
|
||||
template<typename Tin, typename Tout> struct __lerp_CN<Tin, Tout, 4> {
|
||||
static Tout _lerp_CN(const Tin &a, const Tin &b, Ncv32f d)
|
||||
static __host__ __device__ Tout _lerp_CN(const Tin &a, const Tin &b, Ncv32f d)
|
||||
{
|
||||
typedef typename TConvVec2Base<Tout>::TBase TB;
|
||||
return _pixMake(TB(b.x * d + a.x * (1 - d)),
|
||||
@ -171,7 +172,7 @@ static Tout _lerp_CN(const Tin &a, const Tin &b, Ncv32f d)
|
||||
TB(b.w * d + a.w * (1 - d)));
|
||||
}};
|
||||
|
||||
template<typename Tin, typename Tout> static Tout _lerp(const Tin &a, const Tin &b, Ncv32f d)
|
||||
template<typename Tin, typename Tout> static __host__ __device__ Tout _lerp(const Tin &a, const Tin &b, Ncv32f d)
|
||||
{
|
||||
return __lerp_CN<Tin, Tout, NC(Tin)>::_lerp_CN(a, b, d);
|
||||
}
|
||||
@ -208,6 +209,74 @@ static T _interpBilinear(const NCVMatrix<T> &refLayer, Ncv32f x, Ncv32f y)
|
||||
}
|
||||
|
||||
|
||||
template<typename T>
|
||||
__global__ void kernelDownsampleX2(T *d_src,
|
||||
Ncv32u srcPitch,
|
||||
T *d_dst,
|
||||
Ncv32u dstPitch,
|
||||
NcvSize32u dstRoi)
|
||||
{
|
||||
Ncv32u i = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
Ncv32u j = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
|
||||
if (i < dstRoi.height && j < dstRoi.width)
|
||||
{
|
||||
T *d_src_line1 = (T *)((Ncv8u *)d_src + (2 * i + 0) * srcPitch);
|
||||
T *d_src_line2 = (T *)((Ncv8u *)d_src + (2 * i + 1) * srcPitch);
|
||||
T *d_dst_line = (T *)((Ncv8u *)d_dst + i * dstPitch);
|
||||
|
||||
T p00 = d_src_line1[2*j+0];
|
||||
T p01 = d_src_line1[2*j+1];
|
||||
T p10 = d_src_line2[2*j+0];
|
||||
T p11 = d_src_line2[2*j+1];
|
||||
|
||||
d_dst_line[j] = _average4(p00, p01, p10, p11);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<typename T>
|
||||
__global__ void kernelInterpolateFrom1(T *d_srcTop,
|
||||
Ncv32u srcTopPitch,
|
||||
NcvSize32u szTopRoi,
|
||||
T *d_dst,
|
||||
Ncv32u dstPitch,
|
||||
NcvSize32u dstRoi)
|
||||
{
|
||||
Ncv32u i = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
Ncv32u j = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
|
||||
if (i < dstRoi.height && j < dstRoi.width)
|
||||
{
|
||||
Ncv32f ptTopX = 1.0f * (szTopRoi.width - 1) * j / (dstRoi.width - 1);
|
||||
Ncv32f ptTopY = 1.0f * (szTopRoi.height - 1) * i / (dstRoi.height - 1);
|
||||
Ncv32u xl = (Ncv32u)ptTopX;
|
||||
Ncv32u xh = xl+1;
|
||||
Ncv32f dx = ptTopX - xl;
|
||||
Ncv32u yl = (Ncv32u)ptTopY;
|
||||
Ncv32u yh = yl+1;
|
||||
Ncv32f dy = ptTopY - yl;
|
||||
|
||||
T *d_src_line1 = (T *)((Ncv8u *)d_srcTop + yl * srcTopPitch);
|
||||
T *d_src_line2 = (T *)((Ncv8u *)d_srcTop + yh * srcTopPitch);
|
||||
T *d_dst_line = (T *)((Ncv8u *)d_dst + i * dstPitch);
|
||||
|
||||
T p00, p01, p10, p11;
|
||||
p00 = d_src_line1[xl];
|
||||
p01 = xh < szTopRoi.width ? d_src_line1[xh] : p00;
|
||||
p10 = yh < szTopRoi.height ? d_src_line2[xl] : p00;
|
||||
p11 = (xh < szTopRoi.width && yh < szTopRoi.height) ? d_src_line2[xh] : p00;
|
||||
typedef typename TConvBase2Vec<Ncv32f, NC(T)>::TVec TVFlt;
|
||||
TVFlt m_00_01 = _lerp<T, TVFlt>(p00, p01, dx);
|
||||
TVFlt m_10_11 = _lerp<T, TVFlt>(p10, p11, dx);
|
||||
TVFlt mixture = _lerp<TVFlt, TVFlt>(m_00_01, m_10_11, dy);
|
||||
T outPix = _pixDemoteClampZ<TVFlt, T>(mixture);
|
||||
|
||||
d_dst_line[j] = outPix;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template <class T>
|
||||
NCVImagePyramid<T>::NCVImagePyramid(const NCVMatrix<T> &img,
|
||||
Ncv8u numLayers,
|
||||
@ -215,7 +284,7 @@ NCVImagePyramid<T>::NCVImagePyramid(const NCVMatrix<T> &img,
|
||||
cudaStream_t cuStream)
|
||||
{
|
||||
this->_isInitialized = false;
|
||||
ncvAssertPrintReturn(img.memType() == alloc.memType(), "NCVImagePyramid_host::ctor error", );
|
||||
ncvAssertPrintReturn(img.memType() == alloc.memType(), "NCVImagePyramid::ctor error", );
|
||||
|
||||
this->layer0 = &img;
|
||||
NcvSize32u szLastLayer(img.width(), img.height());
|
||||
@ -229,6 +298,10 @@ NCVImagePyramid<T>::NCVImagePyramid(const NCVMatrix<T> &img,
|
||||
numLayers = 255; //it will cut-off when any of the dimensions goes 1
|
||||
}
|
||||
|
||||
#ifdef SELF_CHECK_GPU
|
||||
NCVMemNativeAllocator allocCPU(NCVMemoryTypeHostPinned, 512);
|
||||
#endif
|
||||
|
||||
for (Ncv32u i=0; i<(Ncv32u)numLayers-1; i++)
|
||||
{
|
||||
NcvSize32u szCurLayer(szLastLayer.width / 2, szLastLayer.height / 2);
|
||||
@ -238,7 +311,7 @@ NCVImagePyramid<T>::NCVImagePyramid(const NCVMatrix<T> &img,
|
||||
}
|
||||
|
||||
this->pyramid.push_back(new NCVMatrixAlloc<T>(alloc, szCurLayer.width, szCurLayer.height));
|
||||
ncvAssertPrintReturn(((NCVMatrixAlloc<T> *)(this->pyramid[i]))->isMemAllocated(), "NCVImagePyramid_host::ctor error", );
|
||||
ncvAssertPrintReturn(((NCVMatrixAlloc<T> *)(this->pyramid[i]))->isMemAllocated(), "NCVImagePyramid::ctor error", );
|
||||
this->nLayers++;
|
||||
|
||||
//fill in the layer
|
||||
@ -249,7 +322,37 @@ NCVImagePyramid<T>::NCVImagePyramid(const NCVMatrix<T> &img,
|
||||
|
||||
if (bDeviceCode)
|
||||
{
|
||||
//TODO: in cuStream
|
||||
dim3 bDim(16, 8);
|
||||
dim3 gDim(divUp(szCurLayer.width, bDim.x), divUp(szCurLayer.height, bDim.y));
|
||||
kernelDownsampleX2<<<gDim, bDim, 0, cuStream>>>(prevLayer->ptr(),
|
||||
prevLayer->pitch(),
|
||||
curLayer->ptr(),
|
||||
curLayer->pitch(),
|
||||
szCurLayer);
|
||||
ncvAssertPrintReturn(cudaSuccess == cudaGetLastError(), "NCVImagePyramid::ctor error", );
|
||||
|
||||
#ifdef SELF_CHECK_GPU
|
||||
NCVMatrixAlloc<T> h_prevLayer(allocCPU, prevLayer->width(), prevLayer->height());
|
||||
ncvAssertPrintReturn(h_prevLayer.isMemAllocated(), "Validation failure in NCVImagePyramid::ctor", );
|
||||
NCVMatrixAlloc<T> h_curLayer(allocCPU, curLayer->width(), curLayer->height());
|
||||
ncvAssertPrintReturn(h_curLayer.isMemAllocated(), "Validation failure in NCVImagePyramid::ctor", );
|
||||
ncvAssertPrintReturn(NCV_SUCCESS == prevLayer->copy2D(h_prevLayer, prevLayer->size(), cuStream), "Validation failure in NCVImagePyramid::ctor", );
|
||||
ncvAssertPrintReturn(NCV_SUCCESS == curLayer->copy2D(h_curLayer, curLayer->size(), cuStream), "Validation failure in NCVImagePyramid::ctor", );
|
||||
ncvAssertPrintReturn(cudaSuccess == cudaStreamSynchronize(cuStream), "Validation failure in NCVImagePyramid::ctor", );
|
||||
for (Ncv32u i=0; i<szCurLayer.height; i++)
|
||||
{
|
||||
for (Ncv32u j=0; j<szCurLayer.width; j++)
|
||||
{
|
||||
T p00 = h_prevLayer.at(2*j+0, 2*i+0);
|
||||
T p01 = h_prevLayer.at(2*j+1, 2*i+0);
|
||||
T p10 = h_prevLayer.at(2*j+0, 2*i+1);
|
||||
T p11 = h_prevLayer.at(2*j+1, 2*i+1);
|
||||
T outGold = _average4(p00, p01, p10, p11);
|
||||
T outGPU = h_curLayer.at(j, i);
|
||||
ncvAssertPrintReturn(0 == memcmp(&outGold, &outGPU, sizeof(T)), "Validation failure in NCVImagePyramid::ctor with kernelDownsampleX2", );
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -340,11 +443,45 @@ NCVStatus NCVImagePyramid<T>::getLayer(NCVMatrix<T> &outImg,
|
||||
NCV_SET_SKIP_COND(outImg.memType() == NCVMemoryTypeNone);
|
||||
NcvBool bDeviceCode = this->layer0->memType() == NCVMemoryTypeDevice;
|
||||
|
||||
#ifdef SELF_CHECK_GPU
|
||||
NCVMemNativeAllocator allocCPU(NCVMemoryTypeHostPinned, 512);
|
||||
#endif
|
||||
|
||||
NCV_SKIP_COND_BEGIN
|
||||
|
||||
if (bDeviceCode)
|
||||
{
|
||||
//TODO: in cuStream
|
||||
ncvAssertReturn(bUse2Refs == false, NCV_NOT_IMPLEMENTED);
|
||||
|
||||
dim3 bDim(16, 8);
|
||||
dim3 gDim(divUp(outRoi.width, bDim.x), divUp(outRoi.height, bDim.y));
|
||||
kernelInterpolateFrom1<<<gDim, bDim, 0, cuStream>>>(lastLayer->ptr(),
|
||||
lastLayer->pitch(),
|
||||
lastLayer->size(),
|
||||
outImg.ptr(),
|
||||
outImg.pitch(),
|
||||
outRoi);
|
||||
ncvAssertCUDAReturn(cudaGetLastError(), NCV_CUDA_ERROR);
|
||||
|
||||
#ifdef SELF_CHECK_GPU
|
||||
ncvSafeMatAlloc(h_lastLayer, T, allocCPU, lastLayer->width(), lastLayer->height(), NCV_ALLOCATOR_BAD_ALLOC);
|
||||
ncvSafeMatAlloc(h_outImg, T, allocCPU, outImg.width(), outImg.height(), NCV_ALLOCATOR_BAD_ALLOC);
|
||||
ncvAssertReturnNcvStat(lastLayer->copy2D(h_lastLayer, lastLayer->size(), cuStream));
|
||||
ncvAssertReturnNcvStat(outImg.copy2D(h_outImg, outRoi, cuStream));
|
||||
ncvAssertCUDAReturn(cudaStreamSynchronize(cuStream), NCV_CUDA_ERROR);
|
||||
|
||||
for (Ncv32u i=0; i<outRoi.height; i++)
|
||||
{
|
||||
for (Ncv32u j=0; j<outRoi.width; j++)
|
||||
{
|
||||
NcvSize32u szTopLayer(lastLayer->width(), lastLayer->height());
|
||||
Ncv32f ptTopX = 1.0f * (szTopLayer.width - 1) * j / (outRoi.width - 1);
|
||||
Ncv32f ptTopY = 1.0f * (szTopLayer.height - 1) * i / (outRoi.height - 1);
|
||||
T outGold = _interpBilinear(h_lastLayer, ptTopX, ptTopY);
|
||||
ncvAssertPrintReturn(0 == memcmp(&outGold, &h_outImg.at(j,i), sizeof(T)), "Validation failure in NCVImagePyramid::ctor with kernelInterpolateFrom1", NCV_UNKNOWN_ERROR);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -395,6 +532,6 @@ template class NCVImagePyramid<uint3>;
|
||||
template class NCVImagePyramid<uint4>;
|
||||
template class NCVImagePyramid<float1>;
|
||||
template class NCVImagePyramid<float3>;
|
||||
template class NCVImagePyramid<float4>;
|
||||
|
||||
template class NCVImagePyramid<float4>;
|
||||
|
||||
#endif //_WIN32
|
||||
|
Loading…
x
Reference in New Issue
Block a user