added __forceinline__ to device functions
fixed BruteForceMatcher (BFM) warning ("cannot tell what pointer points to")
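The two changes in this commit are mechanical but easy to misapply by hand, so the following is a minimal, self-contained CUDA sketch of the pattern the diff repeats: mark small __device__ members __forceinline__, and read global data through a load helper instead of dereferencing a raw pointer inside the functor, which is what silences the "cannot tell what pointer points to" warning. This is illustrative only: force_glob_load and MaskView below are hypothetical stand-ins for the ForceGlob<T>::Load helper from opencv2/gpu/device/datamov_utils.hpp and for PtrStep; the actual changes are in the hunks that follow.

#include <cuda_runtime.h>

// Hypothetical stand-in for ForceGlob<T>::Load from datamov_utils.hpp:
// copying into a local value before use keeps the compiler from warning
// that it cannot tell which memory space the pointer refers to.
template <typename T>
__device__ __forceinline__ void force_glob_load(const T* ptr, int offset, T& val)
{
    val = ptr[offset];  // the real helper may issue an explicit global load on older targets
}

// Hypothetical mask wrapper, mirroring the SingleMask functor changed below.
struct MaskView
{
    const unsigned char* data;
    size_t step;

    // First change: __forceinline__ on small __device__ members asks nvcc to
    // inline them even across template instantiations.
    __device__ __forceinline__ bool operator()(int queryIdx, int trainIdx) const
    {
        unsigned char val;
        force_glob_load(data + queryIdx * step, trainIdx, val);  // second change
        return val != 0;
    }
};

__global__ void countMasked(MaskView mask, int rows, int cols, int* out)
{
    int x = blockIdx.x * blockDim.x + threadIdx.x;
    int y = blockIdx.y * blockDim.y + threadIdx.y;
    if (x < cols && y < rows && mask(y, x))
        atomicAdd(out, 1);
}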
@@ -42,6 +42,7 @@

#include "internal_shared.hpp"
#include "opencv2/gpu/device/limits_gpu.hpp"
#include "opencv2/gpu/device/datamov_utils.hpp"

using namespace cv::gpu;
using namespace cv::gpu::device;
@@ -60,7 +61,7 @@ namespace cv { namespace gpu { namespace bfmatcher
public:
explicit SingleMask(const PtrStep& mask_) : mask(mask_) {}

__device__ bool operator()(int queryIdx, int trainIdx) const
__device__ __forceinline__ bool operator()(int queryIdx, int trainIdx) const
{
return mask.ptr(queryIdx)[trainIdx] != 0;
}
@@ -74,14 +75,15 @@ namespace cv { namespace gpu { namespace bfmatcher
public:
explicit MaskCollection(PtrStep* maskCollection_) : maskCollection(maskCollection_) {}

__device__ void nextMask()
__device__ __forceinline__ void nextMask()
{
curMask = *maskCollection++;
}

__device__ bool operator()(int queryIdx, int trainIdx) const
{
return curMask.data == 0 || curMask.ptr(queryIdx)[trainIdx] != 0;
__device__ __forceinline__ bool operator()(int queryIdx, int trainIdx) const
{
uchar val;
return curMask.data == 0 || (ForceGlob<uchar>::Load(curMask.ptr(queryIdx), trainIdx, val), (val != 0));
}

private:
@@ -92,10 +94,10 @@ namespace cv { namespace gpu { namespace bfmatcher
class WithOutMask
{
public:
__device__ void nextMask()
__device__ __forceinline__ void nextMask()
{
}
__device__ bool operator()(int queryIdx, int trainIdx) const
__device__ __forceinline__ bool operator()(int queryIdx, int trainIdx) const
{
return true;
}
@@ -132,19 +134,19 @@ namespace cv { namespace gpu { namespace bfmatcher
typedef int ResultType;
typedef int ValueType;

__device__ L1Dist() : mySum(0) {}
__device__ __forceinline__ L1Dist() : mySum(0) {}

__device__ void reduceIter(int val1, int val2)
__device__ __forceinline__ void reduceIter(int val1, int val2)
{
mySum = __sad(val1, val2, mySum);
}

template <int BLOCK_DIM_X> __device__ void reduceAll(int* sdiff_row)
template <int BLOCK_DIM_X> __device__ __forceinline__ void reduceAll(int* sdiff_row)
{
SumReductor<BLOCK_DIM_X>::reduce(sdiff_row, mySum);
}

__device__ operator int() const
__device__ __forceinline__ operator int() const
{
return mySum;
}
@@ -158,19 +160,19 @@ namespace cv { namespace gpu { namespace bfmatcher
typedef float ResultType;
typedef float ValueType;

__device__ L1Dist() : mySum(0.0f) {}
__device__ __forceinline__ L1Dist() : mySum(0.0f) {}

__device__ void reduceIter(float val1, float val2)
__device__ __forceinline__ void reduceIter(float val1, float val2)
{
mySum += fabs(val1 - val2);
}

template <int BLOCK_DIM_X> __device__ void reduceAll(float* sdiff_row)
template <int BLOCK_DIM_X> __device__ __forceinline__ void reduceAll(float* sdiff_row)
{
SumReductor<BLOCK_DIM_X>::reduce(sdiff_row, mySum);
}

__device__ operator float() const
__device__ __forceinline__ operator float() const
{
return mySum;
}
@@ -185,20 +187,20 @@ namespace cv { namespace gpu { namespace bfmatcher
typedef float ResultType;
typedef float ValueType;

__device__ L2Dist() : mySum(0.0f) {}
__device__ __forceinline__ L2Dist() : mySum(0.0f) {}

__device__ void reduceIter(float val1, float val2)
__device__ __forceinline__ void reduceIter(float val1, float val2)
{
float reg = val1 - val2;
mySum += reg * reg;
}

template <int BLOCK_DIM_X> __device__ void reduceAll(float* sdiff_row)
template <int BLOCK_DIM_X> __device__ __forceinline__ void reduceAll(float* sdiff_row)
{
SumReductor<BLOCK_DIM_X>::reduce(sdiff_row, mySum);
}

__device__ operator float() const
__device__ __forceinline__ operator float() const
{
return sqrtf(mySum);
}
@@ -213,19 +215,19 @@ namespace cv { namespace gpu { namespace bfmatcher
typedef int ResultType;
typedef int ValueType;

__device__ HammingDist() : mySum(0) {}
__device__ __forceinline__ HammingDist() : mySum(0) {}

__device__ void reduceIter(int val1, int val2)
__device__ __forceinline__ void reduceIter(int val1, int val2)
{
mySum += __popc(val1 ^ val2);
}

template <int BLOCK_DIM_X> __device__ void reduceAll(int* sdiff_row)
template <int BLOCK_DIM_X> __device__ __forceinline__ void reduceAll(int* sdiff_row)
{
SumReductor<BLOCK_DIM_X>::reduce(sdiff_row, mySum);
}

__device__ operator int() const
__device__ __forceinline__ operator int() const
{
return mySum;
}
@@ -241,7 +243,11 @@ namespace cv { namespace gpu { namespace bfmatcher
__device__ void reduceDescDiff(const T* queryDescs, const T* trainDescs, int desc_len, Dist& dist, typename Dist::ResultType* sdiff_row)
{
for (int i = threadIdx.x; i < desc_len; i += BLOCK_DIM_X)
dist.reduceIter(queryDescs[i], trainDescs[i]);
{
T trainVal;
ForceGlob<T>::Load(trainDescs, i, trainVal);
dist.reduceIter(queryDescs[i], trainVal);
}

dist.reduceAll<BLOCK_DIM_X>(sdiff_row);
}
@@ -282,7 +288,9 @@ namespace cv { namespace gpu { namespace bfmatcher
{
if (ind < desc_len)
{
dist.reduceIter(*queryVals, trainDescs[ind]);
T trainVal;
ForceGlob<T>::Load(trainDescs, ind, trainVal);
dist.reduceIter(*queryVals, trainVal);

++queryVals;

@@ -293,7 +301,9 @@ namespace cv { namespace gpu { namespace bfmatcher
template <typename Dist, typename T>
static __device__ void calcWithoutCheck(const typename Dist::ValueType* queryVals, const T* trainDescs, Dist& dist)
{
dist.reduceIter(*queryVals, *trainDescs);
T trainVal;
ForceGlob<T>::Load(trainDescs, 0, trainVal);
dist.reduceIter(*queryVals, trainVal);

++queryVals;
trainDescs += blockDim.x;
@@ -304,13 +314,13 @@ namespace cv { namespace gpu { namespace bfmatcher
template <> struct UnrollDescDiff<0>
{
template <typename Dist, typename T>
static __device__ void calcCheck(const typename Dist::ValueType* queryVals, const T* trainDescs, int desc_len,
static __device__ __forceinline__ void calcCheck(const typename Dist::ValueType* queryVals, const T* trainDescs, int desc_len,
Dist& dist, int ind)
{
}

template <typename Dist, typename T>
static __device__ void calcWithoutCheck(const typename Dist::ValueType* queryVals, const T* trainDescs, Dist& dist)
static __device__ __forceinline__ void calcWithoutCheck(const typename Dist::ValueType* queryVals, const T* trainDescs, Dist& dist)
{
}
};
@@ -320,7 +330,7 @@ namespace cv { namespace gpu { namespace bfmatcher
struct DescDiffCalculator<BLOCK_DIM_X, MAX_DESCRIPTORS_LEN, false>
{
template <typename Dist, typename T>
static __device__ void calc(const typename Dist::ValueType* queryVals, const T* trainDescs, int desc_len, Dist& dist)
static __device__ __forceinline__ void calc(const typename Dist::ValueType* queryVals, const T* trainDescs, int desc_len, Dist& dist)
{
UnrollDescDiff<MAX_DESCRIPTORS_LEN / BLOCK_DIM_X>::calcCheck(queryVals, trainDescs, desc_len, dist, threadIdx.x);
}
@@ -329,14 +339,14 @@ namespace cv { namespace gpu { namespace bfmatcher
struct DescDiffCalculator<BLOCK_DIM_X, MAX_DESCRIPTORS_LEN, true>
{
template <typename Dist, typename T>
static __device__ void calc(const typename Dist::ValueType* queryVals, const T* trainDescs, int desc_len, Dist& dist)
static __device__ __forceinline__ void calc(const typename Dist::ValueType* queryVals, const T* trainDescs, int desc_len, Dist& dist)
{
UnrollDescDiff<MAX_DESCRIPTORS_LEN / BLOCK_DIM_X>::calcWithoutCheck(queryVals, trainDescs + threadIdx.x, dist);
}
};

template <int BLOCK_DIM_X, int MAX_DESCRIPTORS_LEN, bool DESC_LEN_EQ_MAX_LEN, typename Dist, typename T>
__device__ void reduceDescDiffCached(const typename Dist::ValueType* queryVals, const T* trainDescs, int desc_len, Dist& dist, typename Dist::ResultType* sdiff_row)
__device__ __forceinline__ void reduceDescDiffCached(const typename Dist::ValueType* queryVals, const T* trainDescs, int desc_len, Dist& dist, typename Dist::ResultType* sdiff_row)
{
DescDiffCalculator<BLOCK_DIM_X, MAX_DESCRIPTORS_LEN, DESC_LEN_EQ_MAX_LEN>::calc(queryVals, trainDescs, desc_len, dist);
@@ -419,13 +429,13 @@ namespace cv { namespace gpu { namespace bfmatcher
class ReduceDescCalculatorSimple
{
public:
__device__ void prepare(const T* queryDescs_, int, void*)
__device__ __forceinline__ void prepare(const T* queryDescs_, int, void*)
{
queryDescs = queryDescs_;
}

template <typename Dist>
__device__ void calc(const T* trainDescs, int desc_len, Dist& dist, typename Dist::ResultType* sdiff_row) const
__device__ __forceinline__ void calc(const T* trainDescs, int desc_len, Dist& dist, typename Dist::ResultType* sdiff_row) const
{
reduceDescDiff<BLOCK_DIM_X>(queryDescs, trainDescs, desc_len, dist, sdiff_row);
}
@@ -438,13 +448,13 @@ namespace cv { namespace gpu { namespace bfmatcher
class ReduceDescCalculatorCached
{
public:
__device__ void prepare(const T* queryDescs, int desc_len, U* smem)
__device__ __forceinline__ void prepare(const T* queryDescs, int desc_len, U* smem)
{
loadDescsVals<BLOCK_DIM_X, MAX_DESCRIPTORS_LEN>(queryDescs, desc_len, queryVals, smem);
}

template <typename Dist>
__device__ void calc(const T* trainDescs, int desc_len, Dist& dist, typename Dist::ResultType* sdiff_row) const
__device__ __forceinline__ void calc(const T* trainDescs, int desc_len, Dist& dist, typename Dist::ResultType* sdiff_row) const
{
reduceDescDiffCached<BLOCK_DIM_X, MAX_DESCRIPTORS_LEN, DESC_LEN_EQ_MAX_LEN>(queryVals, trainDescs, desc_len, dist, sdiff_row);
}
@@ -496,13 +506,13 @@ namespace cv { namespace gpu { namespace bfmatcher
}

template <typename Dist, typename ReduceDescCalculator, typename Mask>
__device__ void loop(int queryIdx, Mask& m, const ReduceDescCalculator& reduceDescCalc,
__device__ __forceinline__ void loop(int queryIdx, Mask& m, const ReduceDescCalculator& reduceDescCalc,
typename Dist::ResultType& myMin, int& myBestTrainIdx, int& myBestImgIdx, typename Dist::ResultType* sdiff_row) const
{
matchDescs<Dist>(queryIdx, 0, trainDescs, m, reduceDescCalc, myMin, myBestTrainIdx, myBestImgIdx, sdiff_row);
}

__device__ int desc_len() const
__device__ __forceinline__ int desc_len() const
{
return trainDescs.cols;
}
@@ -532,7 +542,7 @@ namespace cv { namespace gpu { namespace bfmatcher
}
}

__device__ int desc_len() const
__device__ __forceinline__ int desc_len() const
{
return desclen;
}
@@ -56,7 +56,7 @@ namespace cv { namespace gpu

struct TransformOp
{
__device__ float3 operator()(float3 p) const
__device__ __forceinline__ float3 operator()(float3 p) const
{
return make_float3(
crot0.x * p.x + crot0.y * p.y + crot0.z * p.z + ctransl.x,
@@ -89,7 +89,7 @@ namespace cv { namespace gpu

struct ProjectOp
{
__device__ float2 operator()(float3 p) const
__device__ __forceinline__ float2 operator()(float3 p) const
{
// Rotate and translate in 3D
float3 t = make_float3(
@@ -128,7 +128,7 @@ namespace cv { namespace gpu
return SOLVE_PNP_RANSAC_MAX_NUM_ITERS;
}

__device__ float sqr(float x)
__device__ __forceinline__ float sqr(float x)
{
return x * x;
}
@@ -59,38 +59,38 @@ namespace cv { namespace gpu { namespace color
|
||||
template<> struct ColorChannel<uchar>
|
||||
{
|
||||
typedef float worktype_f;
|
||||
static __device__ uchar max() { return UCHAR_MAX; }
|
||||
static __device__ uchar half() { return (uchar)(max()/2 + 1); }
|
||||
static __device__ __forceinline__ uchar max() { return UCHAR_MAX; }
|
||||
static __device__ __forceinline__ uchar half() { return (uchar)(max()/2 + 1); }
|
||||
};
|
||||
template<> struct ColorChannel<ushort>
|
||||
{
|
||||
typedef float worktype_f;
|
||||
static __device__ ushort max() { return USHRT_MAX; }
|
||||
static __device__ ushort half() { return (ushort)(max()/2 + 1); }
|
||||
static __device__ __forceinline__ ushort max() { return USHRT_MAX; }
|
||||
static __device__ __forceinline__ ushort half() { return (ushort)(max()/2 + 1); }
|
||||
};
|
||||
template<> struct ColorChannel<float>
|
||||
{
|
||||
typedef float worktype_f;
|
||||
static __device__ float max() { return 1.f; }
|
||||
static __device__ float half() { return 0.5f; }
|
||||
static __device__ __forceinline__ float max() { return 1.f; }
|
||||
static __device__ __forceinline__ float half() { return 0.5f; }
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
__device__ void setAlpha(typename TypeVec<T, 3>::vec_t& vec, T val)
|
||||
__device__ __forceinline__ void setAlpha(typename TypeVec<T, 3>::vec_t& vec, T val)
|
||||
{
|
||||
}
|
||||
template <typename T>
|
||||
__device__ void setAlpha(typename TypeVec<T, 4>::vec_t& vec, T val)
|
||||
__device__ __forceinline__ void setAlpha(typename TypeVec<T, 4>::vec_t& vec, T val)
|
||||
{
|
||||
vec.w = val;
|
||||
}
|
||||
template <typename T>
|
||||
__device__ T getAlpha(const typename TypeVec<T, 3>::vec_t& vec)
|
||||
__device__ __forceinline__ T getAlpha(const typename TypeVec<T, 3>::vec_t& vec)
|
||||
{
|
||||
return ColorChannel<T>::max();
|
||||
}
|
||||
template <typename T>
|
||||
__device__ T getAlpha(const typename TypeVec<T, 4>::vec_t& vec)
|
||||
__device__ __forceinline__ T getAlpha(const typename TypeVec<T, 4>::vec_t& vec)
|
||||
{
|
||||
return vec.w;
|
||||
}
|
||||
@@ -114,7 +114,7 @@ namespace cv { namespace gpu { namespace color
|
||||
|
||||
explicit RGB2RGB(int bidx) : bidx(bidx) {}
|
||||
|
||||
__device__ dst_t operator()(const src_t& src) const
|
||||
__device__ __forceinline__ dst_t operator()(const src_t& src) const
|
||||
{
|
||||
dst_t dst;
|
||||
|
||||
@@ -179,7 +179,7 @@ namespace cv { namespace gpu { namespace color
|
||||
template <> struct RGB5x52RGBConverter<5>
|
||||
{
|
||||
template <typename D>
|
||||
static __device__ void cvt(uint src, D& dst, int bidx)
|
||||
static __device__ __forceinline__ void cvt(uint src, D& dst, int bidx)
|
||||
{
|
||||
(&dst.x)[bidx] = (uchar)(src << 3);
|
||||
dst.y = (uchar)((src >> 2) & ~7);
|
||||
@@ -190,7 +190,7 @@ namespace cv { namespace gpu { namespace color
|
||||
template <> struct RGB5x52RGBConverter<6>
|
||||
{
|
||||
template <typename D>
|
||||
static __device__ void cvt(uint src, D& dst, int bidx)
|
||||
static __device__ __forceinline__ void cvt(uint src, D& dst, int bidx)
|
||||
{
|
||||
(&dst.x)[bidx] = (uchar)(src << 3);
|
||||
dst.y = (uchar)((src >> 3) & ~3);
|
||||
@@ -206,7 +206,7 @@ namespace cv { namespace gpu { namespace color
|
||||
|
||||
explicit RGB5x52RGB(int bidx) : bidx(bidx) {}
|
||||
|
||||
__device__ dst_t operator()(ushort src) const
|
||||
__device__ __forceinline__ dst_t operator()(ushort src) const
|
||||
{
|
||||
dst_t dst;
|
||||
RGB5x52RGBConverter<GREEN_BITS>::cvt((uint)src, dst, bidx);
|
||||
@@ -221,18 +221,18 @@ namespace cv { namespace gpu { namespace color
|
||||
template<> struct RGB2RGB5x5Converter<6>
|
||||
{
|
||||
template <typename T>
|
||||
static __device__ ushort cvt(const T& src, int bidx)
|
||||
static __device__ __forceinline__ ushort cvt(const T& src, int bidx)
|
||||
{
|
||||
return (ushort)(((&src.x)[bidx] >> 3) | ((src.y & ~3) << 3) | (((&src.x)[bidx^2] & ~7) << 8));
|
||||
}
|
||||
};
|
||||
template<> struct RGB2RGB5x5Converter<5>
|
||||
{
|
||||
static __device__ ushort cvt(const uchar3& src, int bidx)
|
||||
static __device__ __forceinline__ ushort cvt(const uchar3& src, int bidx)
|
||||
{
|
||||
return (ushort)(((&src.x)[bidx] >> 3) | ((src.y & ~7) << 2) | (((&src.x)[bidx^2] & ~7) << 7));
|
||||
}
|
||||
static __device__ ushort cvt(const uchar4& src, int bidx)
|
||||
static __device__ __forceinline__ ushort cvt(const uchar4& src, int bidx)
|
||||
{
|
||||
return (ushort)(((&src.x)[bidx] >> 3) | ((src.y & ~7) << 2) | (((&src.x)[bidx^2] & ~7) << 7) | (src.w ? 0x8000 : 0));
|
||||
}
|
||||
@@ -245,7 +245,7 @@ namespace cv { namespace gpu { namespace color
|
||||
|
||||
explicit RGB2RGB5x5(int bidx) : bidx(bidx) {}
|
||||
|
||||
__device__ ushort operator()(const src_t& src)
|
||||
__device__ __forceinline__ ushort operator()(const src_t& src)
|
||||
{
|
||||
return RGB2RGB5x5Converter<GREEN_BITS>::cvt(src, bidx);
|
||||
}
|
||||
@@ -299,7 +299,7 @@ namespace cv { namespace gpu { namespace color
|
||||
typedef T src_t;
|
||||
typedef typename TypeVec<T, DSTCN>::vec_t dst_t;
|
||||
|
||||
__device__ dst_t operator()(const T& src) const
|
||||
__device__ __forceinline__ dst_t operator()(const T& src) const
|
||||
{
|
||||
dst_t dst;
|
||||
|
||||
@@ -313,14 +313,14 @@ namespace cv { namespace gpu { namespace color
|
||||
template <int GREEN_BITS> struct Gray2RGB5x5Converter;
|
||||
template<> struct Gray2RGB5x5Converter<6>
|
||||
{
|
||||
static __device__ ushort cvt(uint t)
|
||||
static __device__ __forceinline__ ushort cvt(uint t)
|
||||
{
|
||||
return (ushort)((t >> 3) | ((t & ~3) << 3) | ((t & ~7) << 8));
|
||||
}
|
||||
};
|
||||
template<> struct Gray2RGB5x5Converter<5>
|
||||
{
|
||||
static __device__ ushort cvt(uint t)
|
||||
static __device__ __forceinline__ ushort cvt(uint t)
|
||||
{
|
||||
t >>= 3;
|
||||
return (ushort)(t | (t << 5) | (t << 10));
|
||||
@@ -332,7 +332,7 @@ namespace cv { namespace gpu { namespace color
|
||||
typedef uchar src_t;
|
||||
typedef ushort dst_t;
|
||||
|
||||
__device__ ushort operator()(uchar src) const
|
||||
__device__ __forceinline__ ushort operator()(uchar src) const
|
||||
{
|
||||
return Gray2RGB5x5Converter<GREEN_BITS>::cvt((uint)src);
|
||||
}
|
||||
@@ -406,14 +406,14 @@ namespace cv { namespace gpu { namespace color
|
||||
template <int GREEN_BITS> struct RGB5x52GrayConverter;
|
||||
template<> struct RGB5x52GrayConverter<6>
|
||||
{
|
||||
static __device__ uchar cvt(uint t)
|
||||
static __device__ __forceinline__ uchar cvt(uint t)
|
||||
{
|
||||
return (uchar)CV_DESCALE(((t << 3) & 0xf8) * B2Y + ((t >> 3) & 0xfc) * G2Y + ((t >> 8) & 0xf8) * R2Y, yuv_shift);
|
||||
}
|
||||
};
|
||||
template<> struct RGB5x52GrayConverter<5>
|
||||
{
|
||||
static __device__ uchar cvt(uint t)
|
||||
static __device__ __forceinline__ uchar cvt(uint t)
|
||||
{
|
||||
return (uchar)CV_DESCALE(((t << 3) & 0xf8) * B2Y + ((t >> 2) & 0xf8) * G2Y + ((t >> 7) & 0xf8) * R2Y, yuv_shift);
|
||||
}
|
||||
@@ -424,18 +424,18 @@ namespace cv { namespace gpu { namespace color
|
||||
typedef ushort src_t;
|
||||
typedef uchar dst_t;
|
||||
|
||||
__device__ uchar operator()(ushort src) const
|
||||
__device__ __forceinline__ uchar operator()(ushort src) const
|
||||
{
|
||||
return RGB5x52GrayConverter<GREEN_BITS>::cvt((uint)src);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
__device__ T RGB2GrayConvert(const T* src, int bidx)
|
||||
__device__ __forceinline__ T RGB2GrayConvert(const T* src, int bidx)
|
||||
{
|
||||
return (T)CV_DESCALE((unsigned)(src[bidx] * B2Y + src[1] * G2Y + src[bidx^2] * R2Y), yuv_shift);
|
||||
}
|
||||
__device__ float RGB2GrayConvert(const float* src, int bidx)
|
||||
__device__ __forceinline__ float RGB2GrayConvert(const float* src, int bidx)
|
||||
{
|
||||
const float cr = 0.299f;
|
||||
const float cg = 0.587f;
|
||||
@@ -451,7 +451,7 @@ namespace cv { namespace gpu { namespace color
|
||||
|
||||
explicit RGB2Gray(int bidx) : bidx(bidx) {}
|
||||
|
||||
__device__ T operator()(const src_t& src)
|
||||
__device__ __forceinline__ T operator()(const src_t& src)
|
||||
{
|
||||
return RGB2GrayConvert(&src.x, bidx);
|
||||
}
|
||||
@@ -515,7 +515,7 @@ namespace cv { namespace gpu { namespace color
|
||||
__constant__ float cYCrCbCoeffs_f[5];
|
||||
|
||||
template <typename T, typename D>
|
||||
__device__ void RGB2YCrCbConvert(const T* src, D& dst, int bidx)
|
||||
__device__ __forceinline__ void RGB2YCrCbConvert(const T* src, D& dst, int bidx)
|
||||
{
|
||||
const int delta = ColorChannel<T>::half() * (1 << yuv_shift);
|
||||
|
||||
@@ -528,7 +528,7 @@ namespace cv { namespace gpu { namespace color
|
||||
dst.z = saturate_cast<T>(Cb);
|
||||
}
|
||||
template <typename D>
|
||||
static __device__ void RGB2YCrCbConvert(const float* src, D& dst, int bidx)
|
||||
static __device__ __forceinline__ void RGB2YCrCbConvert(const float* src, D& dst, int bidx)
|
||||
{
|
||||
dst.x = src[0] * cYCrCbCoeffs_f[0] + src[1] * cYCrCbCoeffs_f[1] + src[2] * cYCrCbCoeffs_f[2];
|
||||
dst.y = (src[bidx^2] - dst.x) * cYCrCbCoeffs_f[3] + ColorChannel<float>::half();
|
||||
@@ -561,7 +561,7 @@ namespace cv { namespace gpu { namespace color
|
||||
|
||||
RGB2YCrCb(int bidx, const coeff_t coeffs[5]) : RGB2YCrCbBase<T>(coeffs), bidx(bidx) {}
|
||||
|
||||
__device__ dst_t operator()(const src_t& src) const
|
||||
__device__ __forceinline__ dst_t operator()(const src_t& src) const
|
||||
{
|
||||
dst_t dst;
|
||||
RGB2YCrCbConvert(&src.x, dst, bidx);
|
||||
@@ -573,7 +573,7 @@ namespace cv { namespace gpu { namespace color
|
||||
};
|
||||
|
||||
template <typename T, typename D>
|
||||
__device__ void YCrCb2RGBConvert(const T& src, D* dst, int bidx)
|
||||
__device__ __forceinline__ void YCrCb2RGBConvert(const T& src, D* dst, int bidx)
|
||||
{
|
||||
const int b = src.x + CV_DESCALE((src.z - ColorChannel<D>::half()) * cYCrCbCoeffs_i[3], yuv_shift);
|
||||
const int g = src.x + CV_DESCALE((src.z - ColorChannel<D>::half()) * cYCrCbCoeffs_i[2] + (src.y - ColorChannel<D>::half()) * cYCrCbCoeffs_i[1], yuv_shift);
|
||||
@@ -584,7 +584,7 @@ namespace cv { namespace gpu { namespace color
|
||||
dst[bidx^2] = saturate_cast<D>(r);
|
||||
}
|
||||
template <typename T>
|
||||
__device__ void YCrCb2RGBConvert(const T& src, float* dst, int bidx)
|
||||
__device__ __forceinline__ void YCrCb2RGBConvert(const T& src, float* dst, int bidx)
|
||||
{
|
||||
dst[bidx] = src.x + (src.z - ColorChannel<float>::half()) * cYCrCbCoeffs_f[3];
|
||||
dst[1] = src.x + (src.z - ColorChannel<float>::half()) * cYCrCbCoeffs_f[2] + (src.y - ColorChannel<float>::half()) * cYCrCbCoeffs_f[1];
|
||||
@@ -617,7 +617,7 @@ namespace cv { namespace gpu { namespace color
|
||||
|
||||
YCrCb2RGB(int bidx, const coeff_t coeffs[4]) : YCrCb2RGBBase<T>(coeffs), bidx(bidx) {}
|
||||
|
||||
__device__ dst_t operator()(const src_t& src) const
|
||||
__device__ __forceinline__ dst_t operator()(const src_t& src) const
|
||||
{
|
||||
dst_t dst;
|
||||
|
||||
@@ -725,14 +725,14 @@ namespace cv { namespace gpu { namespace color
|
||||
__constant__ float cXYZ_D65f[9];
|
||||
|
||||
template <typename T, typename D>
|
||||
__device__ void RGB2XYZConvert(const T* src, D& dst)
|
||||
__device__ __forceinline__ void RGB2XYZConvert(const T* src, D& dst)
|
||||
{
|
||||
dst.x = saturate_cast<T>(CV_DESCALE(src[0] * cXYZ_D65i[0] + src[1] * cXYZ_D65i[1] + src[2] * cXYZ_D65i[2], xyz_shift));
|
||||
dst.y = saturate_cast<T>(CV_DESCALE(src[0] * cXYZ_D65i[3] + src[1] * cXYZ_D65i[4] + src[2] * cXYZ_D65i[5], xyz_shift));
|
||||
dst.z = saturate_cast<T>(CV_DESCALE(src[0] * cXYZ_D65i[6] + src[1] * cXYZ_D65i[7] + src[2] * cXYZ_D65i[8], xyz_shift));
|
||||
}
|
||||
template <typename D>
|
||||
__device__ void RGB2XYZConvert(const float* src, D& dst)
|
||||
__device__ __forceinline__ void RGB2XYZConvert(const float* src, D& dst)
|
||||
{
|
||||
dst.x = src[0] * cXYZ_D65f[0] + src[1] * cXYZ_D65f[1] + src[2] * cXYZ_D65f[2];
|
||||
dst.y = src[0] * cXYZ_D65f[3] + src[1] * cXYZ_D65f[4] + src[2] * cXYZ_D65f[5];
|
||||
@@ -765,7 +765,7 @@ namespace cv { namespace gpu { namespace color
|
||||
|
||||
explicit RGB2XYZ(const coeff_t coeffs[9]) : RGB2XYZBase<T>(coeffs) {}
|
||||
|
||||
__device__ dst_t operator()(const src_t& src) const
|
||||
__device__ __forceinline__ dst_t operator()(const src_t& src) const
|
||||
{
|
||||
dst_t dst;
|
||||
RGB2XYZConvert(&src.x, dst);
|
||||
@@ -774,14 +774,14 @@ namespace cv { namespace gpu { namespace color
|
||||
};
|
||||
|
||||
template <typename T, typename D>
|
||||
__device__ void XYZ2RGBConvert(const T& src, D* dst)
|
||||
__device__ __forceinline__ void XYZ2RGBConvert(const T& src, D* dst)
|
||||
{
|
||||
dst[0] = saturate_cast<D>(CV_DESCALE(src.x * cXYZ_D65i[0] + src.y * cXYZ_D65i[1] + src.z * cXYZ_D65i[2], xyz_shift));
|
||||
dst[1] = saturate_cast<D>(CV_DESCALE(src.x * cXYZ_D65i[3] + src.y * cXYZ_D65i[4] + src.z * cXYZ_D65i[5], xyz_shift));
|
||||
dst[2] = saturate_cast<D>(CV_DESCALE(src.x * cXYZ_D65i[6] + src.y * cXYZ_D65i[7] + src.z * cXYZ_D65i[8], xyz_shift));
|
||||
}
|
||||
template <typename T>
|
||||
__device__ void XYZ2RGBConvert(const T& src, float* dst)
|
||||
__device__ __forceinline__ void XYZ2RGBConvert(const T& src, float* dst)
|
||||
{
|
||||
dst[0] = src.x * cXYZ_D65f[0] + src.y * cXYZ_D65f[1] + src.z * cXYZ_D65f[2];
|
||||
dst[1] = src.x * cXYZ_D65f[3] + src.y * cXYZ_D65f[4] + src.z * cXYZ_D65f[5];
|
||||
@@ -814,7 +814,7 @@ namespace cv { namespace gpu { namespace color
|
||||
|
||||
explicit XYZ2RGB(const coeff_t coeffs[9]) : XYZ2RGBBase<T>(coeffs) {}
|
||||
|
||||
__device__ dst_t operator()(const src_t& src) const
|
||||
__device__ __forceinline__ dst_t operator()(const src_t& src) const
|
||||
{
|
||||
dst_t dst;
|
||||
XYZ2RGBConvert(src, &dst.x);
|
||||
@@ -987,7 +987,7 @@ namespace cv { namespace gpu { namespace color
|
||||
|
||||
explicit RGB2HSV(int bidx) : bidx(bidx) {}
|
||||
|
||||
__device__ dst_t operator()(const src_t& src) const
|
||||
__device__ __forceinline__ dst_t operator()(const src_t& src) const
|
||||
{
|
||||
dst_t dst;
|
||||
RGB2HSVConvert<HR>(&src.x, dst, bidx);
|
||||
@@ -1062,7 +1062,7 @@ namespace cv { namespace gpu { namespace color
|
||||
|
||||
explicit HSV2RGB(int bidx) : bidx(bidx) {}
|
||||
|
||||
__device__ dst_t operator()(const src_t& src) const
|
||||
__device__ __forceinline__ dst_t operator()(const src_t& src) const
|
||||
{
|
||||
dst_t dst;
|
||||
HSV2RGBConvert<HR>(src, &dst.x, bidx);
|
||||
@@ -1214,7 +1214,7 @@ namespace cv { namespace gpu { namespace color
|
||||
|
||||
explicit RGB2HLS(int bidx) : bidx(bidx) {}
|
||||
|
||||
__device__ dst_t operator()(const src_t& src) const
|
||||
__device__ __forceinline__ dst_t operator()(const src_t& src) const
|
||||
{
|
||||
dst_t dst;
|
||||
RGB2HLSConvert<HR>(&src.x, dst, bidx);
|
||||
@@ -1295,7 +1295,7 @@ namespace cv { namespace gpu { namespace color
|
||||
|
||||
explicit HLS2RGB(int bidx) : bidx(bidx) {}
|
||||
|
||||
__device__ dst_t operator()(const src_t& src) const
|
||||
__device__ __forceinline__ dst_t operator()(const src_t& src) const
|
||||
{
|
||||
dst_t dst;
|
||||
HLS2RGBConvert<HR>(src, &dst.x, bidx);
|
||||
|
||||
@@ -57,7 +57,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
||||
template <typename T1, typename T2>
|
||||
struct NotEqual
|
||||
{
|
||||
__device__ uchar operator()(const T1& src1, const T2& src2)
|
||||
__device__ __forceinline__ uchar operator()(const T1& src1, const T2& src2)
|
||||
{
|
||||
return static_cast<uchar>(static_cast<int>(src1 != src2) * 255);
|
||||
}
|
||||
@@ -91,7 +91,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
||||
template <typename T>
|
||||
struct UnOp<T, UN_OP_NOT>
|
||||
{
|
||||
static __device__ T call(T v) { return ~v; }
|
||||
static __device__ __forceinline__ T call(T v) { return ~v; }
|
||||
};
|
||||
|
||||
|
||||
@@ -199,20 +199,20 @@ namespace cv { namespace gpu { namespace mathfunc
|
||||
template <typename T>
|
||||
struct BinOp<T, BIN_OP_OR>
|
||||
{
|
||||
static __device__ T call(T a, T b) { return a | b; }
|
||||
static __device__ __forceinline__ T call(T a, T b) { return a | b; }
|
||||
};
|
||||
|
||||
|
||||
template <typename T>
|
||||
struct BinOp<T, BIN_OP_AND>
|
||||
{
|
||||
static __device__ T call(T a, T b) { return a & b; }
|
||||
static __device__ __forceinline__ T call(T a, T b) { return a & b; }
|
||||
};
|
||||
|
||||
template <typename T>
|
||||
struct BinOp<T, BIN_OP_XOR>
|
||||
{
|
||||
static __device__ T call(T a, T b) { return a ^ b; }
|
||||
static __device__ __forceinline__ T call(T a, T b) { return a ^ b; }
|
||||
};
|
||||
|
||||
|
||||
@@ -357,15 +357,15 @@ namespace cv { namespace gpu { namespace mathfunc
|
||||
struct MinOp
|
||||
{
|
||||
template <typename T>
|
||||
__device__ T operator()(T a, T b)
|
||||
__device__ __forceinline__ T operator()(T a, T b)
|
||||
{
|
||||
return min(a, b);
|
||||
}
|
||||
__device__ float operator()(float a, float b)
|
||||
__device__ __forceinline__ float operator()(float a, float b)
|
||||
{
|
||||
return fmin(a, b);
|
||||
}
|
||||
__device__ double operator()(double a, double b)
|
||||
__device__ __forceinline__ double operator()(double a, double b)
|
||||
{
|
||||
return fmin(a, b);
|
||||
}
|
||||
@@ -374,15 +374,15 @@ namespace cv { namespace gpu { namespace mathfunc
|
||||
struct MaxOp
|
||||
{
|
||||
template <typename T>
|
||||
__device__ T operator()(T a, T b)
|
||||
__device__ __forceinline__ T operator()(T a, T b)
|
||||
{
|
||||
return max(a, b);
|
||||
}
|
||||
__device__ float operator()(float a, float b)
|
||||
__device__ __forceinline__ float operator()(float a, float b)
|
||||
{
|
||||
return fmax(a, b);
|
||||
}
|
||||
__device__ double operator()(double a, double b)
|
||||
__device__ __forceinline__ double operator()(double a, double b)
|
||||
{
|
||||
return fmax(a, b);
|
||||
}
|
||||
@@ -394,7 +394,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
||||
|
||||
explicit ScalarMinOp(T s_) : s(s_) {}
|
||||
|
||||
__device__ T operator()(T a)
|
||||
__device__ __forceinline__ T operator()(T a)
|
||||
{
|
||||
return min(a, s);
|
||||
}
|
||||
@@ -405,7 +405,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
||||
|
||||
explicit ScalarMinOp(float s_) : s(s_) {}
|
||||
|
||||
__device__ float operator()(float a)
|
||||
__device__ __forceinline__ float operator()(float a)
|
||||
{
|
||||
return fmin(a, s);
|
||||
}
|
||||
@@ -416,7 +416,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
||||
|
||||
explicit ScalarMinOp(double s_) : s(s_) {}
|
||||
|
||||
__device__ double operator()(double a)
|
||||
__device__ __forceinline__ double operator()(double a)
|
||||
{
|
||||
return fmin(a, s);
|
||||
}
|
||||
@@ -428,7 +428,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
||||
|
||||
explicit ScalarMaxOp(T s_) : s(s_) {}
|
||||
|
||||
__device__ T operator()(T a)
|
||||
__device__ __forceinline__ T operator()(T a)
|
||||
{
|
||||
return max(a, s);
|
||||
}
|
||||
@@ -439,7 +439,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
||||
|
||||
explicit ScalarMaxOp(float s_) : s(s_) {}
|
||||
|
||||
__device__ float operator()(float a)
|
||||
__device__ __forceinline__ float operator()(float a)
|
||||
{
|
||||
return fmax(a, s);
|
||||
}
|
||||
@@ -450,7 +450,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
||||
|
||||
explicit ScalarMaxOp(double s_) : s(s_) {}
|
||||
|
||||
__device__ double operator()(double a)
|
||||
__device__ __forceinline__ double operator()(double a)
|
||||
{
|
||||
return fmax(a, s);
|
||||
}
|
||||
@@ -524,7 +524,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
||||
{
|
||||
ThreshBinary(T thresh_, T maxVal_) : thresh(thresh_), maxVal(maxVal_) {}
|
||||
|
||||
__device__ T operator()(const T& src) const
|
||||
__device__ __forceinline__ T operator()(const T& src) const
|
||||
{
|
||||
return src > thresh ? maxVal : 0;
|
||||
}
|
||||
@@ -538,7 +538,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
||||
{
|
||||
ThreshBinaryInv(T thresh_, T maxVal_) : thresh(thresh_), maxVal(maxVal_) {}
|
||||
|
||||
__device__ T operator()(const T& src) const
|
||||
__device__ __forceinline__ T operator()(const T& src) const
|
||||
{
|
||||
return src > thresh ? 0 : maxVal;
|
||||
}
|
||||
@@ -552,7 +552,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
||||
{
|
||||
ThreshTrunc(T thresh_, T) : thresh(thresh_) {}
|
||||
|
||||
__device__ T operator()(const T& src) const
|
||||
__device__ __forceinline__ T operator()(const T& src) const
|
||||
{
|
||||
return min(src, thresh);
|
||||
}
|
||||
@@ -564,7 +564,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
||||
{
|
||||
ThreshTrunc(float thresh_, float) : thresh(thresh_) {}
|
||||
|
||||
__device__ float operator()(const float& src) const
|
||||
__device__ __forceinline__ float operator()(const float& src) const
|
||||
{
|
||||
return fmin(src, thresh);
|
||||
}
|
||||
@@ -576,7 +576,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
||||
{
|
||||
ThreshTrunc(double thresh_, double) : thresh(thresh_) {}
|
||||
|
||||
__device__ double operator()(const double& src) const
|
||||
__device__ __forceinline__ double operator()(const double& src) const
|
||||
{
|
||||
return fmin(src, thresh);
|
||||
}
|
||||
@@ -590,7 +590,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
||||
public:
|
||||
ThreshToZero(T thresh_, T) : thresh(thresh_) {}
|
||||
|
||||
__device__ T operator()(const T& src) const
|
||||
__device__ __forceinline__ T operator()(const T& src) const
|
||||
{
|
||||
return src > thresh ? src : 0;
|
||||
}
|
||||
@@ -604,7 +604,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
||||
public:
|
||||
ThreshToZeroInv(T thresh_, T) : thresh(thresh_) {}
|
||||
|
||||
__device__ T operator()(const T& src) const
|
||||
__device__ __forceinline__ T operator()(const T& src) const
|
||||
{
|
||||
return src > thresh ? 0 : src;
|
||||
}
|
||||
|
||||
@@ -406,7 +406,7 @@ namespace bf_krnls
|
||||
template <int channels>
|
||||
struct DistRgbMax
|
||||
{
|
||||
static __device__ uchar calc(const uchar* a, const uchar* b)
|
||||
static __device__ __forceinline__ uchar calc(const uchar* a, const uchar* b)
|
||||
{
|
||||
uchar x = abs(a[0] - b[0]);
|
||||
uchar y = abs(a[1] - b[1]);
|
||||
@@ -418,7 +418,7 @@ namespace bf_krnls
|
||||
template <>
|
||||
struct DistRgbMax<1>
|
||||
{
|
||||
static __device__ uchar calc(const uchar* a, const uchar* b)
|
||||
static __device__ __forceinline__ uchar calc(const uchar* a, const uchar* b)
|
||||
{
|
||||
return abs(a[0] - b[0]);
|
||||
}
|
||||
|
||||
@@ -48,35 +48,35 @@ using namespace cv::gpu::device;
|
||||
|
||||
namespace cv { namespace gpu { namespace imgproc {
|
||||
|
||||
__device__ float sum(float v) { return v; }
|
||||
__device__ float sum(float2 v) { return v.x + v.y; }
|
||||
__device__ float sum(float3 v) { return v.x + v.y + v.z; }
|
||||
__device__ float sum(float4 v) { return v.x + v.y + v.z + v.w; }
|
||||
__device__ __forceinline__ float sum(float v) { return v; }
|
||||
__device__ __forceinline__ float sum(float2 v) { return v.x + v.y; }
|
||||
__device__ __forceinline__ float sum(float3 v) { return v.x + v.y + v.z; }
|
||||
__device__ __forceinline__ float sum(float4 v) { return v.x + v.y + v.z + v.w; }
|
||||
|
||||
__device__ float first(float v) { return v; }
|
||||
__device__ float first(float2 v) { return v.x; }
|
||||
__device__ float first(float3 v) { return v.x; }
|
||||
__device__ float first(float4 v) { return v.x; }
|
||||
__device__ __forceinline__ float first(float v) { return v; }
|
||||
__device__ __forceinline__ float first(float2 v) { return v.x; }
|
||||
__device__ __forceinline__ float first(float3 v) { return v.x; }
|
||||
__device__ __forceinline__ float first(float4 v) { return v.x; }
|
||||
|
||||
__device__ float mul(float a, float b) { return a * b; }
|
||||
__device__ float2 mul(float2 a, float2 b) { return make_float2(a.x * b.x, a.y * b.y); }
|
||||
__device__ float3 mul(float3 a, float3 b) { return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); }
|
||||
__device__ float4 mul(float4 a, float4 b) { return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); }
|
||||
__device__ __forceinline__ float mul(float a, float b) { return a * b; }
|
||||
__device__ __forceinline__ float2 mul(float2 a, float2 b) { return make_float2(a.x * b.x, a.y * b.y); }
|
||||
__device__ __forceinline__ float3 mul(float3 a, float3 b) { return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); }
|
||||
__device__ __forceinline__ float4 mul(float4 a, float4 b) { return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); }
|
||||
|
||||
__device__ float mul(uchar a, uchar b) { return a * b; }
|
||||
__device__ float2 mul(uchar2 a, uchar2 b) { return make_float2(a.x * b.x, a.y * b.y); }
|
||||
__device__ float3 mul(uchar3 a, uchar3 b) { return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); }
|
||||
__device__ float4 mul(uchar4 a, uchar4 b) { return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); }
|
||||
__device__ __forceinline__ float mul(uchar a, uchar b) { return a * b; }
|
||||
__device__ __forceinline__ float2 mul(uchar2 a, uchar2 b) { return make_float2(a.x * b.x, a.y * b.y); }
|
||||
__device__ __forceinline__ float3 mul(uchar3 a, uchar3 b) { return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); }
|
||||
__device__ __forceinline__ float4 mul(uchar4 a, uchar4 b) { return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); }
|
||||
|
||||
__device__ float sub(float a, float b) { return a - b; }
|
||||
__device__ float2 sub(float2 a, float2 b) { return make_float2(a.x - b.x, a.y - b.y); }
|
||||
__device__ float3 sub(float3 a, float3 b) { return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); }
|
||||
__device__ float4 sub(float4 a, float4 b) { return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); }
|
||||
__device__ __forceinline__ float sub(float a, float b) { return a - b; }
|
||||
__device__ __forceinline__ float2 sub(float2 a, float2 b) { return make_float2(a.x - b.x, a.y - b.y); }
|
||||
__device__ __forceinline__ float3 sub(float3 a, float3 b) { return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); }
|
||||
__device__ __forceinline__ float4 sub(float4 a, float4 b) { return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); }
|
||||
|
||||
__device__ float sub(uchar a, uchar b) { return a - b; }
|
||||
__device__ float2 sub(uchar2 a, uchar2 b) { return make_float2(a.x - b.x, a.y - b.y); }
|
||||
__device__ float3 sub(uchar3 a, uchar3 b) { return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); }
|
||||
__device__ float4 sub(uchar4 a, uchar4 b) { return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); }
|
||||
__device__ __forceinline__ float sub(uchar a, uchar b) { return a - b; }
|
||||
__device__ __forceinline__ float2 sub(uchar2 a, uchar2 b) { return make_float2(a.x - b.x, a.y - b.y); }
|
||||
__device__ __forceinline__ float3 sub(uchar3 a, uchar3 b) { return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); }
|
||||
__device__ __forceinline__ float4 sub(uchar4 a, uchar4 b) { return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); }
|
||||
|
||||
|
||||
template <typename T, int cn>
|
||||
|
||||
@@ -60,27 +60,27 @@ namespace cv { namespace gpu { namespace mathfunc
|
||||
{
|
||||
struct Nothing
|
||||
{
|
||||
static __device__ void calc(int, int, float, float, float*, size_t, float)
|
||||
static __device__ __forceinline__ void calc(int, int, float, float, float*, size_t, float)
|
||||
{
|
||||
}
|
||||
};
|
||||
struct Magnitude
|
||||
{
|
||||
static __device__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float)
|
||||
static __device__ __forceinline__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float)
|
||||
{
|
||||
dst[y * dst_step + x] = sqrtf(x_data * x_data + y_data * y_data);
|
||||
}
|
||||
};
|
||||
struct MagnitudeSqr
|
||||
{
|
||||
static __device__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float)
|
||||
static __device__ __forceinline__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float)
|
||||
{
|
||||
dst[y * dst_step + x] = x_data * x_data + y_data * y_data;
|
||||
}
|
||||
};
|
||||
struct Atan2
|
||||
{
|
||||
static __device__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float scale)
|
||||
static __device__ __forceinline__ void calc(int x, int y, float x_data, float y_data, float* dst, size_t dst_step, float scale)
|
||||
{
|
||||
dst[y * dst_step + x] = scale * atan2f(y_data, x_data);
|
||||
}
|
||||
@@ -104,14 +104,14 @@ namespace cv { namespace gpu { namespace mathfunc
|
||||
|
||||
struct NonEmptyMag
|
||||
{
|
||||
static __device__ float get(const float* mag, size_t mag_step, int x, int y)
|
||||
static __device__ __forceinline__ float get(const float* mag, size_t mag_step, int x, int y)
|
||||
{
|
||||
return mag[y * mag_step + x];
|
||||
}
|
||||
};
|
||||
struct EmptyMag
|
||||
{
|
||||
static __device__ float get(const float*, size_t, int, int)
|
||||
static __device__ __forceinline__ float get(const float*, size_t, int, int)
|
||||
{
|
||||
return 1.0f;
|
||||
}
|
||||
|
||||
@@ -123,14 +123,14 @@ namespace cv { namespace gpu { namespace matrix_operations {
|
||||
__constant__ float scalar_32f[4];
|
||||
__constant__ double scalar_64f[4];
|
||||
|
||||
template <typename T> __device__ T readScalar(int i);
|
||||
template <> __device__ uchar readScalar<uchar>(int i) {return scalar_8u[i];}
|
||||
template <> __device__ schar readScalar<schar>(int i) {return scalar_8s[i];}
|
||||
template <> __device__ ushort readScalar<ushort>(int i) {return scalar_16u[i];}
|
||||
template <> __device__ short readScalar<short>(int i) {return scalar_16s[i];}
|
||||
template <> __device__ int readScalar<int>(int i) {return scalar_32s[i];}
|
||||
template <> __device__ float readScalar<float>(int i) {return scalar_32f[i];}
|
||||
template <> __device__ double readScalar<double>(int i) {return scalar_64f[i];}
|
||||
template <typename T> __device__ __forceinline__ T readScalar(int i);
|
||||
template <> __device__ __forceinline__ uchar readScalar<uchar>(int i) {return scalar_8u[i];}
|
||||
template <> __device__ __forceinline__ schar readScalar<schar>(int i) {return scalar_8s[i];}
|
||||
template <> __device__ __forceinline__ ushort readScalar<ushort>(int i) {return scalar_16u[i];}
|
||||
template <> __device__ __forceinline__ short readScalar<short>(int i) {return scalar_16s[i];}
|
||||
template <> __device__ __forceinline__ int readScalar<int>(int i) {return scalar_32s[i];}
|
||||
template <> __device__ __forceinline__ float readScalar<float>(int i) {return scalar_32f[i];}
|
||||
template <> __device__ __forceinline__ double readScalar<double>(int i) {return scalar_64f[i];}
|
||||
|
||||
void writeScalar(const uchar* vals)
|
||||
{
|
||||
@@ -243,7 +243,7 @@ namespace cv { namespace gpu { namespace matrix_operations {
|
||||
public:
|
||||
Convertor(double alpha_, double beta_) : alpha(alpha_), beta(beta_) {}
|
||||
|
||||
__device__ D operator()(const T& src)
|
||||
__device__ __forceinline__ D operator()(const T& src)
|
||||
{
|
||||
return saturate_cast<D>(alpha * src + beta);
|
||||
}
|
||||
|
||||
@@ -78,7 +78,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
||||
{
|
||||
explicit Mask8U(PtrStep mask): mask(mask) {}
|
||||
|
||||
__device__ bool operator()(int y, int x) const
|
||||
__device__ __forceinline__ bool operator()(int y, int x) const
|
||||
{
|
||||
return mask.ptr(y)[x];
|
||||
}
|
||||
@@ -89,7 +89,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
||||
|
||||
struct MaskTrue
|
||||
{
|
||||
__device__ bool operator()(int y, int x) const
|
||||
__device__ __forceinline__ bool operator()(int y, int x) const
|
||||
{
|
||||
return true;
|
||||
}
|
||||
@@ -153,7 +153,7 @@ namespace cv { namespace gpu { namespace mathfunc
|
||||
|
||||
// Does min and max in shared memory
|
||||
template <typename T>
|
||||
__device__ void merge(uint tid, uint offset, volatile T* minval, volatile T* maxval)
|
||||
__device__ __forceinline__ void merge(uint tid, uint offset, volatile T* minval, volatile T* maxval)
|
||||
{
|
||||
minval[tid] = min(minval[tid], minval[tid + offset]);
|
||||
maxval[tid] = max(maxval[tid], maxval[tid + offset]);
|
||||
@@ -976,16 +976,16 @@ namespace cv { namespace gpu { namespace mathfunc
|
||||
template <> struct SumType<double> { typedef double R; };
|
||||
|
||||
template <typename R>
|
||||
struct IdentityOp { static __device__ R call(R x) { return x; } };
|
||||
struct IdentityOp { static __device__ __forceinline__ R call(R x) { return x; } };
|
||||
|
||||
template <typename R>
|
||||
struct AbsOp { static __device__ R call(R x) { return abs(x); } };
|
||||
struct AbsOp { static __device__ __forceinline__ R call(R x) { return abs(x); } };
|
||||
|
||||
template <>
|
||||
struct AbsOp<uint> { static __device__ uint call(uint x) { return x; } };
|
||||
struct AbsOp<uint> { static __device__ __forceinline__ uint call(uint x) { return x; } };
|
||||
|
||||
template <typename R>
|
||||
struct SqrOp { static __device__ R call(R x) { return x * x; } };
|
||||
struct SqrOp { static __device__ __forceinline__ R call(R x) { return x * x; } };
|
||||
|
||||
__constant__ int ctwidth;
|
||||
__constant__ int ctheight;
|
||||
|
||||
@@ -68,7 +68,7 @@ __constant__ size_t cminSSD_step;
|
||||
__constant__ int cwidth;
|
||||
__constant__ int cheight;
|
||||
|
||||
__device__ int SQ(int a)
|
||||
__device__ __forceinline__ int SQ(int a)
|
||||
{
|
||||
return a * a;
|
||||
}
|
||||
@@ -419,7 +419,7 @@ extern "C" void prefilter_xsobel(const DevMem2D& input, const DevMem2D& output,
|
||||
|
||||
texture<unsigned char, 2, cudaReadModeNormalizedFloat> texForTF;
|
||||
|
||||
__device__ float sobel(int x, int y)
|
||||
__device__ __forceinline__ float sobel(int x, int y)
|
||||
{
|
||||
float conv = tex2D(texForTF, x - 1, y - 1) * (-1) + tex2D(texForTF, x + 1, y - 1) * (1) +
|
||||
tex2D(texForTF, x - 1, y ) * (-2) + tex2D(texForTF, x + 1, y ) * (2) +
|
||||
|
||||
@@ -76,11 +76,11 @@ namespace cv { namespace gpu { namespace bp
|
||||
template <int cn> struct PixDiff;
|
||||
template <> struct PixDiff<1>
|
||||
{
|
||||
__device__ PixDiff(const uchar* ls)
|
||||
__device__ __forceinline__ PixDiff(const uchar* ls)
|
||||
{
|
||||
l = *ls;
|
||||
}
|
||||
__device__ float operator()(const uchar* rs) const
|
||||
__device__ __forceinline__ float operator()(const uchar* rs) const
|
||||
{
|
||||
return abs((int)l - *rs);
|
||||
}
|
||||
@@ -88,11 +88,11 @@ namespace cv { namespace gpu { namespace bp
|
||||
};
|
||||
template <> struct PixDiff<3>
|
||||
{
|
||||
__device__ PixDiff(const uchar* ls)
|
||||
__device__ __forceinline__ PixDiff(const uchar* ls)
|
||||
{
|
||||
l = *((uchar3*)ls);
|
||||
}
|
||||
__device__ float operator()(const uchar* rs) const
|
||||
__device__ __forceinline__ float operator()(const uchar* rs) const
|
||||
{
|
||||
const float tr = 0.299f;
|
||||
const float tg = 0.587f;
|
||||
@@ -108,11 +108,11 @@ namespace cv { namespace gpu { namespace bp
|
||||
};
|
||||
template <> struct PixDiff<4>
|
||||
{
|
||||
__device__ PixDiff(const uchar* ls)
|
||||
__device__ __forceinline__ PixDiff(const uchar* ls)
|
||||
{
|
||||
l = *((uchar4*)ls);
|
||||
}
|
||||
__device__ float operator()(const uchar* rs) const
|
||||
__device__ __forceinline__ float operator()(const uchar* rs) const
|
||||
{
|
||||
const float tr = 0.299f;
|
||||
const float tg = 0.587f;
|
||||
|
||||
@@ -102,14 +102,14 @@ namespace cv { namespace gpu { namespace csbp
|
||||
template <int channels> struct DataCostPerPixel;
|
||||
template <> struct DataCostPerPixel<1>
|
||||
{
|
||||
static __device__ float compute(const uchar* left, const uchar* right)
|
||||
static __device__ __forceinline__ float compute(const uchar* left, const uchar* right)
|
||||
{
|
||||
return fmin(cdata_weight * abs((int)*left - *right), cdata_weight * cmax_data_term);
|
||||
}
|
||||
};
|
||||
template <> struct DataCostPerPixel<3>
|
||||
{
|
||||
static __device__ float compute(const uchar* left, const uchar* right)
|
||||
static __device__ __forceinline__ float compute(const uchar* left, const uchar* right)
|
||||
{
|
||||
float tb = 0.114f * abs((int)left[0] - right[0]);
|
||||
float tg = 0.587f * abs((int)left[1] - right[1]);
|
||||
@@ -120,7 +120,7 @@ namespace cv { namespace gpu { namespace csbp
|
||||
};
|
||||
template <> struct DataCostPerPixel<4>
|
||||
{
|
||||
static __device__ float compute(const uchar* left, const uchar* right)
|
||||
static __device__ __forceinline__ float compute(const uchar* left, const uchar* right)
|
||||
{
|
||||
uchar4 l = *((const uchar4*)left);
|
||||
uchar4 r = *((const uchar4*)right);
|
||||
|
||||
@@ -122,7 +122,7 @@ namespace cv { namespace gpu { namespace surf
|
||||
__constant__ float c_DY [3][5] = { {2, 0, 7, 3, 1}, {2, 3, 7, 6, -2}, {2, 6, 7, 9, 1} };
|
||||
__constant__ float c_DXY[4][5] = { {1, 1, 4, 4, 1}, {5, 1, 8, 4, -1}, {1, 5, 4, 8, -1}, {5, 5, 8, 8, 1} };
|
||||
|
||||
__host__ __device__ int calcSize(int octave, int layer)
|
||||
__host__ __device__ __forceinline__ int calcSize(int octave, int layer)
|
||||
{
|
||||
/* Wavelet size at first layer of first octave. */
|
||||
const int HAAR_SIZE0 = 9;
|
||||
@@ -189,7 +189,7 @@ namespace cv { namespace gpu { namespace surf
|
||||
|
||||
struct WithOutMask
|
||||
{
|
||||
static __device__ bool check(int, int, int)
|
||||
static __device__ __forceinline__ bool check(int, int, int)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
@@ -708,7 +708,7 @@ namespace cv { namespace gpu { namespace surf
|
||||
3.695352233989979e-006f, 8.444558261544444e-006f, 1.760426494001877e-005f, 3.34794785885606e-005f, 5.808438800158911e-005f, 9.193058212986216e-005f, 0.0001327334757661447f, 0.0001748319627949968f, 0.0002100782439811155f, 0.0002302826324012131f, 0.0002302826324012131f, 0.0002100782439811155f, 0.0001748319627949968f, 0.0001327334757661447f, 9.193058212986216e-005f, 5.808438800158911e-005f, 3.34794785885606e-005f, 1.760426494001877e-005f, 8.444558261544444e-006f, 3.695352233989979e-006f
|
||||
};
|
||||
|
||||
__device__ unsigned char calcWin(int i, int j, float centerX, float centerY, float win_offset, float cos_dir, float sin_dir)
|
||||
__device__ __forceinline__ unsigned char calcWin(int i, int j, float centerX, float centerY, float win_offset, float cos_dir, float sin_dir)
|
||||
{
|
||||
float pixel_x = centerX + (win_offset + j) * cos_dir + (win_offset + i) * sin_dir;
|
||||
float pixel_y = centerY - (win_offset + j) * sin_dir + (win_offset + i) * cos_dir;
|
||||
|
||||