GPU device layer code refactoring

This commit moves the gpu module's CUDA kernels onto the shared device-layer headers: limits_gpu.hpp becomes limits.hpp (numeric_limits_gpu -> numeric_limits), vecmath.hpp becomes vec_math.hpp (TypeVec<...>::vec_t -> vec_type), per-file functors are replaced by the new functional.hpp objects, and warp-level helpers move to utility.hpp.
@@ -41,7 +41,7 @@
 //M*/

 #include "internal_shared.hpp"
-#include "opencv2/gpu/device/limits_gpu.hpp"
+#include "opencv2/gpu/device/limits.hpp"
 #include "opencv2/gpu/device/datamov_utils.hpp"

 using namespace cv::gpu;
@@ -565,7 +565,7 @@ namespace cv { namespace gpu { namespace bfmatcher

         int myBestTrainIdx = -1;
         int myBestImgIdx = -1;
-        typename Dist::ResultType myMin = numeric_limits_gpu<typename Dist::ResultType>::max();
+        typename Dist::ResultType myMin = numeric_limits<typename Dist::ResultType>::max();

         {
             typename Dist::ResultType* sdiff_row = smem + BLOCK_DIM_X * threadIdx.y;
@@ -821,7 +821,7 @@ namespace cv { namespace gpu { namespace bfmatcher
         {
             const T* trainDescs = trainDescs_.ptr(trainIdx);

-            typename Dist::ResultType myDist = numeric_limits_gpu<typename Dist::ResultType>::max();
+            typename Dist::ResultType myDist = numeric_limits<typename Dist::ResultType>::max();

             if (mask(queryIdx, trainIdx))
             {
@@ -932,7 +932,7 @@ namespace cv { namespace gpu { namespace bfmatcher
         {
             const int tid = threadIdx.x;

-            T myMin = numeric_limits_gpu<T>::max();
+            T myMin = numeric_limits<T>::max();
             int myMinIdx = -1;

             for (int i = tid; i < n; i += BLOCK_SIZE)
@@ -1007,10 +1007,10 @@ namespace cv { namespace gpu { namespace bfmatcher
         if (threadIdx.x == 0)
         {
             float dist = sdist[0];
-            if (dist < numeric_limits_gpu<float>::max())
+            if (dist < numeric_limits<float>::max())
             {
                 int bestIdx = strainIdx[0];
-                allDist[bestIdx] = numeric_limits_gpu<float>::max();
+                allDist[bestIdx] = numeric_limits<float>::max();
                 trainIdx[i] = bestIdx;
                 distance[i] = dist;
             }
(File diff suppressed because it is too large.)
@@ -40,9 +40,10 @@
 //
 //M*/

-#include "opencv2/gpu/device/vecmath.hpp"
+#include "opencv2/gpu/device/functional.hpp"
+#include "opencv2/gpu/device/vec_math.hpp"
 #include "opencv2/gpu/device/transform.hpp"
-#include "opencv2/gpu/device/limits_gpu.hpp"
+#include "opencv2/gpu/device/limits.hpp"
 #include "opencv2/gpu/device/saturate_cast.hpp"
 #include "internal_shared.hpp"

@@ -354,114 +355,11 @@ namespace cv { namespace gpu { namespace mathfunc

     //////////////////////////////////////////////////////////////////////////
     // min/max

-    struct MinOp
-    {
-        template <typename T>
-        __device__ __forceinline__ T operator()(T a, T b)
-        {
-            return min(a, b);
-        }
-        __device__ __forceinline__ float operator()(float a, float b)
-        {
-            return fmin(a, b);
-        }
-        __device__ __forceinline__ double operator()(double a, double b)
-        {
-            return fmin(a, b);
-        }
-    };
-
-    struct MaxOp
-    {
-        template <typename T>
-        __device__ __forceinline__ T operator()(T a, T b)
-        {
-            return max(a, b);
-        }
-        __device__ __forceinline__ float operator()(float a, float b)
-        {
-            return fmax(a, b);
-        }
-        __device__ __forceinline__ double operator()(double a, double b)
-        {
-            return fmax(a, b);
-        }
-    };
-
-    template <typename T> struct ScalarMinOp
-    {
-        T s;
-
-        explicit ScalarMinOp(T s_) : s(s_) {}
-
-        __device__ __forceinline__ T operator()(T a)
-        {
-            return min(a, s);
-        }
-    };
-    template <> struct ScalarMinOp<float>
-    {
-        float s;
-
-        explicit ScalarMinOp(float s_) : s(s_) {}
-
-        __device__ __forceinline__ float operator()(float a)
-        {
-            return fmin(a, s);
-        }
-    };
-    template <> struct ScalarMinOp<double>
-    {
-        double s;
-
-        explicit ScalarMinOp(double s_) : s(s_) {}
-
-        __device__ __forceinline__ double operator()(double a)
-        {
-            return fmin(a, s);
-        }
-    };
-
-    template <typename T> struct ScalarMaxOp
-    {
-        T s;
-
-        explicit ScalarMaxOp(T s_) : s(s_) {}
-
-        __device__ __forceinline__ T operator()(T a)
-        {
-            return max(a, s);
-        }
-    };
-    template <> struct ScalarMaxOp<float>
-    {
-        float s;
-
-        explicit ScalarMaxOp(float s_) : s(s_) {}
-
-        __device__ __forceinline__ float operator()(float a)
-        {
-            return fmax(a, s);
-        }
-    };
-    template <> struct ScalarMaxOp<double>
-    {
-        double s;
-
-        explicit ScalarMaxOp(double s_) : s(s_) {}
-
-        __device__ __forceinline__ double operator()(double a)
-        {
-            return fmax(a, s);
-        }
-    };
-
     template <typename T>
     void min_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream)
     {
-        MinOp op;
-        transform(src1, src2, dst, op, stream);
+        transform(src1, src2, dst, minimum<T>(), stream);
     }

     template void min_gpu<uchar >(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst, cudaStream_t stream);
@@ -475,8 +373,7 @@ namespace cv { namespace gpu { namespace mathfunc
     template <typename T>
     void max_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream)
     {
-        MaxOp op;
-        transform(src1, src2, dst, op, stream);
+        transform(src1, src2, dst, maximum<T>(), stream);
     }

     template void max_gpu<uchar >(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst, cudaStream_t stream);
@@ -490,8 +387,7 @@ namespace cv { namespace gpu { namespace mathfunc
     template <typename T>
     void min_gpu(const DevMem2D_<T>& src1, T src2, const DevMem2D_<T>& dst, cudaStream_t stream)
     {
-        ScalarMinOp<T> op(src2);
-        transform(src1, dst, op, stream);
+        transform(src1, dst, device::bind2nd(minimum<T>(), src2), stream);
     }

     template void min_gpu<uchar >(const DevMem2D& src1, uchar src2, const DevMem2D& dst, cudaStream_t stream);
@@ -501,12 +397,11 @@ namespace cv { namespace gpu { namespace mathfunc
     template void min_gpu<int >(const DevMem2D_<int>& src1, int src2, const DevMem2D_<int>& dst, cudaStream_t stream);
     template void min_gpu<float >(const DevMem2D_<float>& src1, float src2, const DevMem2D_<float>& dst, cudaStream_t stream);
     template void min_gpu<double>(const DevMem2D_<double>& src1, double src2, const DevMem2D_<double>& dst, cudaStream_t stream);


     template <typename T>
     void max_gpu(const DevMem2D_<T>& src1, T src2, const DevMem2D_<T>& dst, cudaStream_t stream)
     {
-        ScalarMaxOp<T> op(src2);
-        transform(src1, dst, op, stream);
+        transform(src1, dst, device::bind2nd(maximum<T>(), src2), stream);
     }

     template void max_gpu<uchar >(const DevMem2D& src1, uchar src2, const DevMem2D& dst, cudaStream_t stream);
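Note: minimum, maximum, and device::bind2nd are not defined in this diff; they come from the new functional.hpp header included above. A minimal sketch of their presumed shapes, inferred from the call sites (names from the diff, bodies assumed):

    // Sketch only: presumed functional.hpp-style function objects.
    template <typename T> struct minimum
    {
        __device__ __forceinline__ T operator()(T lhs, T rhs) const
        {
            return lhs < rhs ? lhs : rhs;
        }
    };

    template <typename T> struct maximum
    {
        __device__ __forceinline__ T operator()(T lhs, T rhs) const
        {
            return lhs < rhs ? rhs : lhs;
        }
    };

    // device::bind2nd presumably fixes the second argument of a binary
    // functor, yielding the unary functor that transform() expects.
    template <typename Op, typename T> struct binder2nd_sketch
    {
        Op op;
        T arg2;

        __device__ __forceinline__ T operator()(T arg1) const
        {
            return op(arg1, arg2);
        }
    };

With these, the per-element overload maps each pixel pair through minimum<T>()/maximum<T>(), and the scalar overload maps each pixel through bind2nd(minimum<T>(), src2).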
@@ -519,100 +414,7 @@ namespace cv { namespace gpu { namespace mathfunc


     //////////////////////////////////////////////////////////////////////////
     // threshold

-    template <typename T> struct ThreshBinary
-    {
-        ThreshBinary(T thresh_, T maxVal_) : thresh(thresh_), maxVal(maxVal_) {}
-
-        __device__ __forceinline__ T operator()(const T& src) const
-        {
-            return src > thresh ? maxVal : 0;
-        }
-
-    private:
-        T thresh;
-        T maxVal;
-    };
-
-    template <typename T> struct ThreshBinaryInv
-    {
-        ThreshBinaryInv(T thresh_, T maxVal_) : thresh(thresh_), maxVal(maxVal_) {}
-
-        __device__ __forceinline__ T operator()(const T& src) const
-        {
-            return src > thresh ? 0 : maxVal;
-        }
-
-    private:
-        T thresh;
-        T maxVal;
-    };
-
-    template <typename T> struct ThreshTrunc
-    {
-        ThreshTrunc(T thresh_, T) : thresh(thresh_) {}
-
-        __device__ __forceinline__ T operator()(const T& src) const
-        {
-            return min(src, thresh);
-        }
-
-    private:
-        T thresh;
-    };
-    template <> struct ThreshTrunc<float>
-    {
-        ThreshTrunc(float thresh_, float) : thresh(thresh_) {}
-
-        __device__ __forceinline__ float operator()(const float& src) const
-        {
-            return fmin(src, thresh);
-        }
-
-    private:
-        float thresh;
-    };
-    template <> struct ThreshTrunc<double>
-    {
-        ThreshTrunc(double thresh_, double) : thresh(thresh_) {}
-
-        __device__ __forceinline__ double operator()(const double& src) const
-        {
-            return fmin(src, thresh);
-        }
-
-    private:
-        double thresh;
-    };
-
-    template <typename T> struct ThreshToZero
-    {
-    public:
-        ThreshToZero(T thresh_, T) : thresh(thresh_) {}
-
-        __device__ __forceinline__ T operator()(const T& src) const
-        {
-            return src > thresh ? src : 0;
-        }
-
-    private:
-        T thresh;
-    };
-
-    template <typename T> struct ThreshToZeroInv
-    {
-    public:
-        ThreshToZeroInv(T thresh_, T) : thresh(thresh_) {}
-
-        __device__ __forceinline__ T operator()(const T& src) const
-        {
-            return src > thresh ? 0 : src;
-        }
-
-    private:
-        T thresh;
-    };

     template <template <typename> class Op, typename T>
     void threshold_caller(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, T thresh, T maxVal,
@@ -631,11 +433,11 @@ namespace cv { namespace gpu { namespace mathfunc

         static const caller_t callers[] =
         {
-            threshold_caller<ThreshBinary, T>,
-            threshold_caller<ThreshBinaryInv, T>,
-            threshold_caller<ThreshTrunc, T>,
-            threshold_caller<ThreshToZero, T>,
-            threshold_caller<ThreshToZeroInv, T>
+            threshold_caller<thresh_binary_func, T>,
+            threshold_caller<thresh_binary_inv_func, T>,
+            threshold_caller<thresh_trunc_func, T>,
+            threshold_caller<thresh_to_zero_func, T>,
+            threshold_caller<thresh_to_zero_inv_func, T>
         };

         callers[type]((DevMem2D_<T>)src, (DevMem2D_<T>)dst, thresh, maxVal, stream);
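The thresh_*_func objects likewise come from functional.hpp. Judging from the functors they replace, each presumably carries thresh/maxVal and applies the same comparison; a sketch mirroring the removed ThreshBinary:

    // Sketch only: presumed shape of thresh_binary_func, mirroring the
    // removed ThreshBinary functor (dst = src > thresh ? maxVal : 0).
    template <typename T> struct thresh_binary_func
    {
        thresh_binary_func(T thresh_, T maxVal_) : thresh(thresh_), maxVal(maxVal_) {}

        __device__ __forceinline__ T operator()(const T& src) const
        {
            return src > thresh ? maxVal : 0;
        }

        T thresh;
        T maxVal;
    };

The other four presumably follow the removed ThreshBinaryInv / ThreshTrunc / ThreshToZero / ThreshToZeroInv bodies the same way.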
@@ -653,20 +455,10 @@ namespace cv { namespace gpu { namespace mathfunc
     //////////////////////////////////////////////////////////////////////////
     // subtract

-    template <typename T>
-    class SubtractOp
-    {
-    public:
-        __device__ __forceinline__ T operator()(const T& l, const T& r) const
-        {
-            return l - r;
-        }
-    };
-
     template <typename T>
     void subtractCaller(const DevMem2D src1, const DevMem2D src2, DevMem2D dst, cudaStream_t stream)
     {
-        transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<T>)dst, SubtractOp<T>(), stream);
+        transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<T>)dst, minus<T>(), stream);
     }

     template void subtractCaller<short>(const DevMem2D src1, const DevMem2D src2, DevMem2D dst, cudaStream_t stream);
@@ -675,7 +467,7 @@ namespace cv { namespace gpu { namespace mathfunc
     //////////////////////////////////////////////////////////////////////////
     // pow

-    template<typename T, bool Signed = device::numeric_limits_gpu<T>::is_signed>
+    template<typename T, bool Signed = device::numeric_limits<T>::is_signed>
     struct PowOp
     {
         float power;
@@ -695,7 +487,7 @@ namespace cv { namespace gpu { namespace mathfunc

         __device__ __forceinline__ float operator()(const T& e)
         {
-            T res = saturate_cast<T>(__powf((float)e, power));
+            T res = saturate_cast<T>(__powf((float)e, power));

             if ( (e < 0) && (1 & (int)power) )
                 res *= -1;
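PowOp's second template parameter defaults to a type trait, so the signed and unsigned implementations are selected automatically per T. The idiom, with hypothetical names (sketch only):

    // Sketch of the trait-dispatch idiom PowOp uses: pow_dispatch_sketch<uchar>
    // instantiates the primary (unsigned) template, pow_dispatch_sketch<int>
    // the Signed = true specialization, without the caller naming the bool.
    template <typename T, bool Signed = device::numeric_limits<T>::is_signed>
    struct pow_dispatch_sketch
    {
        __device__ __forceinline__ float operator()(T e, float power) const
        {
            return __powf((float)e, power);   // unsigned path: no fix-up
        }
    };

    template <typename T> struct pow_dispatch_sketch<T, true>
    {
        __device__ __forceinline__ float operator()(T e, float power) const
        {
            float res = __powf((float)e, power);
            // signed path: restore the sign for odd integer powers of
            // negative inputs, as the operator() in the hunk above does
            return ((e < 0) && (1 & (int)power)) ? -res : res;
        }
    };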
@@ -42,8 +42,8 @@

 #include "opencv2/gpu/devmem2d.hpp"
 #include "opencv2/gpu/device/saturate_cast.hpp"
-#include "opencv2/gpu/device/vecmath.hpp"
-#include "opencv2/gpu/device/limits_gpu.hpp"
+#include "opencv2/gpu/device/vec_math.hpp"
+#include "opencv2/gpu/device/limits.hpp"
 #include "opencv2/gpu/device/border_interpolate.hpp"

 #include "safe_call.hpp"
@@ -76,7 +76,7 @@ namespace filter_krnls
 {
     template <typename T, size_t size> struct SmemType_
     {
-        typedef typename TypeVec<float, VecTraits<T>::cn>::vec_t smem_t;
+        typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type smem_t;
     };
     template <typename T> struct SmemType_<T, 4>
     {
@@ -111,7 +111,7 @@ namespace filter_krnls

         if (x < src.cols)
         {
-            typedef typename TypeVec<float, VecTraits<T>::cn>::vec_t sum_t;
+            typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_t;
             sum_t sum = VecTraits<sum_t>::all(0);

             sDataRow += threadIdx.x + BLOCK_DIM_X - anchor;
@@ -253,7 +253,7 @@ namespace filter_krnls

         if (y < src.rows)
         {
-            typedef typename TypeVec<float, VecTraits<T>::cn>::vec_t sum_t;
+            typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_t;
             sum_t sum = VecTraits<sum_t>::all(0);

             sDataColumn += (threadIdx.y + BLOCK_DIM_Y - anchor) * BLOCK_DIM_X;
@@ -475,7 +475,7 @@ namespace bf_krnls
             }
         }

-        float minimum = numeric_limits_gpu<float>::max();
+        float minimum = numeric_limits<float>::max();
         int id = 0;

         if (cost[0] < minimum)
@@ -42,6 +42,7 @@
 //M*/

 #include "internal_shared.hpp"
+#include "opencv2/gpu/device/utility.hpp"
 #include "opencv2/gpu/device/saturate_cast.hpp"

 using namespace cv::gpu;
@@ -50,14 +51,11 @@ using namespace cv::gpu::device;

 #define UINT_BITS 32U

-#define LOG2_WARP_SIZE 5U
-#define WARP_SIZE (1U << LOG2_WARP_SIZE)
-
 //Warps == subhistograms per threadblock
 #define WARP_COUNT 6

 //Threadblock size
-#define HISTOGRAM256_THREADBLOCK_SIZE (WARP_COUNT * WARP_SIZE)
+#define HISTOGRAM256_THREADBLOCK_SIZE (WARP_COUNT * OPENCV_GPU_WARP_SIZE)
 #define HISTOGRAM256_BIN_COUNT 256

 //Shared memory per threadblock
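OPENCV_GPU_WARP_SIZE and OPENCV_GPU_LOG_WARP_SIZE replace the file-local macros, so every kernel agrees on one definition. They are presumably provided by a shared header (e.g. internal_shared.hpp or the newly included utility.hpp) along these lines:

    // Presumed central definitions (sketch; the defining header is not part
    // of this diff). Warps are 32 threads wide on CUDA hardware of this era.
    #define OPENCV_GPU_LOG_WARP_SIZE (5)
    #define OPENCV_GPU_WARP_SIZE     (1 << OPENCV_GPU_LOG_WARP_SIZE)  // == 32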
@@ -73,7 +71,7 @@ namespace cv { namespace gpu { namespace histograms
{
 #if (!USE_SMEM_ATOMICS)

-    #define TAG_MASK ( (1U << (UINT_BITS - LOG2_WARP_SIZE)) - 1U )
+    #define TAG_MASK ( (1U << (UINT_BITS - OPENCV_GPU_LOG_WARP_SIZE)) - 1U )

     __forceinline__ __device__ void addByte(volatile uint* s_WarpHist, uint data, uint threadTag)
     {
@@ -111,7 +109,7 @@ namespace cv { namespace gpu { namespace histograms
     {
         //Per-warp subhistogram storage
         __shared__ uint s_Hist[HISTOGRAM256_THREADBLOCK_MEMORY];
-        uint* s_WarpHist= s_Hist + (threadIdx.x >> LOG2_WARP_SIZE) * HISTOGRAM256_BIN_COUNT;
+        uint* s_WarpHist= s_Hist + (threadIdx.x >> OPENCV_GPU_LOG_WARP_SIZE) * HISTOGRAM256_BIN_COUNT;

         //Clear shared memory storage for current threadblock before processing
         #pragma unroll
@@ -119,7 +117,7 @@ namespace cv { namespace gpu { namespace histograms
             s_Hist[threadIdx.x + i * HISTOGRAM256_THREADBLOCK_SIZE] = 0;

         //Cycle through the entire data set, update subhistograms for each warp
-        const uint tag = threadIdx.x << (UINT_BITS - LOG2_WARP_SIZE);
+        const uint tag = threadIdx.x << (UINT_BITS - OPENCV_GPU_LOG_WARP_SIZE);

         __syncthreads();
         const uint colsui = d_Data.step / sizeof(uint);
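When shared-memory atomics are unavailable (USE_SMEM_ATOMICS off), TAG_MASK splits each 32-bit bin into a counter (low bits) and a writer tag (high OPENCV_GPU_LOG_WARP_SIZE bits): a thread re-reads the bin after writing and retries until its own tagged value survived the race. The addByte body is elided from this diff; kernels of this shape (cf. NVIDIA's histogram256 sample) typically implement it as:

    // Sketch of the tagged read-modify-write (body assumed, not shown in the
    // diff). Within a warp only one lane's write to a given location wins,
    // so each lane loops until the bin holds its own tag.
    __forceinline__ __device__ void addByte(volatile uint* s_WarpHist, uint data, uint threadTag)
    {
        uint count;
        do
        {
            count = s_WarpHist[data] & TAG_MASK;  // current counter value
            count = threadTag | (count + 1);      // increment, stamp our tag
            s_WarpHist[data] = count;
        } while (s_WarpHist[data] != count);      // lost the race: retry
    }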
@@ -41,7 +41,7 @@
 //M*/

 #include "internal_shared.hpp"
-#include "opencv2/gpu/device/vecmath.hpp"
+#include "opencv2/gpu/device/vec_math.hpp"

 using namespace cv::gpu;
 using namespace cv::gpu::device;
@@ -84,8 +84,8 @@ __global__ void matchTemplateNaiveKernel_CCORR(
         int w, int h, const PtrStep image, const PtrStep templ,
         DevMem2Df result)
 {
-    typedef typename TypeVec<T, cn>::vec_t Type;
-    typedef typename TypeVec<float, cn>::vec_t Typef;
+    typedef typename TypeVec<T, cn>::vec_type Type;
+    typedef typename TypeVec<float, cn>::vec_type Typef;

     int x = blockDim.x * blockIdx.x + threadIdx.x;
     int y = blockDim.y * blockIdx.y + threadIdx.y;
@@ -174,8 +174,8 @@ __global__ void matchTemplateNaiveKernel_SQDIFF(
         int w, int h, const PtrStep image, const PtrStep templ,
         DevMem2Df result)
 {
-    typedef typename TypeVec<T, cn>::vec_t Type;
-    typedef typename TypeVec<float, cn>::vec_t Typef;
+    typedef typename TypeVec<T, cn>::vec_type Type;
+    typedef typename TypeVec<float, cn>::vec_type Typef;

     int x = blockDim.x * blockIdx.x + threadIdx.x;
     int y = blockDim.y * blockIdx.y + threadIdx.y;
@@ -884,7 +884,7 @@ void normalize_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum,
 template <int cn>
 __global__ void extractFirstChannel_32F(const PtrStep image, DevMem2Df result)
 {
-    typedef typename TypeVec<float, cn>::vec_t Typef;
+    typedef typename TypeVec<float, cn>::vec_type Typef;

     int x = blockDim.x * blockIdx.x + threadIdx.x;
     int y = blockDim.y * blockIdx.y + threadIdx.y;
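The vec_t -> vec_type rename tracks the vecmath.hpp -> vec_math.hpp move. TypeVec presumably still maps a scalar type plus channel count to the matching CUDA vector type:

    // Presumed effect of the trait after the rename (sketch):
    //   TypeVec<float, 1>::vec_type == float
    //   TypeVec<float, 4>::vec_type == float4
    //   TypeVec<uchar, 2>::vec_type == uchar2
    template <typename T, int cn> struct TypeVecSketch;
    template <> struct TypeVecSketch<float, 4> { typedef float4 vec_type; };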
@@ -40,9 +40,9 @@
 //
 //M*/

-#include "opencv2/gpu/device/limits_gpu.hpp"
+#include "opencv2/gpu/device/limits.hpp"
 #include "opencv2/gpu/device/saturate_cast.hpp"
-#include "opencv2/gpu/device/vecmath.hpp"
+#include "opencv2/gpu/device/vec_math.hpp"
 #include "opencv2/gpu/device/transform.hpp"
 #include "internal_shared.hpp"

@@ -40,9 +40,9 @@
 //
 //M*/

-#include "opencv2/gpu/device/limits_gpu.hpp"
+#include "opencv2/gpu/device/limits.hpp"
 #include "opencv2/gpu/device/saturate_cast.hpp"
-#include "opencv2/gpu/device/vecmath.hpp"
+#include "opencv2/gpu/device/vec_math.hpp"
 #include "opencv2/gpu/device/transform.hpp"
 #include "internal_shared.hpp"

@@ -190,8 +190,8 @@ namespace cv { namespace gpu { namespace mathfunc
         uint y0 = blockIdx.y * blockDim.y * ctheight + threadIdx.y;
         uint tid = threadIdx.y * blockDim.x + threadIdx.x;

-        T mymin = numeric_limits_gpu<T>::max();
-        T mymax = numeric_limits_gpu<T>::is_signed ? -numeric_limits_gpu<T>::max() : numeric_limits_gpu<T>::min();
+        T mymin = numeric_limits<T>::max();
+        T mymax = numeric_limits<T>::is_signed ? -numeric_limits<T>::max() : numeric_limits<T>::min();
         uint y_end = min(y0 + (ctheight - 1) * blockDim.y + 1, src.rows);
         uint x_end = min(x0 + (ctwidth - 1) * blockDim.x + 1, src.cols);
         for (uint y = y0; y < y_end; y += blockDim.y)
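The is_signed branch is kept because, mirroring std::numeric_limits, min() for floating-point types is the smallest positive value rather than the most negative one:

    // Why mymax starts from -max() for signed types (illustration):
    float fstart = -numeric_limits<float>::max(); // most negative finite float
    uint  ustart =  numeric_limits<uint>::min();  // 0, correct for unsigned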
@@ -512,9 +512,9 @@ namespace cv { namespace gpu { namespace mathfunc
         uint y0 = blockIdx.y * blockDim.y * ctheight + threadIdx.y;
         uint tid = threadIdx.y * blockDim.x + threadIdx.x;

-        T mymin = numeric_limits_gpu<T>::max();
-        T mymax = numeric_limits_gpu<T>::is_signed ? -numeric_limits_gpu<T>::max() :
-                                                      numeric_limits_gpu<T>::min();
+        T mymin = numeric_limits<T>::max();
+        T mymax = numeric_limits<T>::is_signed ? -numeric_limits<T>::max() :
+                                                  numeric_limits<T>::min();
         uint myminloc = 0;
         uint mymaxloc = 0;
         uint y_end = min(y0 + (ctheight - 1) * blockDim.y + 1, src.rows);
@@ -1094,10 +1094,10 @@ namespace cv { namespace gpu { namespace mathfunc


     template <typename T, typename R, typename Op, int nthreads>
-    __global__ void sumKernel_C2(const DevMem2D src, typename TypeVec<R, 2>::vec_t* result)
+    __global__ void sumKernel_C2(const DevMem2D src, typename TypeVec<R, 2>::vec_type* result)
     {
-        typedef typename TypeVec<T, 2>::vec_t SrcType;
-        typedef typename TypeVec<R, 2>::vec_t DstType;
+        typedef typename TypeVec<T, 2>::vec_type SrcType;
+        typedef typename TypeVec<R, 2>::vec_type DstType;

         __shared__ R smem[nthreads * 2];

@@ -1173,9 +1173,9 @@ namespace cv { namespace gpu { namespace mathfunc


     template <typename T, typename R, int nthreads>
-    __global__ void sumPass2Kernel_C2(typename TypeVec<R, 2>::vec_t* result, int size)
+    __global__ void sumPass2Kernel_C2(typename TypeVec<R, 2>::vec_type* result, int size)
     {
-        typedef typename TypeVec<R, 2>::vec_t DstType;
+        typedef typename TypeVec<R, 2>::vec_type DstType;

         __shared__ R smem[nthreads * 2];

@@ -1199,10 +1199,10 @@ namespace cv { namespace gpu { namespace mathfunc


     template <typename T, typename R, typename Op, int nthreads>
-    __global__ void sumKernel_C3(const DevMem2D src, typename TypeVec<R, 3>::vec_t* result)
+    __global__ void sumKernel_C3(const DevMem2D src, typename TypeVec<R, 3>::vec_type* result)
     {
-        typedef typename TypeVec<T, 3>::vec_t SrcType;
-        typedef typename TypeVec<R, 3>::vec_t DstType;
+        typedef typename TypeVec<T, 3>::vec_type SrcType;
+        typedef typename TypeVec<R, 3>::vec_type DstType;

         __shared__ R smem[nthreads * 3];

@@ -1285,9 +1285,9 @@ namespace cv { namespace gpu { namespace mathfunc


     template <typename T, typename R, int nthreads>
-    __global__ void sumPass2Kernel_C3(typename TypeVec<R, 3>::vec_t* result, int size)
+    __global__ void sumPass2Kernel_C3(typename TypeVec<R, 3>::vec_type* result, int size)
     {
-        typedef typename TypeVec<R, 3>::vec_t DstType;
+        typedef typename TypeVec<R, 3>::vec_type DstType;

         __shared__ R smem[nthreads * 3];

@@ -1313,10 +1313,10 @@ namespace cv { namespace gpu { namespace mathfunc
     }

     template <typename T, typename R, typename Op, int nthreads>
-    __global__ void sumKernel_C4(const DevMem2D src, typename TypeVec<R, 4>::vec_t* result)
+    __global__ void sumKernel_C4(const DevMem2D src, typename TypeVec<R, 4>::vec_type* result)
     {
-        typedef typename TypeVec<T, 4>::vec_t SrcType;
-        typedef typename TypeVec<R, 4>::vec_t DstType;
+        typedef typename TypeVec<T, 4>::vec_type SrcType;
+        typedef typename TypeVec<R, 4>::vec_type DstType;

         __shared__ R smem[nthreads * 4];

@@ -1407,9 +1407,9 @@ namespace cv { namespace gpu { namespace mathfunc


     template <typename T, typename R, int nthreads>
-    __global__ void sumPass2Kernel_C4(typename TypeVec<R, 4>::vec_t* result, int size)
+    __global__ void sumPass2Kernel_C4(typename TypeVec<R, 4>::vec_type* result, int size)
     {
-        typedef typename TypeVec<R, 4>::vec_t DstType;
+        typedef typename TypeVec<R, 4>::vec_type DstType;

         __shared__ R smem[nthreads * 4];

@@ -1454,41 +1454,41 @@ namespace cv { namespace gpu { namespace mathfunc
         {
         case 1:
             sumKernel<T, R, IdentityOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 1>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 1>::vec_type*)buf.ptr(0));
             cudaSafeCall( cudaGetLastError() );

             sumPass2Kernel<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
-                (typename TypeVec<R, 1>::vec_t*)buf.ptr(0), grid.x * grid.y);
+                (typename TypeVec<R, 1>::vec_type*)buf.ptr(0), grid.x * grid.y);
             cudaSafeCall( cudaGetLastError() );

             break;
         case 2:
             sumKernel_C2<T, R, IdentityOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 2>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 2>::vec_type*)buf.ptr(0));
             cudaSafeCall( cudaGetLastError() );

             sumPass2Kernel_C2<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
-                (typename TypeVec<R, 2>::vec_t*)buf.ptr(0), grid.x * grid.y);
+                (typename TypeVec<R, 2>::vec_type*)buf.ptr(0), grid.x * grid.y);
             cudaSafeCall( cudaGetLastError() );

             break;
         case 3:
             sumKernel_C3<T, R, IdentityOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 3>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 3>::vec_type*)buf.ptr(0));
             cudaSafeCall( cudaGetLastError() );

             sumPass2Kernel_C3<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
-                (typename TypeVec<R, 3>::vec_t*)buf.ptr(0), grid.x * grid.y);
+                (typename TypeVec<R, 3>::vec_type*)buf.ptr(0), grid.x * grid.y);
             cudaSafeCall( cudaGetLastError() );

             break;
         case 4:
             sumKernel_C4<T, R, IdentityOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 4>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 4>::vec_type*)buf.ptr(0));
             cudaSafeCall( cudaGetLastError() );

             sumPass2Kernel_C4<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
-                (typename TypeVec<R, 4>::vec_t*)buf.ptr(0), grid.x * grid.y);
+                (typename TypeVec<R, 4>::vec_type*)buf.ptr(0), grid.x * grid.y);
             cudaSafeCall( cudaGetLastError() );

             break;
@@ -1526,19 +1526,19 @@ namespace cv { namespace gpu { namespace mathfunc
         {
         case 1:
             sumKernel<T, R, IdentityOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 1>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 1>::vec_type*)buf.ptr(0));
             break;
         case 2:
             sumKernel_C2<T, R, IdentityOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 2>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 2>::vec_type*)buf.ptr(0));
             break;
         case 3:
             sumKernel_C3<T, R, IdentityOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 3>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 3>::vec_type*)buf.ptr(0));
             break;
         case 4:
             sumKernel_C4<T, R, IdentityOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 4>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 4>::vec_type*)buf.ptr(0));
             break;
         }
         cudaSafeCall( cudaGetLastError() );
@@ -1576,41 +1576,41 @@ namespace cv { namespace gpu { namespace mathfunc
         {
         case 1:
             sumKernel<T, R, AbsOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 1>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 1>::vec_type*)buf.ptr(0));
             cudaSafeCall( cudaGetLastError() );

             sumPass2Kernel<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
-                (typename TypeVec<R, 1>::vec_t*)buf.ptr(0), grid.x * grid.y);
+                (typename TypeVec<R, 1>::vec_type*)buf.ptr(0), grid.x * grid.y);
             cudaSafeCall( cudaGetLastError() );

             break;
         case 2:
             sumKernel_C2<T, R, AbsOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 2>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 2>::vec_type*)buf.ptr(0));
             cudaSafeCall( cudaGetLastError() );

             sumPass2Kernel_C2<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
-                (typename TypeVec<R, 2>::vec_t*)buf.ptr(0), grid.x * grid.y);
+                (typename TypeVec<R, 2>::vec_type*)buf.ptr(0), grid.x * grid.y);
             cudaSafeCall( cudaGetLastError() );

             break;
         case 3:
             sumKernel_C3<T, R, AbsOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 3>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 3>::vec_type*)buf.ptr(0));
             cudaSafeCall( cudaGetLastError() );

             sumPass2Kernel_C3<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
-                (typename TypeVec<R, 3>::vec_t*)buf.ptr(0), grid.x * grid.y);
+                (typename TypeVec<R, 3>::vec_type*)buf.ptr(0), grid.x * grid.y);
             cudaSafeCall( cudaGetLastError() );

             break;
         case 4:
             sumKernel_C4<T, R, AbsOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 4>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 4>::vec_type*)buf.ptr(0));
             cudaSafeCall( cudaGetLastError() );

             sumPass2Kernel_C4<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
-                (typename TypeVec<R, 4>::vec_t*)buf.ptr(0), grid.x * grid.y);
+                (typename TypeVec<R, 4>::vec_type*)buf.ptr(0), grid.x * grid.y);
             cudaSafeCall( cudaGetLastError() );

             break;
@@ -1648,19 +1648,19 @@ namespace cv { namespace gpu { namespace mathfunc
         {
         case 1:
             sumKernel<T, R, AbsOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 1>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 1>::vec_type*)buf.ptr(0));
             break;
         case 2:
             sumKernel_C2<T, R, AbsOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 2>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 2>::vec_type*)buf.ptr(0));
             break;
         case 3:
             sumKernel_C3<T, R, AbsOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 3>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 3>::vec_type*)buf.ptr(0));
             break;
         case 4:
             sumKernel_C4<T, R, AbsOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 4>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 4>::vec_type*)buf.ptr(0));
             break;
         }
         cudaSafeCall( cudaGetLastError() );
@@ -1698,41 +1698,41 @@ namespace cv { namespace gpu { namespace mathfunc
         {
         case 1:
             sumKernel<T, R, SqrOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 1>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 1>::vec_type*)buf.ptr(0));
             cudaSafeCall( cudaGetLastError() );

             sumPass2Kernel<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
-                (typename TypeVec<R, 1>::vec_t*)buf.ptr(0), grid.x * grid.y);
+                (typename TypeVec<R, 1>::vec_type*)buf.ptr(0), grid.x * grid.y);
             cudaSafeCall( cudaGetLastError() );

             break;
         case 2:
             sumKernel_C2<T, R, SqrOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 2>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 2>::vec_type*)buf.ptr(0));
             cudaSafeCall( cudaGetLastError() );

             sumPass2Kernel_C2<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
-                (typename TypeVec<R, 2>::vec_t*)buf.ptr(0), grid.x * grid.y);
+                (typename TypeVec<R, 2>::vec_type*)buf.ptr(0), grid.x * grid.y);
             cudaSafeCall( cudaGetLastError() );

             break;
         case 3:
             sumKernel_C3<T, R, SqrOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 3>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 3>::vec_type*)buf.ptr(0));
             cudaSafeCall( cudaGetLastError() );

             sumPass2Kernel_C3<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
-                (typename TypeVec<R, 3>::vec_t*)buf.ptr(0), grid.x * grid.y);
+                (typename TypeVec<R, 3>::vec_type*)buf.ptr(0), grid.x * grid.y);
             cudaSafeCall( cudaGetLastError() );

             break;
         case 4:
             sumKernel_C4<T, R, SqrOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 4>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 4>::vec_type*)buf.ptr(0));
             cudaSafeCall( cudaGetLastError() );

             sumPass2Kernel_C4<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
-                (typename TypeVec<R, 4>::vec_t*)buf.ptr(0), grid.x * grid.y);
+                (typename TypeVec<R, 4>::vec_type*)buf.ptr(0), grid.x * grid.y);
             cudaSafeCall( cudaGetLastError() );

             break;
@@ -1770,19 +1770,19 @@ namespace cv { namespace gpu { namespace mathfunc
         {
         case 1:
             sumKernel<T, R, SqrOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 1>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 1>::vec_type*)buf.ptr(0));
             break;
         case 2:
             sumKernel_C2<T, R, SqrOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 2>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 2>::vec_type*)buf.ptr(0));
             break;
         case 3:
             sumKernel_C3<T, R, SqrOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 3>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 3>::vec_type*)buf.ptr(0));
             break;
         case 4:
             sumKernel_C4<T, R, SqrOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 4>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 4>::vec_type*)buf.ptr(0));
             break;
         }
         cudaSafeCall( cudaGetLastError() );

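The Op parameter (IdentityOp, AbsOp, SqrOp) selects what each element contributes, so one kernel family computes the sum, the absolute sum, and the squared sum. Their presumed one-line shapes (sketch; definitions are not shown in this diff):

    // Presumed per-element ops threaded through the sum kernels (sketch):
    template <typename R> struct IdentityOp
    {
        __device__ __forceinline__ R operator()(R v) const { return v; }
    };
    template <typename R> struct AbsOp
    {
        __device__ __forceinline__ R operator()(R v) const { return v < 0 ? -v : v; }
    };
    template <typename R> struct SqrOp
    {
        __device__ __forceinline__ R operator()(R v) const { return v * v; }
    };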
@@ -42,7 +42,7 @@

 #include "opencv2/gpu/devmem2d.hpp"
 #include "opencv2/gpu/device/saturate_cast.hpp"
-#include "opencv2/gpu/device/limits_gpu.hpp"
+#include "opencv2/gpu/device/limits.hpp"
 #include "safe_call.hpp"

 using namespace cv::gpu;
@@ -381,7 +381,7 @@ namespace cv { namespace gpu { namespace bp
     template <typename T>
     __device__ void message(const T* msg1, const T* msg2, const T* msg3, const T* data, T* dst, size_t msg_disp_step, size_t data_disp_step)
     {
-        float minimum = numeric_limits_gpu<float>::max();
+        float minimum = numeric_limits<float>::max();

         for(int i = 0; i < cndisp; ++i)
         {
@@ -486,7 +486,7 @@ namespace cv { namespace gpu { namespace bp
         size_t disp_step = disp.rows * u.step;

         int best = 0;
-        float best_val = numeric_limits_gpu<float>::max();
+        float best_val = numeric_limits<float>::max();
         for (int d = 0; d < cndisp; ++d)
         {
             float val = us[d * disp_step];

@@ -42,7 +42,7 @@

 #include "opencv2/gpu/devmem2d.hpp"
 #include "opencv2/gpu/device/saturate_cast.hpp"
-#include "opencv2/gpu/device/limits_gpu.hpp"
+#include "opencv2/gpu/device/limits.hpp"
 #include "safe_call.hpp"

 using namespace cv::gpu;
@@ -147,7 +147,7 @@ namespace cv { namespace gpu { namespace csbp

         for(int i = 0; i < nr_plane; i++)
         {
-            T minimum = numeric_limits_gpu<T>::max();
+            T minimum = numeric_limits<T>::max();
             int id = 0;
             for(int d = 0; d < cndisp; d++)
             {
@@ -161,7 +161,7 @@ namespace cv { namespace gpu { namespace csbp

             data_cost_selected[i * cdisp_step1] = minimum;
             selected_disparity[i * cdisp_step1] = id;
-            data_cost [id * cdisp_step1] = numeric_limits_gpu<T>::max();
+            data_cost [id * cdisp_step1] = numeric_limits<T>::max();
         }
     }
 }
@@ -192,7 +192,7 @@ namespace cv { namespace gpu { namespace csbp
                 data_cost_selected[nr_local_minimum * cdisp_step1] = cur;
                 selected_disparity[nr_local_minimum * cdisp_step1] = d;

-                data_cost[d * cdisp_step1] = numeric_limits_gpu<T>::max();
+                data_cost[d * cdisp_step1] = numeric_limits<T>::max();

                 nr_local_minimum++;
             }
@@ -203,7 +203,7 @@ namespace cv { namespace gpu { namespace csbp

         for (int i = nr_local_minimum; i < nr_plane; i++)
         {
-            T minimum = numeric_limits_gpu<T>::max();
+            T minimum = numeric_limits<T>::max();
             int id = 0;

             for (int d = 0; d < cndisp; d++)
@@ -218,7 +218,7 @@ namespace cv { namespace gpu { namespace csbp
             data_cost_selected[i * cdisp_step1] = minimum;
             selected_disparity[i * cdisp_step1] = id;

-            data_cost[id * cdisp_step1] = numeric_limits_gpu<T>::max();
+            data_cost[id * cdisp_step1] = numeric_limits<T>::max();
         }
     }
 }
@@ -610,7 +610,7 @@ namespace cv { namespace gpu { namespace csbp
     {
         for(int i = 0; i < nr_plane; i++)
         {
-            T minimum = numeric_limits_gpu<T>::max();
+            T minimum = numeric_limits<T>::max();
             int id = 0;
             for(int j = 0; j < nr_plane2; j++)
             {
@@ -630,7 +630,7 @@ namespace cv { namespace gpu { namespace csbp
             l_new[i * cdisp_step1] = l_cur[id * cdisp_step2];
             r_new[i * cdisp_step1] = r_cur[id * cdisp_step2];

-            data_cost_new[id * cdisp_step1] = numeric_limits_gpu<T>::max();
+            data_cost_new[id * cdisp_step1] = numeric_limits<T>::max();
         }
     }

@@ -737,7 +737,7 @@ namespace cv { namespace gpu { namespace csbp
     __device__ void message_per_pixel(const T* data, T* msg_dst, const T* msg1, const T* msg2, const T* msg3,
                                       const T* dst_disp, const T* src_disp, int nr_plane, T* temp)
     {
-        T minimum = numeric_limits_gpu<T>::max();
+        T minimum = numeric_limits<T>::max();

         for(int d = 0; d < nr_plane; d++)
         {
@@ -850,7 +850,7 @@ namespace cv { namespace gpu { namespace csbp
         const T* r = r_ + (y+0) * cmsg_step1 + (x-1);

         int best = 0;
-        T best_val = numeric_limits_gpu<T>::max();
+        T best_val = numeric_limits<T>::max();
         for (int i = 0; i < nr_plane; ++i)
         {
             int idx = i * cdisp_step1;

@@ -46,8 +46,10 @@
 //M*/

 #include "internal_shared.hpp"
-#include "opencv2/gpu/device/limits_gpu.hpp"
+#include "opencv2/gpu/device/limits.hpp"
 #include "opencv2/gpu/device/saturate_cast.hpp"
+#include "opencv2/gpu/device/utility.hpp"
+#include "opencv2/gpu/device/functional.hpp"

 using namespace cv::gpu;
 using namespace cv::gpu::device;
@@ -393,31 +395,10 @@ namespace cv { namespace gpu { namespace surf
         //dss
         H[2][2] = N9[0][1][1] - 2.0f * N9[1][1][1] + N9[2][1][1];

-        float det = H[0][0] * (H[1][1] * H[2][2] - H[1][2] * H[2][1])
-                  - H[0][1] * (H[1][0] * H[2][2] - H[1][2] * H[2][0])
-                  + H[0][2] * (H[1][0] * H[2][1] - H[1][1] * H[2][0]);
+        __shared__ float x[3];

-        if (det != 0.0f)
+        if (solve3x3(H, dD, x))
         {
-            float invdet = 1.0f / det;
-
-            __shared__ float x[3];
-
-            x[0] = invdet *
-                (dD[0] * (H[1][1] * H[2][2] - H[1][2] * H[2][1]) -
-                 H[0][1] * (dD[1] * H[2][2] - H[1][2] * dD[2]) +
-                 H[0][2] * (dD[1] * H[2][1] - H[1][1] * dD[2]));
-
-            x[1] = invdet *
-                (H[0][0] * (dD[1] * H[2][2] - H[1][2] * dD[2]) -
-                 dD[0] * (H[1][0] * H[2][2] - H[1][2] * H[2][0]) +
-                 H[0][2] * (H[1][0] * dD[2] - dD[1] * H[2][0]));
-
-            x[2] = invdet *
-                (H[0][0] * (H[1][1] * dD[2] - dD[1] * H[2][1]) -
-                 H[0][1] * (H[1][0] * dD[2] - dD[1] * H[2][0]) +
-                 dD[0] * (H[1][0] * H[2][1] - H[1][1] * H[2][0]));
-
             if (fabs(x[0]) <= 1.f && fabs(x[1]) <= 1.f && fabs(x[2]) <= 1.f)
             {
                 // if the step is within the interpolation region, perform it
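solve3x3 folds the inlined Cramer's-rule solve into a reusable device helper (declared in the headers added at the top of this file). Reconstructed from the code it replaces, it presumably behaves like:

    // Sketch of solve3x3 reconstructed from the removed inline code: solves
    // A * x = b by Cramer's rule; returning false for a singular matrix
    // replaces the old 'det != 0.0f' guard.
    __device__ __forceinline__ bool solve3x3_sketch(const float A[3][3], const float b[3], float x[3])
    {
        float det = A[0][0] * (A[1][1] * A[2][2] - A[1][2] * A[2][1])
                  - A[0][1] * (A[1][0] * A[2][2] - A[1][2] * A[2][0])
                  + A[0][2] * (A[1][0] * A[2][1] - A[1][1] * A[2][0]);

        if (det == 0.0f)
            return false;

        float invdet = 1.0f / det;

        x[0] = invdet * (b[0]    * (A[1][1] * A[2][2] - A[1][2] * A[2][1]) -
                         A[0][1] * (b[1]    * A[2][2] - A[1][2] * b[2]) +
                         A[0][2] * (b[1]    * A[2][1] - A[1][1] * b[2]));

        x[1] = invdet * (A[0][0] * (b[1]    * A[2][2] - A[1][2] * b[2]) -
                         b[0]    * (A[1][0] * A[2][2] - A[1][2] * A[2][0]) +
                         A[0][2] * (A[1][0] * b[2]    - b[1]    * A[2][0]));

        x[2] = invdet * (A[0][0] * (A[1][1] * b[2]    - b[1]    * A[2][1]) -
                         A[0][1] * (A[1][0] * b[2]    - b[1]    * A[2][0]) +
                         b[0]    * (A[1][0] * A[2][1] - A[1][1] * A[2][0]));

        return true;
    }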
@@ -500,20 +481,6 @@ namespace cv { namespace gpu { namespace surf
     __constant__ float c_NX[2][5] = {{0, 0, 2, 4, -1}, {2, 0, 4, 4, 1}};
     __constant__ float c_NY[2][5] = {{0, 0, 4, 2, 1}, {0, 2, 4, 4, -1}};

-    __device__ void reduceSum32(volatile float* v_sum, float& sum)
-    {
-        v_sum[threadIdx.x] = sum;
-
-        if (threadIdx.x < 16)
-        {
-            v_sum[threadIdx.x] = sum += v_sum[threadIdx.x + 16];
-            v_sum[threadIdx.x] = sum += v_sum[threadIdx.x + 8];
-            v_sum[threadIdx.x] = sum += v_sum[threadIdx.x + 4];
-            v_sum[threadIdx.x] = sum += v_sum[threadIdx.x + 2];
-            v_sum[threadIdx.x] = sum += v_sum[threadIdx.x + 1];
-        }
-    }
-
     __global__ void icvCalcOrientation(const float* featureX, const float* featureY, const float* featureSize, float* featureDir)
     {
 #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 110
@@ -599,8 +566,11 @@ namespace cv { namespace gpu { namespace surf

         float* s_sum_row = s_sum + threadIdx.y * 32;

-        reduceSum32(s_sum_row, sumx);
-        reduceSum32(s_sum_row, sumy);
+        //reduceSum32(s_sum_row, sumx);
+        //reduceSum32(s_sum_row, sumy);
+
+        warpReduce32(s_sum_row, sumx, threadIdx.x, plus<volatile float>());
+        warpReduce32(s_sum_row, sumy, threadIdx.x, plus<volatile float>());

         const float temp_mod = sumx * sumx + sumy * sumy;
         if (temp_mod > best_mod)
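warpReduce32 (from the newly included utility.hpp) generalizes the deleted reduceSum32: the same 32-wide shared-memory reduction performed by the first 16 lanes, but with the combining step passed in as a functor (plus<volatile float>() above). From the removed body and the new call sites, it presumably reads:

    // Sketch of warpReduce32, inferred from the removed reduceSum32 plus the
    // new call signature: a 32-element tree reduction in shared memory with a
    // caller-supplied binary op; the result lands in smem[0] and in lane 0's
    // val.
    template <typename T, typename Op>
    __device__ __forceinline__ void warpReduce32_sketch(volatile T* smem, T& val, unsigned int tid, Op op)
    {
        smem[tid] = val;

        if (tid < 16)
        {
            smem[tid] = val = op(val, smem[tid + 16]);
            smem[tid] = val = op(val, smem[tid +  8]);
            smem[tid] = val = op(val, smem[tid +  4]);
            smem[tid] = val = op(val, smem[tid +  2]);
            smem[tid] = val = op(val, smem[tid +  1]);
        }
    }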