gpu device layer code refactoring
This commit is contained in:
parent fa0daa4809
commit 3ab2728da1
@@ -23,7 +23,9 @@ source_group("Include" FILES ${lib_hdrs})
 #file(GLOB lib_device_hdrs "include/opencv2/${name}/device/*.h*")
 file(GLOB lib_device_hdrs "src/opencv2/gpu/device/*.h*")
+file(GLOB lib_device_hdrs_detail "src/opencv2/gpu/device/detail/*.h*")
 source_group("Device" FILES ${lib_device_hdrs})
+source_group("Device\\Detail" FILES ${lib_device_hdrs_detail})
 
 if (HAVE_CUDA)
     file(GLOB_RECURSE ncv_srcs "src/nvidia/*.cpp")
@@ -83,7 +85,7 @@ foreach(d ${DEPS})
     endif()
 endforeach()
 
-add_library(${the_target} ${lib_srcs} ${lib_hdrs} ${lib_int_hdrs} ${lib_cuda} ${lib_cuda_hdrs} ${lib_device_hdrs} ${ncv_srcs} ${ncv_hdrs} ${ncv_cuda} ${cuda_objs})
+add_library(${the_target} ${lib_srcs} ${lib_hdrs} ${lib_int_hdrs} ${lib_cuda} ${lib_cuda_hdrs} ${lib_device_hdrs} ${lib_device_hdrs_detail} ${ncv_srcs} ${ncv_hdrs} ${ncv_cuda} ${cuda_objs})
 
 # For dynamic link numbering convenions
 set_target_properties(${the_target} PROPERTIES
File diff suppressed because it is too large
@@ -41,7 +41,7 @@
 //M*/
 
 #include "internal_shared.hpp"
-#include "opencv2/gpu/device/limits_gpu.hpp"
+#include "opencv2/gpu/device/limits.hpp"
 #include "opencv2/gpu/device/datamov_utils.hpp"
 
 using namespace cv::gpu;
@@ -565,7 +565,7 @@ namespace cv { namespace gpu { namespace bfmatcher
 
         int myBestTrainIdx = -1;
         int myBestImgIdx = -1;
-        typename Dist::ResultType myMin = numeric_limits_gpu<typename Dist::ResultType>::max();
+        typename Dist::ResultType myMin = numeric_limits<typename Dist::ResultType>::max();
 
         {
             typename Dist::ResultType* sdiff_row = smem + BLOCK_DIM_X * threadIdx.y;
@@ -821,7 +821,7 @@ namespace cv { namespace gpu { namespace bfmatcher
         {
             const T* trainDescs = trainDescs_.ptr(trainIdx);
 
-            typename Dist::ResultType myDist = numeric_limits_gpu<typename Dist::ResultType>::max();
+            typename Dist::ResultType myDist = numeric_limits<typename Dist::ResultType>::max();
 
             if (mask(queryIdx, trainIdx))
             {
@@ -932,7 +932,7 @@ namespace cv { namespace gpu { namespace bfmatcher
         {
             const int tid = threadIdx.x;
 
-            T myMin = numeric_limits_gpu<T>::max();
+            T myMin = numeric_limits<T>::max();
             int myMinIdx = -1;
 
             for (int i = tid; i < n; i += BLOCK_SIZE)
@@ -1007,10 +1007,10 @@ namespace cv { namespace gpu { namespace bfmatcher
             if (threadIdx.x == 0)
             {
                 float dist = sdist[0];
-                if (dist < numeric_limits_gpu<float>::max())
+                if (dist < numeric_limits<float>::max())
                 {
                     int bestIdx = strainIdx[0];
-                    allDist[bestIdx] = numeric_limits_gpu<float>::max();
+                    allDist[bestIdx] = numeric_limits<float>::max();
                     trainIdx[i] = bestIdx;
                     distance[i] = dist;
                 }
File diff suppressed because it is too large
@@ -40,9 +40,10 @@
 //
 //M*/
 
-#include "opencv2/gpu/device/vecmath.hpp"
+#include "opencv2/gpu/device/functional.hpp"
+#include "opencv2/gpu/device/vec_math.hpp"
 #include "opencv2/gpu/device/transform.hpp"
-#include "opencv2/gpu/device/limits_gpu.hpp"
+#include "opencv2/gpu/device/limits.hpp"
 #include "opencv2/gpu/device/saturate_cast.hpp"
 #include "internal_shared.hpp"
 
@@ -354,114 +355,11 @@ namespace cv { namespace gpu { namespace mathfunc
 
     //////////////////////////////////////////////////////////////////////////
     // min/max
 
-    struct MinOp
-    {
-        template <typename T>
-        __device__ __forceinline__ T operator()(T a, T b)
-        {
-            return min(a, b);
-        }
-        __device__ __forceinline__ float operator()(float a, float b)
-        {
-            return fmin(a, b);
-        }
-        __device__ __forceinline__ double operator()(double a, double b)
-        {
-            return fmin(a, b);
-        }
-    };
-
-    struct MaxOp
-    {
-        template <typename T>
-        __device__ __forceinline__ T operator()(T a, T b)
-        {
-            return max(a, b);
-        }
-        __device__ __forceinline__ float operator()(float a, float b)
-        {
-            return fmax(a, b);
-        }
-        __device__ __forceinline__ double operator()(double a, double b)
-        {
-            return fmax(a, b);
-        }
-    };
-
-    template <typename T> struct ScalarMinOp
-    {
-        T s;
-
-        explicit ScalarMinOp(T s_) : s(s_) {}
-
-        __device__ __forceinline__ T operator()(T a)
-        {
-            return min(a, s);
-        }
-    };
-    template <> struct ScalarMinOp<float>
-    {
-        float s;
-
-        explicit ScalarMinOp(float s_) : s(s_) {}
-
-        __device__ __forceinline__ float operator()(float a)
-        {
-            return fmin(a, s);
-        }
-    };
-    template <> struct ScalarMinOp<double>
-    {
-        double s;
-
-        explicit ScalarMinOp(double s_) : s(s_) {}
-
-        __device__ __forceinline__ double operator()(double a)
-        {
-            return fmin(a, s);
-        }
-    };
-
-    template <typename T> struct ScalarMaxOp
-    {
-        T s;
-
-        explicit ScalarMaxOp(T s_) : s(s_) {}
-
-        __device__ __forceinline__ T operator()(T a)
-        {
-            return max(a, s);
-        }
-    };
-    template <> struct ScalarMaxOp<float>
-    {
-        float s;
-
-        explicit ScalarMaxOp(float s_) : s(s_) {}
-
-        __device__ __forceinline__ float operator()(float a)
-        {
-            return fmax(a, s);
-        }
-    };
-    template <> struct ScalarMaxOp<double>
-    {
-        double s;
-
-        explicit ScalarMaxOp(double s_) : s(s_) {}
-
-        __device__ __forceinline__ double operator()(double a)
-        {
-            return fmax(a, s);
-        }
-    };
-
     template <typename T>
     void min_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream)
     {
-        MinOp op;
-        transform(src1, src2, dst, op, stream);
+        transform(src1, src2, dst, minimum<T>(), stream);
     }
 
     template void min_gpu<uchar >(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst, cudaStream_t stream);
@@ -475,8 +373,7 @@ namespace cv { namespace gpu { namespace mathfunc
     template <typename T>
     void max_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream)
     {
-        MaxOp op;
-        transform(src1, src2, dst, op, stream);
+        transform(src1, src2, dst, maximum<T>(), stream);
     }
 
     template void max_gpu<uchar >(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst, cudaStream_t stream);
@@ -490,8 +387,7 @@ namespace cv { namespace gpu { namespace mathfunc
     template <typename T>
     void min_gpu(const DevMem2D_<T>& src1, T src2, const DevMem2D_<T>& dst, cudaStream_t stream)
     {
-        ScalarMinOp<T> op(src2);
-        transform(src1, dst, op, stream);
+        transform(src1, dst, device::bind2nd(minimum<T>(), src2), stream);
     }
 
     template void min_gpu<uchar >(const DevMem2D& src1, uchar src2, const DevMem2D& dst, cudaStream_t stream);
@@ -501,12 +397,11 @@ namespace cv { namespace gpu { namespace mathfunc
     template void min_gpu<int   >(const DevMem2D_<int>& src1, int src2, const DevMem2D_<int>& dst, cudaStream_t stream);
     template void min_gpu<float >(const DevMem2D_<float>& src1, float src2, const DevMem2D_<float>& dst, cudaStream_t stream);
     template void min_gpu<double>(const DevMem2D_<double>& src1, double src2, const DevMem2D_<double>& dst, cudaStream_t stream);
 
 
     template <typename T>
     void max_gpu(const DevMem2D_<T>& src1, T src2, const DevMem2D_<T>& dst, cudaStream_t stream)
     {
-        ScalarMaxOp<T> op(src2);
-        transform(src1, dst, op, stream);
+        transform(src1, dst, device::bind2nd(maximum<T>(), src2), stream);
     }
 
     template void max_gpu<uchar >(const DevMem2D& src1, uchar src2, const DevMem2D& dst, cudaStream_t stream);
@@ -519,100 +414,7 @@ namespace cv { namespace gpu { namespace mathfunc
 
 
     //////////////////////////////////////////////////////////////////////////
     // threshold
 
-    template <typename T> struct ThreshBinary
-    {
-        ThreshBinary(T thresh_, T maxVal_) : thresh(thresh_), maxVal(maxVal_) {}
-
-        __device__ __forceinline__ T operator()(const T& src) const
-        {
-            return src > thresh ? maxVal : 0;
-        }
-
-    private:
-        T thresh;
-        T maxVal;
-    };
-
-    template <typename T> struct ThreshBinaryInv
-    {
-        ThreshBinaryInv(T thresh_, T maxVal_) : thresh(thresh_), maxVal(maxVal_) {}
-
-        __device__ __forceinline__ T operator()(const T& src) const
-        {
-            return src > thresh ? 0 : maxVal;
-        }
-
-    private:
-        T thresh;
-        T maxVal;
-    };
-
-    template <typename T> struct ThreshTrunc
-    {
-        ThreshTrunc(T thresh_, T) : thresh(thresh_) {}
-
-        __device__ __forceinline__ T operator()(const T& src) const
-        {
-            return min(src, thresh);
-        }
-
-    private:
-        T thresh;
-    };
-    template <> struct ThreshTrunc<float>
-    {
-        ThreshTrunc(float thresh_, float) : thresh(thresh_) {}
-
-        __device__ __forceinline__ float operator()(const float& src) const
-        {
-            return fmin(src, thresh);
-        }
-
-    private:
-        float thresh;
-    };
-    template <> struct ThreshTrunc<double>
-    {
-        ThreshTrunc(double thresh_, double) : thresh(thresh_) {}
-
-        __device__ __forceinline__ double operator()(const double& src) const
-        {
-            return fmin(src, thresh);
-        }
-
-    private:
-        double thresh;
-    };
-
-    template <typename T> struct ThreshToZero
-    {
-    public:
-        ThreshToZero(T thresh_, T) : thresh(thresh_) {}
-
-        __device__ __forceinline__ T operator()(const T& src) const
-        {
-            return src > thresh ? src : 0;
-        }
-
-    private:
-        T thresh;
-    };
-
-    template <typename T> struct ThreshToZeroInv
-    {
-    public:
-        ThreshToZeroInv(T thresh_, T) : thresh(thresh_) {}
-
-        __device__ __forceinline__ T operator()(const T& src) const
-        {
-            return src > thresh ? 0 : src;
-        }
-
-    private:
-        T thresh;
-    };
-
     template <template <typename> class Op, typename T>
     void threshold_caller(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, T thresh, T maxVal,
@@ -631,11 +433,11 @@ namespace cv { namespace gpu { namespace mathfunc
 
         static const caller_t callers[] =
         {
-            threshold_caller<ThreshBinary, T>,
-            threshold_caller<ThreshBinaryInv, T>,
-            threshold_caller<ThreshTrunc, T>,
-            threshold_caller<ThreshToZero, T>,
-            threshold_caller<ThreshToZeroInv, T>
+            threshold_caller<thresh_binary_func, T>,
+            threshold_caller<thresh_binary_inv_func, T>,
+            threshold_caller<thresh_trunc_func, T>,
+            threshold_caller<thresh_to_zero_func, T>,
+            threshold_caller<thresh_to_zero_inv_func, T>
         };
 
         callers[type]((DevMem2D_<T>)src, (DevMem2D_<T>)dst, thresh, maxVal, stream);
@@ -653,20 +455,10 @@ namespace cv { namespace gpu { namespace mathfunc
     //////////////////////////////////////////////////////////////////////////
     // subtract
 
-    template <typename T>
-    class SubtractOp
-    {
-    public:
-        __device__ __forceinline__ T operator()(const T& l, const T& r) const
-        {
-            return l - r;
-        }
-    };
-
     template <typename T>
     void subtractCaller(const DevMem2D src1, const DevMem2D src2, DevMem2D dst, cudaStream_t stream)
     {
-        transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<T>)dst, SubtractOp<T>(), stream);
+        transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<T>)dst, minus<T>(), stream);
     }
 
     template void subtractCaller<short>(const DevMem2D src1, const DevMem2D src2, DevMem2D dst, cudaStream_t stream);
@@ -675,7 +467,7 @@ namespace cv { namespace gpu { namespace mathfunc
     //////////////////////////////////////////////////////////////////////////
     // pow
 
-    template<typename T, bool Signed = device::numeric_limits_gpu<T>::is_signed>
+    template<typename T, bool Signed = device::numeric_limits<T>::is_signed>
     struct PowOp
     {
         float power;
@@ -695,7 +487,7 @@ namespace cv { namespace gpu { namespace mathfunc
 
         __device__ __forceinline__ float operator()(const T& e)
         {
-            T res = saturate_cast<T>(__powf((float)e, power));
+            T res = saturate_cast<T>(__powf((float)e, power));
 
             if ( (e < 0) && (1 & (int)power) )
                 res *= -1;
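The pattern in the hunks above repeats throughout this file: a hand-rolled functor type plus a named temporary collapses into a single transform() call taking an STL-style functor from the new functional.hpp header. A minimal sketch of the equivalence for the scalar-min case, in plain C++ with device qualifiers dropped; the binder layout mirrors std::bind2nd and is an assumption about functional.hpp, not a copy of it:

    // What the deleted ScalarMinOp<T> computed ...
    template <typename T> struct ScalarMinOp
    {
        T s;
        explicit ScalarMinOp(T s_) : s(s_) {}
        T operator()(T a) const { return a < s ? a : s; }
    };

    // ... and what device::bind2nd(minimum<T>(), s) composes from parts:
    template <typename T> struct minimum
    {
        T operator()(T a, T b) const { return a < b ? a : b; }
    };

    template <typename Op, typename T> struct binder2nd
    {
        Op op; T arg2;
        binder2nd(Op op_, T arg2_) : op(op_), arg2(arg2_) {}
        T operator()(T a) const { return op(a, arg2); }  // fixes the second argument
    };

    template <typename Op, typename T>
    binder2nd<Op, T> bind2nd(Op op, T x) { return binder2nd<Op, T>(op, x); }

With this, bind2nd(minimum<int>(), 5)(3) evaluates to 3, exactly as ScalarMinOp<int>(5)(3) did; one generic binder replaces the per-type functor and its float/double specializations.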
@@ -42,8 +42,8 @@
 
 #include "opencv2/gpu/devmem2d.hpp"
 #include "opencv2/gpu/device/saturate_cast.hpp"
-#include "opencv2/gpu/device/vecmath.hpp"
-#include "opencv2/gpu/device/limits_gpu.hpp"
+#include "opencv2/gpu/device/vec_math.hpp"
+#include "opencv2/gpu/device/limits.hpp"
 #include "opencv2/gpu/device/border_interpolate.hpp"
 
 #include "safe_call.hpp"
@@ -76,7 +76,7 @@ namespace filter_krnls
 {
     template <typename T, size_t size> struct SmemType_
     {
-        typedef typename TypeVec<float, VecTraits<T>::cn>::vec_t smem_t;
+        typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type smem_t;
     };
     template <typename T> struct SmemType_<T, 4>
     {
@@ -111,7 +111,7 @@ namespace filter_krnls
 
         if (x < src.cols)
         {
-            typedef typename TypeVec<float, VecTraits<T>::cn>::vec_t sum_t;
+            typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_t;
             sum_t sum = VecTraits<sum_t>::all(0);
 
             sDataRow += threadIdx.x + BLOCK_DIM_X - anchor;
@@ -253,7 +253,7 @@ namespace filter_krnls
 
         if (y < src.rows)
         {
-            typedef typename TypeVec<float, VecTraits<T>::cn>::vec_t sum_t;
+            typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type sum_t;
             sum_t sum = VecTraits<sum_t>::all(0);
 
             sDataColumn += (threadIdx.y + BLOCK_DIM_Y - anchor) * BLOCK_DIM_X;
@@ -475,7 +475,7 @@ namespace bf_krnls
             }
         }
 
-        float minimum = numeric_limits_gpu<float>::max();
+        float minimum = numeric_limits<float>::max();
         int id = 0;
 
         if (cost[0] < minimum)
@@ -42,6 +42,7 @@
 //M*/
 
 #include "internal_shared.hpp"
+#include "opencv2/gpu/device/utility.hpp"
 #include "opencv2/gpu/device/saturate_cast.hpp"
 
 using namespace cv::gpu;
@@ -50,14 +51,11 @@ using namespace cv::gpu::device;
 
 #define UINT_BITS 32U
 
-#define LOG2_WARP_SIZE 5U
-#define WARP_SIZE (1U << LOG2_WARP_SIZE)
-
 //Warps == subhistograms per threadblock
 #define WARP_COUNT 6
 
 //Threadblock size
-#define HISTOGRAM256_THREADBLOCK_SIZE (WARP_COUNT * WARP_SIZE)
+#define HISTOGRAM256_THREADBLOCK_SIZE (WARP_COUNT * OPENCV_GPU_WARP_SIZE)
 #define HISTOGRAM256_BIN_COUNT 256
 
 //Shared memory per threadblock
@@ -73,7 +71,7 @@ namespace cv { namespace gpu { namespace histograms
 {
 #if (!USE_SMEM_ATOMICS)
 
-    #define TAG_MASK ( (1U << (UINT_BITS - LOG2_WARP_SIZE)) - 1U )
+    #define TAG_MASK ( (1U << (UINT_BITS - OPENCV_GPU_LOG_WARP_SIZE)) - 1U )
 
     __forceinline__ __device__ void addByte(volatile uint* s_WarpHist, uint data, uint threadTag)
     {
@@ -111,7 +109,7 @@ namespace cv { namespace gpu { namespace histograms
     {
         //Per-warp subhistogram storage
         __shared__ uint s_Hist[HISTOGRAM256_THREADBLOCK_MEMORY];
-        uint* s_WarpHist= s_Hist + (threadIdx.x >> LOG2_WARP_SIZE) * HISTOGRAM256_BIN_COUNT;
+        uint* s_WarpHist= s_Hist + (threadIdx.x >> OPENCV_GPU_LOG_WARP_SIZE) * HISTOGRAM256_BIN_COUNT;
 
         //Clear shared memory storage for current threadblock before processing
         #pragma unroll
@@ -119,7 +117,7 @@ namespace cv { namespace gpu { namespace histograms
             s_Hist[threadIdx.x + i * HISTOGRAM256_THREADBLOCK_SIZE] = 0;
 
         //Cycle through the entire data set, update subhistograms for each warp
-        const uint tag = threadIdx.x << (UINT_BITS - LOG2_WARP_SIZE);
+        const uint tag = threadIdx.x << (UINT_BITS - OPENCV_GPU_LOG_WARP_SIZE);
 
         __syncthreads();
         const uint colsui = d_Data.step / sizeof(uint);
@@ -41,7 +41,7 @@
 //M*/
 
 #include "internal_shared.hpp"
-#include "opencv2/gpu/device/vecmath.hpp"
+#include "opencv2/gpu/device/vec_math.hpp"
 
 using namespace cv::gpu;
 using namespace cv::gpu::device;
@@ -84,8 +84,8 @@ __global__ void matchTemplateNaiveKernel_CCORR(
         int w, int h, const PtrStep image, const PtrStep templ,
         DevMem2Df result)
 {
-    typedef typename TypeVec<T, cn>::vec_t Type;
-    typedef typename TypeVec<float, cn>::vec_t Typef;
+    typedef typename TypeVec<T, cn>::vec_type Type;
+    typedef typename TypeVec<float, cn>::vec_type Typef;
 
     int x = blockDim.x * blockIdx.x + threadIdx.x;
     int y = blockDim.y * blockIdx.y + threadIdx.y;
@@ -174,8 +174,8 @@ __global__ void matchTemplateNaiveKernel_SQDIFF(
         int w, int h, const PtrStep image, const PtrStep templ,
         DevMem2Df result)
 {
-    typedef typename TypeVec<T, cn>::vec_t Type;
-    typedef typename TypeVec<float, cn>::vec_t Typef;
+    typedef typename TypeVec<T, cn>::vec_type Type;
+    typedef typename TypeVec<float, cn>::vec_type Typef;
 
     int x = blockDim.x * blockIdx.x + threadIdx.x;
     int y = blockDim.y * blockIdx.y + threadIdx.y;
@@ -884,7 +884,7 @@ void normalize_8U(int w, int h, const DevMem2D_<unsigned long long> image_sqsum,
 template <int cn>
 __global__ void extractFirstChannel_32F(const PtrStep image, DevMem2Df result)
 {
-    typedef typename TypeVec<float, cn>::vec_t Typef;
+    typedef typename TypeVec<float, cn>::vec_type Typef;
 
     int x = blockDim.x * blockIdx.x + threadIdx.x;
     int y = blockDim.y * blockIdx.y + threadIdx.y;
@@ -40,9 +40,9 @@
 //
 //M*/
 
-#include "opencv2/gpu/device/limits_gpu.hpp"
+#include "opencv2/gpu/device/limits.hpp"
 #include "opencv2/gpu/device/saturate_cast.hpp"
-#include "opencv2/gpu/device/vecmath.hpp"
+#include "opencv2/gpu/device/vec_math.hpp"
 #include "opencv2/gpu/device/transform.hpp"
 #include "internal_shared.hpp"
 
@@ -40,9 +40,9 @@
 //
 //M*/
 
-#include "opencv2/gpu/device/limits_gpu.hpp"
+#include "opencv2/gpu/device/limits.hpp"
 #include "opencv2/gpu/device/saturate_cast.hpp"
-#include "opencv2/gpu/device/vecmath.hpp"
+#include "opencv2/gpu/device/vec_math.hpp"
 #include "opencv2/gpu/device/transform.hpp"
 #include "internal_shared.hpp"
 
@@ -190,8 +190,8 @@ namespace cv { namespace gpu { namespace mathfunc
         uint y0 = blockIdx.y * blockDim.y * ctheight + threadIdx.y;
         uint tid = threadIdx.y * blockDim.x + threadIdx.x;
 
-        T mymin = numeric_limits_gpu<T>::max();
-        T mymax = numeric_limits_gpu<T>::is_signed ? -numeric_limits_gpu<T>::max() : numeric_limits_gpu<T>::min();
+        T mymin = numeric_limits<T>::max();
+        T mymax = numeric_limits<T>::is_signed ? -numeric_limits<T>::max() : numeric_limits<T>::min();
         uint y_end = min(y0 + (ctheight - 1) * blockDim.y + 1, src.rows);
         uint x_end = min(x0 + (ctwidth - 1) * blockDim.x + 1, src.cols);
         for (uint y = y0; y < y_end; y += blockDim.y)
@@ -512,9 +512,9 @@ namespace cv { namespace gpu { namespace mathfunc
         uint y0 = blockIdx.y * blockDim.y * ctheight + threadIdx.y;
         uint tid = threadIdx.y * blockDim.x + threadIdx.x;
 
-        T mymin = numeric_limits_gpu<T>::max();
-        T mymax = numeric_limits_gpu<T>::is_signed ? -numeric_limits_gpu<T>::max() :
-                                                     numeric_limits_gpu<T>::min();
+        T mymin = numeric_limits<T>::max();
+        T mymax = numeric_limits<T>::is_signed ? -numeric_limits<T>::max() :
+                                                 numeric_limits<T>::min();
         uint myminloc = 0;
         uint mymaxloc = 0;
         uint y_end = min(y0 + (ctheight - 1) * blockDim.y + 1, src.rows);
@@ -1094,10 +1094,10 @@ namespace cv { namespace gpu { namespace mathfunc
 
 
     template <typename T, typename R, typename Op, int nthreads>
-    __global__ void sumKernel_C2(const DevMem2D src, typename TypeVec<R, 2>::vec_t* result)
+    __global__ void sumKernel_C2(const DevMem2D src, typename TypeVec<R, 2>::vec_type* result)
     {
-        typedef typename TypeVec<T, 2>::vec_t SrcType;
-        typedef typename TypeVec<R, 2>::vec_t DstType;
+        typedef typename TypeVec<T, 2>::vec_type SrcType;
+        typedef typename TypeVec<R, 2>::vec_type DstType;
 
         __shared__ R smem[nthreads * 2];
 
@@ -1173,9 +1173,9 @@ namespace cv { namespace gpu { namespace mathfunc
 
 
     template <typename T, typename R, int nthreads>
-    __global__ void sumPass2Kernel_C2(typename TypeVec<R, 2>::vec_t* result, int size)
+    __global__ void sumPass2Kernel_C2(typename TypeVec<R, 2>::vec_type* result, int size)
     {
-        typedef typename TypeVec<R, 2>::vec_t DstType;
+        typedef typename TypeVec<R, 2>::vec_type DstType;
 
         __shared__ R smem[nthreads * 2];
 
@@ -1199,10 +1199,10 @@ namespace cv { namespace gpu { namespace mathfunc
 
 
     template <typename T, typename R, typename Op, int nthreads>
-    __global__ void sumKernel_C3(const DevMem2D src, typename TypeVec<R, 3>::vec_t* result)
+    __global__ void sumKernel_C3(const DevMem2D src, typename TypeVec<R, 3>::vec_type* result)
     {
-        typedef typename TypeVec<T, 3>::vec_t SrcType;
-        typedef typename TypeVec<R, 3>::vec_t DstType;
+        typedef typename TypeVec<T, 3>::vec_type SrcType;
+        typedef typename TypeVec<R, 3>::vec_type DstType;
 
         __shared__ R smem[nthreads * 3];
 
@@ -1285,9 +1285,9 @@ namespace cv { namespace gpu { namespace mathfunc
 
 
     template <typename T, typename R, int nthreads>
-    __global__ void sumPass2Kernel_C3(typename TypeVec<R, 3>::vec_t* result, int size)
+    __global__ void sumPass2Kernel_C3(typename TypeVec<R, 3>::vec_type* result, int size)
     {
-        typedef typename TypeVec<R, 3>::vec_t DstType;
+        typedef typename TypeVec<R, 3>::vec_type DstType;
 
         __shared__ R smem[nthreads * 3];
 
@@ -1313,10 +1313,10 @@ namespace cv { namespace gpu { namespace mathfunc
     }
 
     template <typename T, typename R, typename Op, int nthreads>
-    __global__ void sumKernel_C4(const DevMem2D src, typename TypeVec<R, 4>::vec_t* result)
+    __global__ void sumKernel_C4(const DevMem2D src, typename TypeVec<R, 4>::vec_type* result)
     {
-        typedef typename TypeVec<T, 4>::vec_t SrcType;
-        typedef typename TypeVec<R, 4>::vec_t DstType;
+        typedef typename TypeVec<T, 4>::vec_type SrcType;
+        typedef typename TypeVec<R, 4>::vec_type DstType;
 
         __shared__ R smem[nthreads * 4];
 
@@ -1407,9 +1407,9 @@ namespace cv { namespace gpu { namespace mathfunc
 
 
     template <typename T, typename R, int nthreads>
-    __global__ void sumPass2Kernel_C4(typename TypeVec<R, 4>::vec_t* result, int size)
+    __global__ void sumPass2Kernel_C4(typename TypeVec<R, 4>::vec_type* result, int size)
     {
-        typedef typename TypeVec<R, 4>::vec_t DstType;
+        typedef typename TypeVec<R, 4>::vec_type DstType;
 
         __shared__ R smem[nthreads * 4];
 
@@ -1454,41 +1454,41 @@ namespace cv { namespace gpu { namespace mathfunc
         {
         case 1:
             sumKernel<T, R, IdentityOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 1>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 1>::vec_type*)buf.ptr(0));
             cudaSafeCall( cudaGetLastError() );
 
             sumPass2Kernel<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
-                (typename TypeVec<R, 1>::vec_t*)buf.ptr(0), grid.x * grid.y);
+                (typename TypeVec<R, 1>::vec_type*)buf.ptr(0), grid.x * grid.y);
             cudaSafeCall( cudaGetLastError() );
 
             break;
         case 2:
             sumKernel_C2<T, R, IdentityOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 2>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 2>::vec_type*)buf.ptr(0));
             cudaSafeCall( cudaGetLastError() );
 
             sumPass2Kernel_C2<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
-                (typename TypeVec<R, 2>::vec_t*)buf.ptr(0), grid.x * grid.y);
+                (typename TypeVec<R, 2>::vec_type*)buf.ptr(0), grid.x * grid.y);
             cudaSafeCall( cudaGetLastError() );
 
             break;
         case 3:
             sumKernel_C3<T, R, IdentityOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 3>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 3>::vec_type*)buf.ptr(0));
             cudaSafeCall( cudaGetLastError() );
 
             sumPass2Kernel_C3<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
-                (typename TypeVec<R, 3>::vec_t*)buf.ptr(0), grid.x * grid.y);
+                (typename TypeVec<R, 3>::vec_type*)buf.ptr(0), grid.x * grid.y);
             cudaSafeCall( cudaGetLastError() );
 
             break;
         case 4:
             sumKernel_C4<T, R, IdentityOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 4>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 4>::vec_type*)buf.ptr(0));
             cudaSafeCall( cudaGetLastError() );
 
             sumPass2Kernel_C4<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
-                (typename TypeVec<R, 4>::vec_t*)buf.ptr(0), grid.x * grid.y);
+                (typename TypeVec<R, 4>::vec_type*)buf.ptr(0), grid.x * grid.y);
             cudaSafeCall( cudaGetLastError() );
 
             break;
@@ -1526,19 +1526,19 @@ namespace cv { namespace gpu { namespace mathfunc
         {
         case 1:
             sumKernel<T, R, IdentityOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 1>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 1>::vec_type*)buf.ptr(0));
             break;
        case 2:
             sumKernel_C2<T, R, IdentityOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 2>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 2>::vec_type*)buf.ptr(0));
             break;
        case 3:
             sumKernel_C3<T, R, IdentityOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 3>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 3>::vec_type*)buf.ptr(0));
             break;
        case 4:
             sumKernel_C4<T, R, IdentityOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 4>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 4>::vec_type*)buf.ptr(0));
             break;
         }
         cudaSafeCall( cudaGetLastError() );
@@ -1576,41 +1576,41 @@ namespace cv { namespace gpu { namespace mathfunc
         {
         case 1:
             sumKernel<T, R, AbsOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 1>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 1>::vec_type*)buf.ptr(0));
             cudaSafeCall( cudaGetLastError() );
 
             sumPass2Kernel<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
-                (typename TypeVec<R, 1>::vec_t*)buf.ptr(0), grid.x * grid.y);
+                (typename TypeVec<R, 1>::vec_type*)buf.ptr(0), grid.x * grid.y);
             cudaSafeCall( cudaGetLastError() );
 
             break;
        case 2:
             sumKernel_C2<T, R, AbsOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 2>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 2>::vec_type*)buf.ptr(0));
             cudaSafeCall( cudaGetLastError() );
 
             sumPass2Kernel_C2<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
-                (typename TypeVec<R, 2>::vec_t*)buf.ptr(0), grid.x * grid.y);
+                (typename TypeVec<R, 2>::vec_type*)buf.ptr(0), grid.x * grid.y);
             cudaSafeCall( cudaGetLastError() );
 
             break;
        case 3:
             sumKernel_C3<T, R, AbsOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 3>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 3>::vec_type*)buf.ptr(0));
             cudaSafeCall( cudaGetLastError() );
 
             sumPass2Kernel_C3<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
-                (typename TypeVec<R, 3>::vec_t*)buf.ptr(0), grid.x * grid.y);
+                (typename TypeVec<R, 3>::vec_type*)buf.ptr(0), grid.x * grid.y);
             cudaSafeCall( cudaGetLastError() );
 
             break;
        case 4:
             sumKernel_C4<T, R, AbsOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 4>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 4>::vec_type*)buf.ptr(0));
             cudaSafeCall( cudaGetLastError() );
 
             sumPass2Kernel_C4<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
-                (typename TypeVec<R, 4>::vec_t*)buf.ptr(0), grid.x * grid.y);
+                (typename TypeVec<R, 4>::vec_type*)buf.ptr(0), grid.x * grid.y);
             cudaSafeCall( cudaGetLastError() );
 
             break;
@@ -1648,19 +1648,19 @@ namespace cv { namespace gpu { namespace mathfunc
         {
        case 1:
             sumKernel<T, R, AbsOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 1>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 1>::vec_type*)buf.ptr(0));
             break;
        case 2:
             sumKernel_C2<T, R, AbsOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 2>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 2>::vec_type*)buf.ptr(0));
             break;
        case 3:
             sumKernel_C3<T, R, AbsOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 3>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 3>::vec_type*)buf.ptr(0));
             break;
        case 4:
             sumKernel_C4<T, R, AbsOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 4>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 4>::vec_type*)buf.ptr(0));
             break;
         }
         cudaSafeCall( cudaGetLastError() );
@@ -1698,41 +1698,41 @@ namespace cv { namespace gpu { namespace mathfunc
         {
        case 1:
             sumKernel<T, R, SqrOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 1>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 1>::vec_type*)buf.ptr(0));
             cudaSafeCall( cudaGetLastError() );
 
             sumPass2Kernel<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
-                (typename TypeVec<R, 1>::vec_t*)buf.ptr(0), grid.x * grid.y);
+                (typename TypeVec<R, 1>::vec_type*)buf.ptr(0), grid.x * grid.y);
             cudaSafeCall( cudaGetLastError() );
 
             break;
        case 2:
             sumKernel_C2<T, R, SqrOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 2>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 2>::vec_type*)buf.ptr(0));
             cudaSafeCall( cudaGetLastError() );
 
             sumPass2Kernel_C2<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
-                (typename TypeVec<R, 2>::vec_t*)buf.ptr(0), grid.x * grid.y);
+                (typename TypeVec<R, 2>::vec_type*)buf.ptr(0), grid.x * grid.y);
             cudaSafeCall( cudaGetLastError() );
 
             break;
        case 3:
             sumKernel_C3<T, R, SqrOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 3>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 3>::vec_type*)buf.ptr(0));
             cudaSafeCall( cudaGetLastError() );
 
             sumPass2Kernel_C3<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
-                (typename TypeVec<R, 3>::vec_t*)buf.ptr(0), grid.x * grid.y);
+                (typename TypeVec<R, 3>::vec_type*)buf.ptr(0), grid.x * grid.y);
             cudaSafeCall( cudaGetLastError() );
 
             break;
        case 4:
             sumKernel_C4<T, R, SqrOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 4>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 4>::vec_type*)buf.ptr(0));
             cudaSafeCall( cudaGetLastError() );
 
             sumPass2Kernel_C4<T, R, threads_x * threads_y><<<1, threads_x * threads_y>>>(
-                (typename TypeVec<R, 4>::vec_t*)buf.ptr(0), grid.x * grid.y);
+                (typename TypeVec<R, 4>::vec_type*)buf.ptr(0), grid.x * grid.y);
             cudaSafeCall( cudaGetLastError() );
 
             break;
@@ -1770,19 +1770,19 @@ namespace cv { namespace gpu { namespace mathfunc
         {
        case 1:
             sumKernel<T, R, SqrOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 1>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 1>::vec_type*)buf.ptr(0));
             break;
        case 2:
             sumKernel_C2<T, R, SqrOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 2>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 2>::vec_type*)buf.ptr(0));
             break;
        case 3:
             sumKernel_C3<T, R, SqrOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 3>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 3>::vec_type*)buf.ptr(0));
             break;
        case 4:
             sumKernel_C4<T, R, SqrOp<R>, threads_x * threads_y><<<grid, threads>>>(
-                src, (typename TypeVec<R, 4>::vec_t*)buf.ptr(0));
+                src, (typename TypeVec<R, 4>::vec_type*)buf.ptr(0));
             break;
         }
         cudaSafeCall( cudaGetLastError() );
@@ -42,7 +42,7 @@
 
 #include "opencv2/gpu/devmem2d.hpp"
 #include "opencv2/gpu/device/saturate_cast.hpp"
-#include "opencv2/gpu/device/limits_gpu.hpp"
+#include "opencv2/gpu/device/limits.hpp"
 #include "safe_call.hpp"
 
 using namespace cv::gpu;
@@ -381,7 +381,7 @@ namespace cv { namespace gpu { namespace bp
     template <typename T>
     __device__ void message(const T* msg1, const T* msg2, const T* msg3, const T* data, T* dst, size_t msg_disp_step, size_t data_disp_step)
     {
-        float minimum = numeric_limits_gpu<float>::max();
+        float minimum = numeric_limits<float>::max();
 
         for(int i = 0; i < cndisp; ++i)
         {
@@ -486,7 +486,7 @@ namespace cv { namespace gpu { namespace bp
         size_t disp_step = disp.rows * u.step;
 
         int best = 0;
-        float best_val = numeric_limits_gpu<float>::max();
+        float best_val = numeric_limits<float>::max();
         for (int d = 0; d < cndisp; ++d)
         {
             float val = us[d * disp_step];
@@ -42,7 +42,7 @@
 
 #include "opencv2/gpu/devmem2d.hpp"
 #include "opencv2/gpu/device/saturate_cast.hpp"
-#include "opencv2/gpu/device/limits_gpu.hpp"
+#include "opencv2/gpu/device/limits.hpp"
 #include "safe_call.hpp"
 
 using namespace cv::gpu;
@@ -147,7 +147,7 @@ namespace cv { namespace gpu { namespace csbp
 
         for(int i = 0; i < nr_plane; i++)
         {
-            T minimum = numeric_limits_gpu<T>::max();
+            T minimum = numeric_limits<T>::max();
             int id = 0;
             for(int d = 0; d < cndisp; d++)
             {
@@ -161,7 +161,7 @@ namespace cv { namespace gpu { namespace csbp
 
             data_cost_selected[i * cdisp_step1] = minimum;
             selected_disparity[i * cdisp_step1] = id;
-            data_cost [id * cdisp_step1] = numeric_limits_gpu<T>::max();
+            data_cost [id * cdisp_step1] = numeric_limits<T>::max();
         }
     }
 }
@@ -192,7 +192,7 @@ namespace cv { namespace gpu { namespace csbp
                 data_cost_selected[nr_local_minimum * cdisp_step1] = cur;
                 selected_disparity[nr_local_minimum * cdisp_step1] = d;
 
-                data_cost[d * cdisp_step1] = numeric_limits_gpu<T>::max();
+                data_cost[d * cdisp_step1] = numeric_limits<T>::max();
 
                 nr_local_minimum++;
             }
@@ -203,7 +203,7 @@ namespace cv { namespace gpu { namespace csbp
 
             for (int i = nr_local_minimum; i < nr_plane; i++)
             {
-                T minimum = numeric_limits_gpu<T>::max();
+                T minimum = numeric_limits<T>::max();
                 int id = 0;
 
                 for (int d = 0; d < cndisp; d++)
@@ -218,7 +218,7 @@ namespace cv { namespace gpu { namespace csbp
                 data_cost_selected[i * cdisp_step1] = minimum;
                 selected_disparity[i * cdisp_step1] = id;
 
-                data_cost[id * cdisp_step1] = numeric_limits_gpu<T>::max();
+                data_cost[id * cdisp_step1] = numeric_limits<T>::max();
             }
         }
     }
@@ -610,7 +610,7 @@ namespace cv { namespace gpu { namespace csbp
     {
         for(int i = 0; i < nr_plane; i++)
         {
-            T minimum = numeric_limits_gpu<T>::max();
+            T minimum = numeric_limits<T>::max();
             int id = 0;
             for(int j = 0; j < nr_plane2; j++)
             {
@@ -630,7 +630,7 @@ namespace cv { namespace gpu { namespace csbp
             l_new[i * cdisp_step1] = l_cur[id * cdisp_step2];
             r_new[i * cdisp_step1] = r_cur[id * cdisp_step2];
 
-            data_cost_new[id * cdisp_step1] = numeric_limits_gpu<T>::max();
+            data_cost_new[id * cdisp_step1] = numeric_limits<T>::max();
         }
     }
 
@@ -737,7 +737,7 @@ namespace cv { namespace gpu { namespace csbp
     __device__ void message_per_pixel(const T* data, T* msg_dst, const T* msg1, const T* msg2, const T* msg3,
                                       const T* dst_disp, const T* src_disp, int nr_plane, T* temp)
     {
-        T minimum = numeric_limits_gpu<T>::max();
+        T minimum = numeric_limits<T>::max();
 
         for(int d = 0; d < nr_plane; d++)
         {
@@ -850,7 +850,7 @@ namespace cv { namespace gpu { namespace csbp
         const T* r = r_ + (y+0) * cmsg_step1 + (x-1);
 
         int best = 0;
-        T best_val = numeric_limits_gpu<T>::max();
+        T best_val = numeric_limits<T>::max();
         for (int i = 0; i < nr_plane; ++i)
         {
             int idx = i * cdisp_step1;
@@ -46,8 +46,10 @@
 //M*/
 
 #include "internal_shared.hpp"
-#include "opencv2/gpu/device/limits_gpu.hpp"
+#include "opencv2/gpu/device/limits.hpp"
 #include "opencv2/gpu/device/saturate_cast.hpp"
+#include "opencv2/gpu/device/utility.hpp"
+#include "opencv2/gpu/device/functional.hpp"
 
 using namespace cv::gpu;
 using namespace cv::gpu::device;
@@ -393,31 +395,10 @@ namespace cv { namespace gpu { namespace surf
         //dss
         H[2][2] = N9[0][1][1] - 2.0f * N9[1][1][1] + N9[2][1][1];
 
-        float det = H[0][0] * (H[1][1] * H[2][2] - H[1][2] * H[2][1])
-                    - H[0][1] * (H[1][0] * H[2][2] - H[1][2] * H[2][0])
-                    + H[0][2] * (H[1][0] * H[2][1] - H[1][1] * H[2][0]);
+        __shared__ float x[3];
 
-        if (det != 0.0f)
+        if (solve3x3(H, dD, x))
         {
-            float invdet = 1.0f / det;
-
-            __shared__ float x[3];
-
-            x[0] = invdet *
-                   (dD[0] * (H[1][1] * H[2][2] - H[1][2] * H[2][1]) -
-                    H[0][1] * (dD[1] * H[2][2] - H[1][2] * dD[2]) +
-                    H[0][2] * (dD[1] * H[2][1] - H[1][1] * dD[2]));
-
-            x[1] = invdet *
-                   (H[0][0] * (dD[1] * H[2][2] - H[1][2] * dD[2]) -
-                    dD[0] * (H[1][0] * H[2][2] - H[1][2] * H[2][0]) +
-                    H[0][2] * (H[1][0] * dD[2] - dD[1] * H[2][0]));
-
-            x[2] = invdet *
-                   (H[0][0] * (H[1][1] * dD[2] - dD[1] * H[2][1]) -
-                    H[0][1] * (H[1][0] * dD[2] - dD[1] * H[2][0]) +
-                    dD[0] * (H[1][0] * H[2][1] - H[1][1] * H[2][0]));
-
             if (fabs(x[0]) <= 1.f && fabs(x[1]) <= 1.f && fabs(x[2]) <= 1.f)
             {
                 // if the step is within the interpolation region, perform it
@@ -500,20 +481,6 @@ namespace cv { namespace gpu { namespace surf
     __constant__ float c_NX[2][5] = {{0, 0, 2, 4, -1}, {2, 0, 4, 4, 1}};
     __constant__ float c_NY[2][5] = {{0, 0, 4, 2, 1}, {0, 2, 4, 4, -1}};
 
-    __device__ void reduceSum32(volatile float* v_sum, float& sum)
-    {
-        v_sum[threadIdx.x] = sum;
-
-        if (threadIdx.x < 16)
-        {
-            v_sum[threadIdx.x] = sum += v_sum[threadIdx.x + 16];
-            v_sum[threadIdx.x] = sum += v_sum[threadIdx.x + 8];
-            v_sum[threadIdx.x] = sum += v_sum[threadIdx.x + 4];
-            v_sum[threadIdx.x] = sum += v_sum[threadIdx.x + 2];
-            v_sum[threadIdx.x] = sum += v_sum[threadIdx.x + 1];
-        }
-    }
-
     __global__ void icvCalcOrientation(const float* featureX, const float* featureY, const float* featureSize, float* featureDir)
     {
     #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 110
@@ -599,8 +566,11 @@ namespace cv { namespace gpu { namespace surf
 
         float* s_sum_row = s_sum + threadIdx.y * 32;
 
-        reduceSum32(s_sum_row, sumx);
-        reduceSum32(s_sum_row, sumy);
+        //reduceSum32(s_sum_row, sumx);
+        //reduceSum32(s_sum_row, sumy);
+
+        warpReduce32(s_sum_row, sumx, threadIdx.x, plus<volatile float>());
+        warpReduce32(s_sum_row, sumy, threadIdx.x, plus<volatile float>());
 
         const float temp_mod = sumx * sumx + sumy * sumy;
         if (temp_mod > best_mod)
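The block deleted in the -393,31 hunk above is a verbatim Cramer's-rule expansion, so the new solve3x3(H, dD, x) call can be read as that same expansion moved behind a utility function. A sketch of the assumed contract, reconstructed only from the deleted lines (the real helper lives in the device utility header and carries __device__ qualifiers):

    // Solve H * x = dD for x via Cramer's rule; returns false when H is singular.
    bool solve3x3(const float H[3][3], const float dD[3], float x[3])
    {
        float det = H[0][0] * (H[1][1] * H[2][2] - H[1][2] * H[2][1])
                  - H[0][1] * (H[1][0] * H[2][2] - H[1][2] * H[2][0])
                  + H[0][2] * (H[1][0] * H[2][1] - H[1][1] * H[2][0]);

        if (det == 0.0f)
            return false;

        float invdet = 1.0f / det;

        // Each component replaces one column of H with dD and rescales by 1/det.
        x[0] = invdet * (dD[0]   * (H[1][1] * H[2][2] - H[1][2] * H[2][1]) -
                         H[0][1] * (dD[1]   * H[2][2] - H[1][2] * dD[2])   +
                         H[0][2] * (dD[1]   * H[2][1] - H[1][1] * dD[2]));

        x[1] = invdet * (H[0][0] * (dD[1]   * H[2][2] - H[1][2] * dD[2])   -
                         dD[0]   * (H[1][0] * H[2][2] - H[1][2] * H[2][0]) +
                         H[0][2] * (H[1][0] * dD[2]   - dD[1]   * H[2][0]));

        x[2] = invdet * (H[0][0] * (H[1][1] * dD[2]   - dD[1]   * H[2][1]) -
                         H[0][1] * (H[1][0] * dD[2]   - dD[1]   * H[2][0]) +
                         dD[0]   * (H[1][0] * H[2][1] - H[1][1] * H[2][0]));

        return true;
    }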
@@ -43,8 +43,8 @@
 #ifndef __OPENCV_GPU_BORDER_INTERPOLATE_HPP__
 #define __OPENCV_GPU_BORDER_INTERPOLATE_HPP__
 
-#include "opencv2/gpu/device/saturate_cast.hpp"
-#include "opencv2/gpu/device/vecmath.hpp"
+#include "saturate_cast.hpp"
+#include "vec_traits.hpp"
 
 namespace cv { namespace gpu { namespace device
 {
@@ -72,64 +72,53 @@ namespace cv { namespace gpu { namespace device
             return -last <= mini && maxi <= 2 * last;
         }
 
     private:
         int last;
     };
 
-
-    template <typename D>
-    struct BrdRowReflect101: BrdReflect101
+    template <typename D> struct BrdRowReflect101 : BrdReflect101
     {
         explicit BrdRowReflect101(int len): BrdReflect101(len) {}
 
-        template <typename T>
-        __device__ __forceinline__ D at_low(int i, const T* data) const
+        template <typename T> __device__ __forceinline__ D at_low(int i, const T* data) const
         {
             return saturate_cast<D>(data[idx_low(i)]);
         }
 
-        template <typename T>
-        __device__ __forceinline__ D at_high(int i, const T* data) const
+        template <typename T> __device__ __forceinline__ D at_high(int i, const T* data) const
        {
             return saturate_cast<D>(data[idx_high(i)]);
         }
     };
 
-
-    template <typename D>
-    struct BrdColReflect101: BrdReflect101
+    template <typename D> struct BrdColReflect101 : BrdReflect101
     {
         BrdColReflect101(int len, int step): BrdReflect101(len), step(step) {}
 
-        template <typename T>
-        __device__ __forceinline__ D at_low(int i, const T* data) const
+        template <typename T> __device__ __forceinline__ D at_low(int i, const T* data) const
         {
             return saturate_cast<D>(*(const D*)((const char*)data + idx_low(i)*step));
         }
 
-        template <typename T>
-        __device__ __forceinline__ D at_high(int i, const T* data) const
+        template <typename T> __device__ __forceinline__ D at_high(int i, const T* data) const
         {
             return saturate_cast<D>(*(const D*)((const char*)data + idx_high(i)*step));
         }
 
     private:
         int step;
     };
 
-
     struct BrdReplicate
     {
         explicit BrdReplicate(int len): last(len - 1) {}
 
         __device__ __forceinline__ int idx_low(int i) const
         {
-            return max(i, 0);
+            return ::max(i, 0);
         }
 
         __device__ __forceinline__ int idx_high(int i) const
         {
-            return min(i, last);
+            return ::min(i, last);
         }
 
         __device__ __forceinline__ int idx(int i) const
@@ -142,64 +131,52 @@ namespace cv { namespace gpu { namespace device
             return true;
         }
 
     private:
         int last;
     };
 
-
-    template <typename D>
-    struct BrdRowReplicate: BrdReplicate
+    template <typename D> struct BrdRowReplicate : BrdReplicate
     {
         explicit BrdRowReplicate(int len): BrdReplicate(len) {}
 
-        template <typename T>
-        __device__ __forceinline__ D at_low(int i, const T* data) const
+        template <typename T> __device__ __forceinline__ D at_low(int i, const T* data) const
         {
             return saturate_cast<D>(data[idx_low(i)]);
         }
 
-        template <typename T>
-        __device__ __forceinline__ D at_high(int i, const T* data) const
+        template <typename T> __device__ __forceinline__ D at_high(int i, const T* data) const
         {
             return saturate_cast<D>(data[idx_high(i)]);
         }
     };
 
-
-    template <typename D>
-    struct BrdColReplicate: BrdReplicate
+    template <typename D> struct BrdColReplicate : BrdReplicate
     {
         BrdColReplicate(int len, int step): BrdReplicate(len), step(step) {}
 
-        template <typename T>
-        __device__ __forceinline__ D at_low(int i, const T* data) const
+        template <typename T> __device__ __forceinline__ D at_low(int i, const T* data) const
        {
             return saturate_cast<D>(*(const D*)((const char*)data + idx_low(i)*step));
         }
 
-        template <typename T>
-        __device__ __forceinline__ D at_high(int i, const T* data) const
+        template <typename T> __device__ __forceinline__ D at_high(int i, const T* data) const
         {
             return saturate_cast<D>(*(const D*)((const char*)data + idx_high(i)*step));
         }
 
     private:
         int step;
     };
 
-    template <typename D>
-    struct BrdRowConstant
+    template <typename D> struct BrdRowConstant
     {
         explicit BrdRowConstant(int len_, const D& val_ = VecTraits<D>::all(0)): len(len_), val(val_) {}
 
-        template <typename T>
-        __device__ __forceinline__ D at_low(int i, const T* data) const
+        template <typename T> __device__ __forceinline__ D at_low(int i, const T* data) const
         {
             return i >= 0 ? saturate_cast<D>(data[i]) : val;
         }
 
-        template <typename T>
-        __device__ __forceinline__ D at_high(int i, const T* data) const
+        template <typename T> __device__ __forceinline__ D at_high(int i, const T* data) const
         {
             return i < len ? saturate_cast<D>(data[i]) : val;
         }
@@ -209,24 +186,20 @@ namespace cv { namespace gpu { namespace device
             return true;
         }
 
     private:
         int len;
         D val;
     };
 
-    template <typename D>
-    struct BrdColConstant
+    template <typename D> struct BrdColConstant
     {
         BrdColConstant(int len_, int step_, const D& val_ = VecTraits<D>::all(0)): len(len_), step(step_), val(val_) {}
 
-        template <typename T>
-        __device__ __forceinline__ D at_low(int i, const T* data) const
+        template <typename T> __device__ __forceinline__ D at_low(int i, const T* data) const
         {
             return i >= 0 ? saturate_cast<D>(*(const D*)((const char*)data + i*step)) : val;
         }
 
-        template <typename T>
-        __device__ __forceinline__ D at_high(int i, const T* data) const
+        template <typename T> __device__ __forceinline__ D at_high(int i, const T* data) const
         {
             return i < len ? saturate_cast<D>(*(const D*)((const char*)data + i*step)) : val;
         }
@@ -236,15 +209,12 @@ namespace cv { namespace gpu { namespace device
             return true;
         }
 
     private:
         int len;
         int step;
         D val;
     };
 
-
-    template <typename OutT>
-    struct BrdConstant
+    template <typename OutT> struct BrdConstant
     {
         BrdConstant(int w, int h, const OutT &val = VecTraits<OutT>::all(0)) : w(w), h(h), val(val) {}
 
@@ -255,11 +225,9 @@ namespace cv { namespace gpu { namespace device
             return val;
         }
 
     private:
         int w, h;
         OutT val;
     };
 
 }}}
 
 #endif // __OPENCV_GPU_BORDER_INTERPOLATE_HPP__
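The Brd* classes above share a small duck-typed contract: idx_low/idx_high remap an out-of-range index to a valid one, and at_low/at_high perform the corresponding clamped (or constant-filled) read. A sketch of how a row filter consumes such a policy; convolveRow is a hypothetical caller written as plain C++ with device qualifiers dropped, and B stands for any of the Brd*Row* policies:

    // Convolve one row with a 1-D kernel, delegating all out-of-range
    // reads to the border policy B.
    template <typename B>
    void convolveRow(const float* src, float* dst, int len,
                     const float* kernel, int ksize, int anchor, const B& brd)
    {
        for (int x = 0; x < len; ++x)
        {
            float sum = 0.f;
            for (int k = 0; k < ksize; ++k)
            {
                int i = x + k - anchor;
                float v = (i < 0)    ? brd.at_low(i, src)    // left border
                        : (i >= len) ? brd.at_high(i, src)   // right border
                        : src[i];                            // interior pixel
                sum += v * kernel[k];
            }
            dst[x] = sum;
        }
    }

With B = BrdRowReplicate<float> the two border branches collapse to clamped indexing; with B = BrdRowConstant<float> they return the fill value passed to the constructor.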
modules/gpu/src/opencv2/gpu/device/color.hpp (new file, 221 lines)
@ -0,0 +1,221 @@
|
||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
||||
//
|
||||
// By downloading, copying, installing or using the software you agree to this license.
|
||||
// If you do not agree to this license, do not download, install,
|
||||
// copy or use the software.
|
||||
//
|
||||
//
|
||||
// License Agreement
|
||||
// For Open Source Computer Vision Library
|
||||
//
|
||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
||||
// Third party copyrights are property of their respective owners.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification,
|
||||
// are permitted provided that the following conditions are met:
|
||||
//
|
||||
// * Redistribution's of source code must retain the above copyright notice,
|
||||
// this list of conditions and the following disclaimer.
|
||||
//
|
||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
||||
// this list of conditions and the following disclaimer in the documentation
|
||||
// and/or other materials provided with the distribution.
|
||||
//
|
||||
// * The name of the copyright holders may not be used to endorse or promote products
|
||||
// derived from this software without specific prior written permission.
|
||||
//
|
||||
// This software is provided by the copyright holders and contributors "as is" and
|
||||
// any express or bpied warranties, including, but not limited to, the bpied
|
||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
||||
// indirect, incidental, special, exemplary, or consequential damages
|
||||
// (including, but not limited to, procurement of substitute goods or services;
|
||||
// loss of use, data, or profits; or business interruption) however caused
|
||||
// and on any theory of liability, whether in contract, strict liability,
|
||||
// or tort (including negligence or otherwise) arising in any way out of
|
||||
// the use of this software, even if advised of the possibility of such damage.
|
||||
//
|
||||
//M*/
|
||||
|
||||
#ifndef __OPENCV_GPU_COLOR_HPP__
|
||||
#define __OPENCV_GPU_COLOR_HPP__
|
||||
|
||||
#include "detail/color.hpp"
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
// All OPENCV_GPU_IMPLEMENT_*_TRAITS(ColorSpace1_to_ColorSpace2, ...) macros implements
|
||||
// template <typename T> class ColorSpace1_to_ColorSpace2_traits
|
||||
// {
|
||||
// typedef ... functor_type;
|
||||
// static __host__ __device__ functor_type create_functor();
|
||||
// };
|
||||
|
||||
OPENCV_GPU_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_rgb, 3, 3, 2)
|
||||
OPENCV_GPU_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_bgra, 3, 4, 0)
|
||||
OPENCV_GPU_IMPLEMENT_RGB2RGB_TRAITS(bgr_to_rgba, 3, 4, 2)
|
||||
OPENCV_GPU_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_bgr, 4, 3, 0)
|
||||
OPENCV_GPU_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_rgb, 4, 3, 2)
|
||||
OPENCV_GPU_IMPLEMENT_RGB2RGB_TRAITS(bgra_to_rgba, 4, 4, 2)
|
||||
|
||||
#undef OPENCV_GPU_IMPLEMENT_RGB2RGB_TRAITS
|
||||
|
||||
OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(bgr_to_bgr555, 3, 0, 5)
|
||||
OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(bgr_to_bgr565, 3, 0, 6)
|
||||
OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(rgb_to_bgr555, 3, 2, 5)
|
||||
OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(rgb_to_bgr565, 3, 2, 6)
|
||||
OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(bgra_to_bgr555, 4, 0, 5)
|
||||
OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(bgra_to_bgr565, 4, 0, 6)
|
||||
OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(rgba_to_bgr555, 4, 2, 5)
|
||||
OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS(rgba_to_bgr565, 4, 2, 6)
|
||||
|
||||
#undef OPENCV_GPU_IMPLEMENT_RGB2RGB5x5_TRAITS
|
||||
|
||||
OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_rgb, 3, 2, 5)
|
||||
OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_rgb, 3, 2, 6)
|
||||
OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_bgr, 3, 0, 5)
|
||||
OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_bgr, 3, 0, 6)
OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_rgba, 4, 2, 5)
OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_rgba, 4, 2, 6)
OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(bgr555_to_bgra, 4, 0, 5)
OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS(bgr565_to_bgra, 4, 0, 6)

#undef OPENCV_GPU_IMPLEMENT_RGB5x52RGB_TRAITS

OPENCV_GPU_IMPLEMENT_GRAY2RGB_TRAITS(gray_to_bgr, 3)
OPENCV_GPU_IMPLEMENT_GRAY2RGB_TRAITS(gray_to_bgra, 4)

#undef OPENCV_GPU_IMPLEMENT_GRAY2RGB_TRAITS

OPENCV_GPU_IMPLEMENT_GRAY2RGB5x5_TRAITS(gray_to_bgr555, 5)
OPENCV_GPU_IMPLEMENT_GRAY2RGB5x5_TRAITS(gray_to_bgr565, 6)

#undef OPENCV_GPU_IMPLEMENT_GRAY2RGB5x5_TRAITS

OPENCV_GPU_IMPLEMENT_RGB5x52GRAY_TRAITS(bgr555_to_gray, 5)
OPENCV_GPU_IMPLEMENT_RGB5x52GRAY_TRAITS(bgr565_to_gray, 6)

#undef OPENCV_GPU_IMPLEMENT_RGB5x52GRAY_TRAITS

OPENCV_GPU_IMPLEMENT_RGB2GRAY_TRAITS(rgb_to_gray, 3, 2)
OPENCV_GPU_IMPLEMENT_RGB2GRAY_TRAITS(bgr_to_gray, 3, 0)
OPENCV_GPU_IMPLEMENT_RGB2GRAY_TRAITS(rgba_to_gray, 4, 2)
OPENCV_GPU_IMPLEMENT_RGB2GRAY_TRAITS(bgra_to_gray, 4, 0)

#undef OPENCV_GPU_IMPLEMENT_RGB2GRAY_TRAITS

OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(rgb_to_yuv, 3, 3, 0)
OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(rgba_to_yuv, 4, 3, 0)
OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(rgb_to_yuv4, 3, 4, 0)
OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(rgba_to_yuv4, 4, 4, 0)
OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(bgr_to_yuv, 3, 3, 2)
OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(bgra_to_yuv, 4, 3, 2)
OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(bgr_to_yuv4, 3, 4, 2)
OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS(bgra_to_yuv4, 4, 4, 2)

#undef OPENCV_GPU_IMPLEMENT_RGB2YUV_TRAITS

OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_rgb, 3, 3, 0)
OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_rgba, 3, 4, 0)
OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_rgb, 4, 3, 0)
OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_rgba, 4, 4, 0)
OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_bgr, 3, 3, 2)
OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv_to_bgra, 3, 4, 2)
OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_bgr, 4, 3, 2)
OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS(yuv4_to_bgra, 4, 4, 2)

#undef OPENCV_GPU_IMPLEMENT_YUV2RGB_TRAITS

OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(rgb_to_YCrCb, 3, 3, 2)
OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(rgba_to_YCrCb, 4, 3, 2)
OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(rgb_to_YCrCb4, 3, 4, 2)
OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(rgba_to_YCrCb4, 4, 4, 2)
OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(bgr_to_YCrCb, 3, 3, 0)
OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(bgra_to_YCrCb, 4, 3, 0)
OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(bgr_to_YCrCb4, 3, 4, 0)
OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS(bgra_to_YCrCb4, 4, 4, 0)

#undef OPENCV_GPU_IMPLEMENT_RGB2YCrCb_TRAITS

OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_rgb, 3, 3, 2)
OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_rgba, 3, 4, 2)
OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_rgb, 4, 3, 2)
OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_rgba, 4, 4, 2)
OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_bgr, 3, 3, 0)
OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb_to_bgra, 3, 4, 0)
OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_bgr, 4, 3, 0)
OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS(YCrCb4_to_bgra, 4, 4, 0)

#undef OPENCV_GPU_IMPLEMENT_YCrCb2RGB_TRAITS

OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(rgb_to_xyz, 3, 3, 2)
OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(rgba_to_xyz, 4, 3, 2)
OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(rgb_to_xyz4, 3, 4, 2)
OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(rgba_to_xyz4, 4, 4, 2)
OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(bgr_to_xyz, 3, 3, 0)
OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(bgra_to_xyz, 4, 3, 0)
OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(bgr_to_xyz4, 3, 4, 0)
OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS(bgra_to_xyz4, 4, 4, 0)

#undef OPENCV_GPU_IMPLEMENT_RGB2XYZ_TRAITS

OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_rgb, 3, 3, 2)
OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_rgb, 4, 3, 2)
OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_rgba, 3, 4, 2)
OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_rgba, 4, 4, 2)
OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_bgr, 3, 3, 0)
OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_bgr, 4, 3, 0)
OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(xyz_to_bgra, 3, 4, 0)
OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS(xyz4_to_bgra, 4, 4, 0)

#undef OPENCV_GPU_IMPLEMENT_XYZ2RGB_TRAITS

OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(rgb_to_hsv, 3, 3, 2)
OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(rgba_to_hsv, 4, 3, 2)
OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(rgb_to_hsv4, 3, 4, 2)
OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(rgba_to_hsv4, 4, 4, 2)
OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(bgr_to_hsv, 3, 3, 0)
OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(bgra_to_hsv, 4, 3, 0)
OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(bgr_to_hsv4, 3, 4, 0)
OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS(bgra_to_hsv4, 4, 4, 0)

#undef OPENCV_GPU_IMPLEMENT_RGB2HSV_TRAITS

OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_rgb, 3, 3, 2)
OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_rgba, 3, 4, 2)
OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_rgb, 4, 3, 2)
OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_rgba, 4, 4, 2)
OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_bgr, 3, 3, 0)
OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(hsv_to_bgra, 3, 4, 0)
OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_bgr, 4, 3, 0)
OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS(hsv4_to_bgra, 4, 4, 0)

#undef OPENCV_GPU_IMPLEMENT_HSV2RGB_TRAITS

OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(rgb_to_hls, 3, 3, 2)
OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(rgba_to_hls, 4, 3, 2)
OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(rgb_to_hls4, 3, 4, 2)
OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(rgba_to_hls4, 4, 4, 2)
OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(bgr_to_hls, 3, 3, 0)
OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(bgra_to_hls, 4, 3, 0)
OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(bgr_to_hls4, 3, 4, 0)
OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS(bgra_to_hls4, 4, 4, 0)

#undef OPENCV_GPU_IMPLEMENT_RGB2HLS_TRAITS

OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls_to_rgb, 3, 3, 2)
OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls_to_rgba, 3, 4, 2)
OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_rgb, 4, 3, 2)
OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_rgba, 4, 4, 2)
OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls_to_bgr, 3, 3, 0)
OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls_to_bgra, 3, 4, 0)
OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_bgr, 4, 3, 0)
OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS(hls4_to_bgra, 4, 4, 0)

#undef OPENCV_GPU_IMPLEMENT_HLS2RGB_TRAITS

}}}

#endif // __OPENCV_GPU_COLOR_HPP__
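
Each line above stamps out a small traits struct for one color conversion; the generated code lives in the new (and suppressed) detail/color.hpp. A minimal sketch of how a .cu file would consume one of these traits, assuming each macro emits a name ## _traits template exposing a functor_type typedef and a create_functor() factory; those member names are inferred from the traits pattern, not confirmed by this diff:

// Hedged sketch; gray_to_bgra_traits<uchar> and its members are assumed names.
#include "opencv2/gpu/device/transform.hpp"

namespace cv { namespace gpu { namespace device
{
    void gray_to_bgra_caller(const DevMem2D_<uchar>& src, const DevMem2D_<uchar4>& dst, cudaStream_t stream)
    {
        // transform() picks the vectorized or scalar kernel automatically.
        transform(src, dst, gray_to_bgra_traits<uchar>::create_functor(), stream);
    }
}}}
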
@ -44,6 +44,7 @@
#define __OPENCV_GPU_DATAMOV_UTILS_HPP__

#include "internal_shared.hpp"
#include "utility.hpp"

namespace cv { namespace gpu { namespace device
{
@ -55,49 +56,40 @@ namespace cv { namespace gpu { namespace device
        __device__ __forceinline__ static void Load(const T* ptr, int offset, T& val) { val = ptr[offset]; }
    };

#else // __CUDA_ARCH__ >= 200

    #if defined(_WIN64) || defined(__LP64__)
        // 64-bit register modifier for inlined asm
        #define _OPENCV_ASM_PTR_ "l"
    #else
        // 32-bit register modifier for inlined asm
        #define _OPENCV_ASM_PTR_ "r"
    #endif
#else // __CUDA_ARCH__ >= 200

    template<class T> struct ForceGlob;

    #define DEFINE_FORCE_GLOB(base_type, ptx_type, reg_mod) \
        template <> struct ForceGlob<base_type> \
        { \
            __device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
            { \
                asm("ld.global."#ptx_type" %0, [%1];" : "="#reg_mod(val) : _OPENCV_ASM_PTR_(ptr + offset)); \
            } \
        };
    #define DEFINE_FORCE_GLOB_B(base_type, ptx_type) \
        template <> struct ForceGlob<base_type> \
        { \
            __device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
            { \
                asm("ld.global."#ptx_type" %0, [%1];" : "=r"(*reinterpret_cast<uint*>(&val)) : _OPENCV_ASM_PTR_(ptr + offset)); \
            } \
        };

    DEFINE_FORCE_GLOB_B(uchar, u8)
    DEFINE_FORCE_GLOB_B(schar, s8)
    DEFINE_FORCE_GLOB_B(char, b8)
    DEFINE_FORCE_GLOB (ushort, u16, h)
    DEFINE_FORCE_GLOB (short, s16, h)
    DEFINE_FORCE_GLOB (uint, u32, r)
    DEFINE_FORCE_GLOB (int, s32, r)
    DEFINE_FORCE_GLOB (float, f32, f)
    DEFINE_FORCE_GLOB (double, f64, d)

    #define OPENCV_GPU_DEFINE_FORCE_GLOB(base_type, ptx_type, reg_mod) \
        template <> struct ForceGlob<base_type> \
        { \
            __device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
            { \
                asm("ld.global."#ptx_type" %0, [%1];" : "="#reg_mod(val) : OPENCV_GPU_ASM_PTR(ptr + offset)); \
            } \
        };

    #undef DEFINE_FORCE_GLOB
    #undef DEFINE_FORCE_GLOB_B
    #undef _OPENCV_ASM_PTR_
    #define OPENCV_GPU_DEFINE_FORCE_GLOB_B(base_type, ptx_type) \
        template <> struct ForceGlob<base_type> \
        { \
            __device__ __forceinline__ static void Load(const base_type* ptr, int offset, base_type& val) \
            { \
                asm("ld.global."#ptx_type" %0, [%1];" : "=r"(*reinterpret_cast<uint*>(&val)) : OPENCV_GPU_ASM_PTR(ptr + offset)); \
            } \
        };

    OPENCV_GPU_DEFINE_FORCE_GLOB_B(uchar, u8)
    OPENCV_GPU_DEFINE_FORCE_GLOB_B(schar, s8)
    OPENCV_GPU_DEFINE_FORCE_GLOB_B(char, b8)
    OPENCV_GPU_DEFINE_FORCE_GLOB (ushort, u16, h)
    OPENCV_GPU_DEFINE_FORCE_GLOB (short, s16, h)
    OPENCV_GPU_DEFINE_FORCE_GLOB (uint, u32, r)
    OPENCV_GPU_DEFINE_FORCE_GLOB (int, s32, r)
    OPENCV_GPU_DEFINE_FORCE_GLOB (float, f32, f)
    OPENCV_GPU_DEFINE_FORCE_GLOB (double, f64, d)

    #undef OPENCV_GPU_DEFINE_FORCE_GLOB
    #undef OPENCV_GPU_DEFINE_FORCE_GLOB_B

#endif // __CUDA_ARCH__ >= 200
}}}

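
The refactoring replaces the file-local _OPENCV_ASM_PTR_ register-size modifier and DEFINE_FORCE_GLOB* macros with OPENCV_GPU_ASM_PTR and OPENCV_GPU_DEFINE_FORCE_GLOB*; the pointer modifier presumably now comes from the newly included utility.hpp. A minimal sketch of how kernel code consumes the result; the kernel itself is illustrative, not part of the commit:

template <typename T>
__global__ void copy_through_glob(const T* src, T* dst, int n)
{
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
    {
        T val;
        // Plain load on sm_2x; forced ld.global via inline PTX on sm_1x.
        cv::gpu::device::ForceGlob<T>::Load(src, i, val);
        dst[i] = val;
    }
}
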
1037
modules/gpu/src/opencv2/gpu/device/detail/color.hpp
Normal file
File diff suppressed because it is too large
Load Diff
429
modules/gpu/src/opencv2/gpu/device/detail/transform.hpp
Normal file
@ -0,0 +1,429 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_GPU_TRANSFORM_DETAIL_HPP__
#define __OPENCV_GPU_TRANSFORM_DETAIL_HPP__

#include "internal_shared.hpp"
#include "vec_traits.hpp"

namespace cv { namespace gpu { namespace device
{
    namespace detail
    {
        //! Mask accessor

        class MaskReader
        {
        public:
            explicit MaskReader(const PtrStep& mask_): mask(mask_) {}

            __device__ __forceinline__ bool operator()(int y, int x) const { return mask.ptr(y)[x]; }

        private:
            PtrStep mask;
        };

        struct NoMask
        {
            __device__ __forceinline__ bool operator()(int y, int x) const { return true; }
        };

        //! Read Write Traits

        template <size_t src_elem_size, size_t dst_elem_size>
        struct UnReadWriteTraits_
        {
            enum { shift = 1 };
        };
        template <size_t src_elem_size>
        struct UnReadWriteTraits_<src_elem_size, 1>
        {
            enum { shift = 4 };
        };
        template <size_t src_elem_size>
        struct UnReadWriteTraits_<src_elem_size, 2>
        {
            enum { shift = 2 };
        };
        template <typename T, typename D> struct UnReadWriteTraits
        {
            enum { shift = UnReadWriteTraits_<sizeof(T), sizeof(D)>::shift };

            typedef typename TypeVec<T, shift>::vec_type read_type;
            typedef typename TypeVec<D, shift>::vec_type write_type;
        };

        template <size_t src_elem_size1, size_t src_elem_size2, size_t dst_elem_size>
        struct BinReadWriteTraits_
        {
            enum { shift = 1 };
        };
        template <size_t src_elem_size1, size_t src_elem_size2>
        struct BinReadWriteTraits_<src_elem_size1, src_elem_size2, 1>
        {
            enum { shift = 4 };
        };
        template <size_t src_elem_size1, size_t src_elem_size2>
        struct BinReadWriteTraits_<src_elem_size1, src_elem_size2, 2>
        {
            enum { shift = 2 };
        };
        template <typename T1, typename T2, typename D> struct BinReadWriteTraits
        {
            enum {shift = BinReadWriteTraits_<sizeof(T1), sizeof(T2), sizeof(D)>::shift};

            typedef typename TypeVec<T1, shift>::vec_type read_type1;
            typedef typename TypeVec<T2, shift>::vec_type read_type2;
            typedef typename TypeVec<D , shift>::vec_type write_type;
        };
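
The shift constant above is the whole vectorization policy: destination elements of 1 byte are processed four at a time, 2-byte ones two at a time, everything else scalar. A small illustration of what the traits resolve to; the typedefs below are explanatory only, not part of the commit:

namespace cv { namespace gpu { namespace device { namespace detail
{
    // uchar -> uchar: sizeof(D) == 1, so shift == 4 and the kernels move
    // uchar4 values, i.e. four pixels per thread per memory transaction.
    typedef UnReadWriteTraits<uchar, uchar> U8ToU8;   // read_type/write_type == uchar4

    // float -> float: sizeof(D) == 4 hits the primary template, shift == 1,
    // so the dispatcher keeps the plain scalar kernel.
    typedef UnReadWriteTraits<float, float> F32ToF32;
}}}}
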

        //! Transform kernels

        template <int shift> struct OpUnroller;
        template <> struct OpUnroller<1>
        {
            template <typename T, typename D, typename UnOp, typename Mask>
            static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
            {
                if (mask(y, x_shifted))
                    dst.x = op(src.x);
            }

            template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
            static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
            {
                if (mask(y, x_shifted))
                    dst.x = op(src1.x, src2.x);
            }
        };
        template <> struct OpUnroller<2>
        {
            template <typename T, typename D, typename UnOp, typename Mask>
            static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
            {
                if (mask(y, x_shifted))
                    dst.x = op(src.x);
                if (mask(y, x_shifted + 1))
                    dst.y = op(src.y);
            }

            template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
            static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
            {
                if (mask(y, x_shifted))
                    dst.x = op(src1.x, src2.x);
                if (mask(y, x_shifted + 1))
                    dst.y = op(src1.y, src2.y);
            }
        };
        template <> struct OpUnroller<3>
        {
            template <typename T, typename D, typename UnOp, typename Mask>
            static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
            {
                if (mask(y, x_shifted))
                    dst.x = op(src.x);
                if (mask(y, x_shifted + 1))
                    dst.y = op(src.y);
                if (mask(y, x_shifted + 2))
                    dst.z = op(src.z);
            }

            template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
            static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
            {
                if (mask(y, x_shifted))
                    dst.x = op(src1.x, src2.x);
                if (mask(y, x_shifted + 1))
                    dst.y = op(src1.y, src2.y);
                if (mask(y, x_shifted + 2))
                    dst.z = op(src1.z, src2.z);
            }
        };
        template <> struct OpUnroller<4>
        {
            template <typename T, typename D, typename UnOp, typename Mask>
            static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
            {
                if (mask(y, x_shifted))
                    dst.x = op(src.x);
                if (mask(y, x_shifted + 1))
                    dst.y = op(src.y);
                if (mask(y, x_shifted + 2))
                    dst.z = op(src.z);
                if (mask(y, x_shifted + 3))
                    dst.w = op(src.w);
            }

            template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
            static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
            {
                if (mask(y, x_shifted))
                    dst.x = op(src1.x, src2.x);
                if (mask(y, x_shifted + 1))
                    dst.y = op(src1.y, src2.y);
                if (mask(y, x_shifted + 2))
                    dst.z = op(src1.z, src2.z);
                if (mask(y, x_shifted + 3))
                    dst.w = op(src1.w, src2.w);
            }
        };

        template <typename T, typename D, typename UnOp, typename Mask>
        __global__ static void transformSmart(const DevMem2D_<T> src_, PtrStep_<D> dst_, const Mask mask, UnOp op)
        {
            typedef typename UnReadWriteTraits<T, D>::read_type read_type;
            typedef typename UnReadWriteTraits<T, D>::write_type write_type;
            const int shift = UnReadWriteTraits<T, D>::shift;

            const int x = threadIdx.x + blockIdx.x * blockDim.x;
            const int y = threadIdx.y + blockIdx.y * blockDim.y;
            const int x_shifted = x * shift;

            if (y < src_.rows)
            {
                const T* src = src_.ptr(y);
                D* dst = dst_.ptr(y);

                if (x_shifted + shift - 1 < src_.cols)
                {
                    read_type src_n_el = ((const read_type*)src)[x];
                    write_type dst_n_el;

                    OpUnroller<shift>::unroll(src_n_el, dst_n_el, mask, op, x_shifted, y);

                    ((write_type*)dst)[x] = dst_n_el;
                }
                else
                {
                    for (int real_x = x_shifted; real_x < src_.cols; ++real_x)
                    {
                        if (mask(y, real_x))
                            dst[real_x] = op(src[real_x]);
                    }
                }
            }
        }

        template <typename T, typename D, typename UnOp, typename Mask>
        static __global__ void transformSimple(const DevMem2D_<T> src, PtrStep_<D> dst, const Mask mask, UnOp op)
        {
            const int x = blockDim.x * blockIdx.x + threadIdx.x;
            const int y = blockDim.y * blockIdx.y + threadIdx.y;

            if (x < src.cols && y < src.rows && mask(y, x))
            {
                dst.ptr(y)[x] = op(src.ptr(y)[x]);
            }
        }

        template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
        __global__ static void transformSmart(const DevMem2D_<T1> src1_, const PtrStep_<T2> src2_, PtrStep_<D> dst_,
                                              const Mask mask, BinOp op)
        {
            typedef typename BinReadWriteTraits<T1, T2, D>::read_type1 read_type1;
            typedef typename BinReadWriteTraits<T1, T2, D>::read_type2 read_type2;
            typedef typename BinReadWriteTraits<T1, T2, D>::write_type write_type;
            const int shift = BinReadWriteTraits<T1, T2, D>::shift;

            const int x = threadIdx.x + blockIdx.x * blockDim.x;
            const int y = threadIdx.y + blockIdx.y * blockDim.y;
            const int x_shifted = x * shift;

            if (y < src1_.rows)
            {
                const T1* src1 = src1_.ptr(y);
                const T2* src2 = src2_.ptr(y);
                D* dst = dst_.ptr(y);

                if (x_shifted + shift - 1 < src1_.cols)
                {
                    read_type1 src1_n_el = ((const read_type1*)src1)[x];
                    read_type2 src2_n_el = ((const read_type2*)src2)[x];
                    write_type dst_n_el;

                    OpUnroller<shift>::unroll(src1_n_el, src2_n_el, dst_n_el, mask, op, x_shifted, y);

                    ((write_type*)dst)[x] = dst_n_el;
                }
                else
                {
                    for (int real_x = x_shifted; real_x < src1_.cols; ++real_x)
                    {
                        if (mask(y, real_x))
                            dst[real_x] = op(src1[real_x], src2[real_x]);
                    }
                }
            }
        }

        template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
        static __global__ void transformSimple(const DevMem2D_<T1> src1, const PtrStep_<T2> src2, PtrStep_<D> dst,
                                               const Mask mask, BinOp op)
        {
            const int x = blockDim.x * blockIdx.x + threadIdx.x;
            const int y = blockDim.y * blockIdx.y + threadIdx.y;

            if (x < src1.cols && y < src1.rows && mask(y, x))
            {
                T1 src1_data = src1.ptr(y)[x];
                T2 src2_data = src2.ptr(y)[x];
                dst.ptr(y)[x] = op(src1_data, src2_data);
            }
        }

        template <bool UseSmart> struct TransformDispatcher;
        template<> struct TransformDispatcher<false>
        {
            template <typename T, typename D, typename UnOp, typename Mask>
            static void call(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, UnOp op, const Mask& mask, cudaStream_t stream)
            {
                dim3 threads(16, 16, 1);
                dim3 grid(1, 1, 1);

                grid.x = divUp(src.cols, threads.x);
                grid.y = divUp(src.rows, threads.y);

                transformSimple<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
                cudaSafeCall( cudaGetLastError() );

                if (stream == 0)
                    cudaSafeCall( cudaDeviceSynchronize() );
            }

            template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
            static void call(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst, BinOp op, const Mask& mask, cudaStream_t stream)
            {
                dim3 threads(16, 16, 1);
                dim3 grid(1, 1, 1);

                grid.x = divUp(src1.cols, threads.x);
                grid.y = divUp(src1.rows, threads.y);

                transformSimple<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
                cudaSafeCall( cudaGetLastError() );

                if (stream == 0)
                    cudaSafeCall( cudaDeviceSynchronize() );
            }
        };
        template<> struct TransformDispatcher<true>
        {
            template <typename T, typename D, typename UnOp, typename Mask>
            static void call(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, UnOp op, const Mask& mask, cudaStream_t stream)
            {
                const int shift = UnReadWriteTraits<T, D>::shift;

                dim3 threads(16, 16, 1);
                dim3 grid(1, 1, 1);

                grid.x = divUp(src.cols, threads.x * shift);
                grid.y = divUp(src.rows, threads.y);

                transformSmart<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
                cudaSafeCall( cudaGetLastError() );

                if (stream == 0)
                    cudaSafeCall( cudaDeviceSynchronize() );
            }

            template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
            static void call(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst, BinOp op, const Mask& mask, cudaStream_t stream)
            {
                const int shift = BinReadWriteTraits<T1, T2, D>::shift;

                dim3 threads(16, 16, 1);
                dim3 grid(1, 1, 1);

                grid.x = divUp(src1.cols, threads.x * shift);
                grid.y = divUp(src1.rows, threads.y);

                transformSmart<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
                cudaSafeCall( cudaGetLastError() );

                if (stream == 0)
                    cudaSafeCall( cudaDeviceSynchronize() );
            }
        };

        template <typename T, typename D, int scn, int dcn> struct UseSmartUn_
        {
            static const bool value = false;
        };
        template <typename T, typename D> struct UseSmartUn_<T, D, 1, 1>
        {
            static const bool value = UnReadWriteTraits<T, D>::shift != 1;
        };
        template <typename T, typename D> struct UseSmartUn
        {
            static const bool value = UseSmartUn_<T, D, VecTraits<T>::cn, VecTraits<D>::cn>::value;
        };

        template <typename T1, typename T2, typename D, int src1cn, int src2cn, int dstcn> struct UseSmartBin_
        {
            static const bool value = false;
        };
        template <typename T1, typename T2, typename D> struct UseSmartBin_<T1, T2, D, 1, 1, 1>
        {
            static const bool value = BinReadWriteTraits<T1, T2, D>::shift != 1;
        };
        template <typename T1, typename T2, typename D> struct UseSmartBin
        {
            static const bool value = UseSmartBin_<T1, T2, D, VecTraits<T1>::cn, VecTraits<T2>::cn, VecTraits<D>::cn>::value;
        };
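
UseSmartUn/UseSmartBin gate the vectorized path twice: the types must be single-channel (cn == 1, assuming VecTraits<T>::cn from vec_traits.hpp) and the traits must give a shift other than 1. A couple of compile-time spot checks, purely illustrative and not part of the commit:

namespace cv { namespace gpu { namespace device { namespace detail
{
    // Arrays of negative size fail to compile, so each typedef documents
    // the expected dispatch decision.
    typedef char u8_takes_smart_path [UseSmartUn<uchar, uchar>::value   ? 1 : -1]; // shift == 4
    typedef char f32_takes_plain_path[UseSmartUn<float, float>::value   ? -1 : 1]; // shift == 1
    typedef char ch3_takes_plain_path[UseSmartUn<uchar3, uchar3>::value ? -1 : 1]; // cn == 3
}}}}
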

        template <typename T, typename D, typename UnOp, typename Mask>
        static void transform_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, UnOp op, const Mask& mask, cudaStream_t stream)
        {
            TransformDispatcher< UseSmartUn<T, D>::value >::call(src, dst, op, mask, stream);
        }

        template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
        static void transform_caller(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst, BinOp op, const Mask& mask, cudaStream_t stream)
        {
            TransformDispatcher< UseSmartBin<T1, T2, D>::value >::call(src1, src2, dst, op, mask, stream);
        }
    }
}}}

#endif // __OPENCV_GPU_TRANSFORM_DETAIL_HPP__
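
Everything above is machinery; user code sees only the thin transform() wrappers that this commit leaves in transform.hpp (shown further below). A hedged usage sketch, where scale_by_two is an illustrative name and the functors come from the new functional.hpp in this same commit:

#include "opencv2/gpu/device/transform.hpp"
#include "opencv2/gpu/device/functional.hpp"

namespace cv { namespace gpu { namespace device
{
    // Multiply every float pixel by 2; the smart/simple kernel choice
    // happens inside detail::transform_caller.
    void scale_by_two(const DevMem2D_<float>& src, const DevMem2D_<float>& dst, cudaStream_t stream)
    {
        transform(src, dst, bind2nd(multiplies<float>(), 2.0f), stream);
    }
}}}
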
338
modules/gpu/src/opencv2/gpu/device/functional.hpp
Normal file
@ -0,0 +1,338 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_GPU_FUNCTIONAL_HPP__
#define __OPENCV_GPU_FUNCTIONAL_HPP__

#include <thrust/functional.h>
#include "internal_shared.hpp"
#include "saturate_cast.hpp"

namespace cv { namespace gpu { namespace device
{
    using thrust::unary_function;
    using thrust::binary_function;

    using thrust::plus;
    using thrust::minus;
    using thrust::multiplies;
    using thrust::divides;
    using thrust::modulus;
    using thrust::negate;

    using thrust::equal_to;
    using thrust::not_equal_to;
    using thrust::greater;
    using thrust::less;
    using thrust::greater_equal;
    using thrust::less_equal;

    using thrust::logical_and;
    using thrust::logical_or;
    using thrust::logical_not;

    using thrust::bit_and;
    using thrust::bit_or;
    using thrust::bit_xor;
    template <typename T> struct bit_not : public unary_function<T, T>
    {
        __forceinline__ __device__ T operator ()(const T& v) const {return ~v;}
    };

    using thrust::identity;

    #define OPENCV_GPU_IMPLEMENT_MINMAX(name, type, op) \
    template <> struct name<type> : public binary_function<type, type, type> \
    { \
        __forceinline__ __device__ type operator()(type lhs, type rhs) const {return op(lhs, rhs);} \
    };

    template <typename T> struct maximum : public binary_function<T, T, T>
    {
        __forceinline__ __device__ T operator()(const T& lhs, const T& rhs) const {return lhs < rhs ? rhs : lhs;}
    };
    OPENCV_GPU_IMPLEMENT_MINMAX(maximum, uchar, max)
    OPENCV_GPU_IMPLEMENT_MINMAX(maximum, schar, max)
    OPENCV_GPU_IMPLEMENT_MINMAX(maximum, char, max)
    OPENCV_GPU_IMPLEMENT_MINMAX(maximum, ushort, max)
    OPENCV_GPU_IMPLEMENT_MINMAX(maximum, short, max)
    OPENCV_GPU_IMPLEMENT_MINMAX(maximum, int, max)
    OPENCV_GPU_IMPLEMENT_MINMAX(maximum, uint, max)
    OPENCV_GPU_IMPLEMENT_MINMAX(maximum, float, fmax)
    OPENCV_GPU_IMPLEMENT_MINMAX(maximum, double, fmax)

    template <typename T> struct minimum : public binary_function<T, T, T>
    {
        __forceinline__ __device__ T operator()(const T &lhs, const T &rhs) const {return lhs < rhs ? lhs : rhs;}
    };
    OPENCV_GPU_IMPLEMENT_MINMAX(minimum, uchar, min)
    OPENCV_GPU_IMPLEMENT_MINMAX(minimum, schar, min)
    OPENCV_GPU_IMPLEMENT_MINMAX(minimum, char, min)
    OPENCV_GPU_IMPLEMENT_MINMAX(minimum, ushort, min)
    OPENCV_GPU_IMPLEMENT_MINMAX(minimum, short, min)
    OPENCV_GPU_IMPLEMENT_MINMAX(minimum, int, min)
    OPENCV_GPU_IMPLEMENT_MINMAX(minimum, uint, min)
    OPENCV_GPU_IMPLEMENT_MINMAX(minimum, float, fmin)
    OPENCV_GPU_IMPLEMENT_MINMAX(minimum, double, fmin)

    #undef OPENCV_GPU_IMPLEMENT_MINMAX
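
Because the specializations are generated for every primitive type, device code can use maximum/minimum generically and still get the fmax/fmin (or integer max/min) intrinsics. A hedged sketch; the kernel name and clamping use case are illustrative, not part of the commit:

__global__ void clamp_kernel(float* data, int n, float lo, float hi)
{
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
    {
        cv::gpu::device::maximum<float> mx; // specialization forwards to fmax
        cv::gpu::device::minimum<float> mn; // specialization forwards to fmin
        data[i] = mn(mx(data[i], lo), hi);
    }
}
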
    using thrust::project1st;
    using thrust::project2nd;

    using thrust::unary_negate;
    using thrust::not1;

    using thrust::binary_negate;
    using thrust::not2;

    #define OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(func) \
    template <typename T> struct func ## _func : public unary_function<T, float> \
    { \
        __forceinline__ __device__ float operator ()(const T& v) \
        { \
            return func ## f(v); \
        } \
    }; \
    template <> struct func ## _func<double> : public unary_function<double, double> \
    { \
        __forceinline__ __device__ double operator ()(double v) \
        { \
            return func(v); \
        } \
    };
    #define OPENCV_GPU_IMPLEMENT_BIN_FUNCTOR(func) \
    template <typename T> struct func ## _func : public binary_function<T, T, float> \
    { \
        __forceinline__ __device__ float operator ()(const T& v1, const T& v2) \
        { \
            return func ## f(v1, v2); \
        } \
    }; \
    template <> struct func ## _func<double> : public binary_function<double, double, double> \
    { \
        __forceinline__ __device__ double operator ()(double v1, double v2) \
        { \
            return func(v1, v2); \
        } \
    };

    OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(fabs)
    OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(sqrt)
    OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(exp)
    OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(exp2)
    OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(exp10)
    OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(log)
    OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(log2)
    OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(log10)
    OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(sin)
    OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(cos)
    OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(tan)
    OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(asin)
    OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(acos)
    OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(atan)
    OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(sinh)
    OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(cosh)
    OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(tanh)
    OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(asinh)
    OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(acosh)
    OPENCV_GPU_IMPLEMENT_UN_FUNCTOR(atanh)

    OPENCV_GPU_IMPLEMENT_BIN_FUNCTOR(hypot)
    OPENCV_GPU_IMPLEMENT_BIN_FUNCTOR(atan2)
    OPENCV_GPU_IMPLEMENT_BIN_FUNCTOR(pow)

    #undef OPENCV_GPU_IMPLEMENT_UN_FUNCTOR
    #undef OPENCV_GPU_IMPLEMENT_BIN_FUNCTOR

    template<typename T> struct hypot_sqr_func : public binary_function<T, T, float>
    {
        __forceinline__ __device__ T operator ()(T src1, T src2) const
        {
            return src1 * src1 + src2 * src2;
        }
    };

    template <typename T, typename D> struct saturate_cast_func : public unary_function<T, D>
    {
        __forceinline__ __device__ D operator ()(const T& v)
        {
            return saturate_cast<D>(v);
        }
    };

    template <typename T> struct thresh_binary_func : public unary_function<T, T>
    {
        __forceinline__ __host__ __device__ thresh_binary_func(T thresh_, T maxVal_) : thresh(thresh_), maxVal(maxVal_) {}

        __forceinline__ __device__ T operator()(const T& src) const
        {
            return src > thresh ? maxVal : 0;
        }

        T thresh;
        T maxVal;
    };
    template <typename T> struct thresh_binary_inv_func : public unary_function<T, T>
    {
        __forceinline__ __host__ __device__ thresh_binary_inv_func(T thresh_, T maxVal_) : thresh(thresh_), maxVal(maxVal_) {}

        __forceinline__ __device__ T operator()(const T& src) const
        {
            return src > thresh ? 0 : maxVal;
        }

        T thresh;
        T maxVal;
    };
    template <typename T> struct thresh_trunc_func : public unary_function<T, T>
    {
        explicit __forceinline__ __host__ __device__ thresh_trunc_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {}

        __forceinline__ __device__ T operator()(const T& src) const
        {
            return minimum<T>()(src, thresh);
        }

        T thresh;
    };
    template <typename T> struct thresh_to_zero_func : public unary_function<T, T>
    {
    public:
        explicit __forceinline__ __host__ __device__ thresh_to_zero_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {}

        __forceinline__ __device__ T operator()(const T& src) const
        {
            return src > thresh ? src : 0;
        }

        T thresh;
    };
    template <typename T> struct thresh_to_zero_inv_func : public unary_function<T, T>
    {
    public:
        explicit __forceinline__ __host__ __device__ thresh_to_zero_inv_func(T thresh_, T maxVal_ = 0) : thresh(thresh_) {}

        __forceinline__ __device__ T operator()(const T& src) const
        {
            return src > thresh ? 0 : src;
        }

        T thresh;
    };

    template <typename Op> struct binder1st : public unary_function<typename Op::second_argument_type, typename Op::result_type>
    {
        __forceinline__ __host__ __device__ binder1st(const Op& op_, const typename Op::first_argument_type& arg1_) : op(op_), arg1(arg1_) {}

        __forceinline__ __device__ typename Op::result_type operator ()(const typename Op::second_argument_type& a)
        {
            return op(arg1, a);
        }

        Op op;
        typename Op::first_argument_type arg1;
    };
    template <typename Op, typename T> static __forceinline__ __host__ __device__ binder1st<Op> bind1st(const Op& op, const T& x)
    {
        return binder1st<Op>(op, typename Op::first_argument_type(x));
    }
    template <typename Op> struct binder2nd : public unary_function<typename Op::first_argument_type, typename Op::result_type>
    {
        __forceinline__ __host__ __device__ binder2nd(const Op& op_, const typename Op::second_argument_type& arg2_) : op(op_), arg2(arg2_) {}

        __forceinline__ __device__ typename Op::result_type operator ()(const typename Op::first_argument_type& a)
        {
            return op(a, arg2);
        }

        Op op;
        typename Op::second_argument_type arg2;
    };
    template <typename Op, typename T> static __forceinline__ __host__ __device__ binder2nd<Op> bind2nd(const Op& op, const T& x)
    {
        return binder2nd<Op>(op, typename Op::second_argument_type(x));
    }
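
binder1st/binder2nd (and their bind1st/bind2nd helpers) are device-side stand-ins for the std:: equivalents: they freeze one argument of a binary functor so the result fits the unary transform interface. A hedged sketch with an illustrative function name:

#include "opencv2/gpu/device/functional.hpp"

void make_invert_op()
{
    using namespace cv::gpu::device;

    // 255 - x as a unary functor, ready to hand to transform(); the
    // operator() itself runs on the device.
    binder1st< minus<float> > invert_op = bind1st(minus<float>(), 255.0f);
    (void)invert_op;
}
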
    template <typename T1, typename T2> struct BinOpTraits
    {
        typedef int argument_type;
    };
    template <typename T> struct BinOpTraits<T, T>
    {
        typedef T argument_type;
    };
    template <typename T> struct BinOpTraits<T, double>
    {
        typedef double argument_type;
    };
    template <typename T> struct BinOpTraits<double, T>
    {
        typedef double argument_type;
    };
    template <> struct BinOpTraits<double, double>
    {
        typedef double argument_type;
    };
    template <typename T> struct BinOpTraits<T, float>
    {
        typedef float argument_type;
    };
    template <typename T> struct BinOpTraits<float, T>
    {
        typedef float argument_type;
    };
    template <> struct BinOpTraits<float, float>
    {
        typedef float argument_type;
    };
    template <> struct BinOpTraits<double, float>
    {
        typedef double argument_type;
    };
    template <> struct BinOpTraits<float, double>
    {
        typedef double argument_type;
    };
}}}

#endif // __OPENCV_GPU_FUNCTIONAL_HPP__
@ -45,7 +45,7 @@

namespace cv { namespace gpu { namespace device
{
    template<class T> struct numeric_limits_gpu
    template<class T> struct numeric_limits
    {
        typedef T type;
        __device__ __forceinline__ static type min() { return type(); };
@ -59,7 +59,7 @@ namespace cv { namespace gpu { namespace device
        static const bool is_signed;
    };

    template<> struct numeric_limits_gpu<bool>
    template<> struct numeric_limits<bool>
    {
        typedef bool type;
        __device__ __forceinline__ static type min() { return false; };
@ -73,7 +73,7 @@ namespace cv { namespace gpu { namespace device
        static const bool is_signed = false;
    };

    template<> struct numeric_limits_gpu<char>
    template<> struct numeric_limits<char>
    {
        typedef char type;
        __device__ __forceinline__ static type min() { return CHAR_MIN; };
@ -87,7 +87,7 @@ namespace cv { namespace gpu { namespace device
        static const bool is_signed = (char)-1 == -1;
    };

    template<> struct numeric_limits_gpu<signed char>
    template<> struct numeric_limits<signed char>
    {
        typedef char type;
        __device__ __forceinline__ static type min() { return CHAR_MIN; };
@ -101,7 +101,7 @@ namespace cv { namespace gpu { namespace device
        static const bool is_signed = (signed char)-1 == -1;
    };

    template<> struct numeric_limits_gpu<unsigned char>
    template<> struct numeric_limits<unsigned char>
    {
        typedef unsigned char type;
        __device__ __forceinline__ static type min() { return 0; };
@ -115,7 +115,7 @@ namespace cv { namespace gpu { namespace device
        static const bool is_signed = false;
    };

    template<> struct numeric_limits_gpu<short>
    template<> struct numeric_limits<short>
    {
        typedef short type;
        __device__ __forceinline__ static type min() { return SHRT_MIN; };
@ -129,7 +129,7 @@ namespace cv { namespace gpu { namespace device
        static const bool is_signed = true;
    };

    template<> struct numeric_limits_gpu<unsigned short>
    template<> struct numeric_limits<unsigned short>
    {
        typedef unsigned short type;
        __device__ __forceinline__ static type min() { return 0; };
@ -143,7 +143,7 @@ namespace cv { namespace gpu { namespace device
        static const bool is_signed = false;
    };

    template<> struct numeric_limits_gpu<int>
    template<> struct numeric_limits<int>
    {
        typedef int type;
        __device__ __forceinline__ static type min() { return INT_MIN; };
@ -158,7 +158,7 @@ namespace cv { namespace gpu { namespace device
    };


    template<> struct numeric_limits_gpu<unsigned int>
    template<> struct numeric_limits<unsigned int>
    {
        typedef unsigned int type;
        __device__ __forceinline__ static type min() { return 0; };
@ -172,7 +172,7 @@ namespace cv { namespace gpu { namespace device
        static const bool is_signed = false;
    };

    template<> struct numeric_limits_gpu<long>
    template<> struct numeric_limits<long>
    {
        typedef long type;
        __device__ __forceinline__ static type min() { return LONG_MIN; };
@ -186,7 +186,7 @@ namespace cv { namespace gpu { namespace device
        static const bool is_signed = true;
    };

    template<> struct numeric_limits_gpu<unsigned long>
    template<> struct numeric_limits<unsigned long>
    {
        typedef unsigned long type;
        __device__ __forceinline__ static type min() { return 0; };
@ -200,7 +200,7 @@ namespace cv { namespace gpu { namespace device
        static const bool is_signed = false;
    };

    template<> struct numeric_limits_gpu<float>
    template<> struct numeric_limits<float>
    {
        typedef float type;
        __device__ __forceinline__ static type min() { return 1.175494351e-38f/*FLT_MIN*/; };
@ -214,7 +214,7 @@ namespace cv { namespace gpu { namespace device
        static const bool is_signed = true;
    };

    template<> struct numeric_limits_gpu<double>
    template<> struct numeric_limits<double>
    {
        typedef double type;
        __device__ __forceinline__ static type min() { return 2.2250738585072014e-308/*DBL_MIN*/; };
@ -45,128 +45,122 @@

#include "internal_shared.hpp"

namespace cv
namespace cv { namespace gpu { namespace device
{
    namespace gpu
    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(uchar v) { return _Tp(v); }
    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(schar v) { return _Tp(v); }
    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(ushort v) { return _Tp(v); }
    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(short v) { return _Tp(v); }
    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(uint v) { return _Tp(v); }
    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(int v) { return _Tp(v); }
    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(float v) { return _Tp(v); }
    template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(double v) { return _Tp(v); }

    template<> static __device__ __forceinline__ uchar saturate_cast<uchar>(schar v)
    { return (uchar)max((int)v, 0); }
    template<> static __device__ __forceinline__ uchar saturate_cast<uchar>(ushort v)
    { return (uchar)min((uint)v, (uint)UCHAR_MAX); }
    template<> static __device__ __forceinline__ uchar saturate_cast<uchar>(int v)
    { return (uchar)((uint)v <= UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0); }
    template<> static __device__ __forceinline__ uchar saturate_cast<uchar>(uint v)
    { return (uchar)min(v, (uint)UCHAR_MAX); }
    template<> static __device__ __forceinline__ uchar saturate_cast<uchar>(short v)
    { return saturate_cast<uchar>((uint)v); }

    template<> static __device__ __forceinline__ uchar saturate_cast<uchar>(float v)
    { int iv = __float2int_rn(v); return saturate_cast<uchar>(iv); }
    template<> static __device__ __forceinline__ uchar saturate_cast<uchar>(double v)
    {
        namespace device
        {
            template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(uchar v) { return _Tp(v); }
            template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(schar v) { return _Tp(v); }
            template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(ushort v) { return _Tp(v); }
            template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(short v) { return _Tp(v); }
            template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(uint v) { return _Tp(v); }
            template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(int v) { return _Tp(v); }
            template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(float v) { return _Tp(v); }
            template<typename _Tp> static __device__ __forceinline__ _Tp saturate_cast(double v) { return _Tp(v); }

            template<> static __device__ __forceinline__ uchar saturate_cast<uchar>(schar v)
            { return (uchar)max((int)v, 0); }
            template<> static __device__ __forceinline__ uchar saturate_cast<uchar>(ushort v)
            { return (uchar)min((uint)v, (uint)UCHAR_MAX); }
            template<> static __device__ __forceinline__ uchar saturate_cast<uchar>(int v)
            { return (uchar)((uint)v <= UCHAR_MAX ? v : v > 0 ? UCHAR_MAX : 0); }
            template<> static __device__ __forceinline__ uchar saturate_cast<uchar>(uint v)
            { return (uchar)min(v, (uint)UCHAR_MAX); }
            template<> static __device__ __forceinline__ uchar saturate_cast<uchar>(short v)
            { return saturate_cast<uchar>((uint)v); }

            template<> static __device__ __forceinline__ uchar saturate_cast<uchar>(float v)
            { int iv = __float2int_rn(v); return saturate_cast<uchar>(iv); }
            template<> static __device__ __forceinline__ uchar saturate_cast<uchar>(double v)
            {
            #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
                int iv = __double2int_rn(v); return saturate_cast<uchar>(iv);
            #else
                return saturate_cast<uchar>((float)v);
            #endif
            }

            template<> static __device__ __forceinline__ schar saturate_cast<schar>(uchar v)
            { return (schar)min((int)v, SCHAR_MAX); }
            template<> static __device__ __forceinline__ schar saturate_cast<schar>(ushort v)
            { return (schar)min((uint)v, (uint)SCHAR_MAX); }
            template<> static __device__ __forceinline__ schar saturate_cast<schar>(int v)
            {
                return (schar)((uint)(v-SCHAR_MIN) <= (uint)UCHAR_MAX ?
                    v : v > 0 ? SCHAR_MAX : SCHAR_MIN);
            }
            template<> static __device__ __forceinline__ schar saturate_cast<schar>(short v)
            { return saturate_cast<schar>((int)v); }
            template<> static __device__ __forceinline__ schar saturate_cast<schar>(uint v)
            { return (schar)min(v, (uint)SCHAR_MAX); }

            template<> static __device__ __forceinline__ schar saturate_cast<schar>(float v)
            { int iv = __float2int_rn(v); return saturate_cast<schar>(iv); }
            template<> static __device__ __forceinline__ schar saturate_cast<schar>(double v)
            {
            #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
                int iv = __double2int_rn(v); return saturate_cast<schar>(iv);
            #else
                return saturate_cast<schar>((float)v);
            #endif
            }

            template<> static __device__ __forceinline__ ushort saturate_cast<ushort>(schar v)
            { return (ushort)max((int)v, 0); }
            template<> static __device__ __forceinline__ ushort saturate_cast<ushort>(short v)
            { return (ushort)max((int)v, 0); }
            template<> static __device__ __forceinline__ ushort saturate_cast<ushort>(int v)
            { return (ushort)((uint)v <= (uint)USHRT_MAX ? v : v > 0 ? USHRT_MAX : 0); }
            template<> static __device__ __forceinline__ ushort saturate_cast<ushort>(uint v)
            { return (ushort)min(v, (uint)USHRT_MAX); }
            template<> static __device__ __forceinline__ ushort saturate_cast<ushort>(float v)
            { int iv = __float2int_rn(v); return saturate_cast<ushort>(iv); }
            template<> static __device__ __forceinline__ ushort saturate_cast<ushort>(double v)
            {
            #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
                int iv = __double2int_rn(v); return saturate_cast<ushort>(iv);
            #else
                return saturate_cast<ushort>((float)v);
            #endif
            }

            template<> static __device__ __forceinline__ short saturate_cast<short>(ushort v)
            { return (short)min((int)v, SHRT_MAX); }
            template<> static __device__ __forceinline__ short saturate_cast<short>(int v)
            {
                return (short)((uint)(v - SHRT_MIN) <= (uint)USHRT_MAX ?
                    v : v > 0 ? SHRT_MAX : SHRT_MIN);
            }
            template<> static __device__ __forceinline__ short saturate_cast<short>(uint v)
            { return (short)min(v, (uint)SHRT_MAX); }
            template<> static __device__ __forceinline__ short saturate_cast<short>(float v)
            { int iv = __float2int_rn(v); return saturate_cast<short>(iv); }
            template<> static __device__ __forceinline__ short saturate_cast<short>(double v)
            {
            #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
                int iv = __double2int_rn(v); return saturate_cast<short>(iv);
            #else
                return saturate_cast<short>((float)v);
            #endif
            }

            template<> static __device__ __forceinline__ int saturate_cast<int>(float v) { return __float2int_rn(v); }
            template<> static __device__ __forceinline__ int saturate_cast<int>(double v)
            {
            #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
                return __double2int_rn(v);
            #else
                return saturate_cast<int>((float)v);
            #endif
            }

            template<> static __device__ __forceinline__ uint saturate_cast<uint>(float v){ return __float2uint_rn(v); }
            template<> static __device__ __forceinline__ uint saturate_cast<uint>(double v)
            {
            #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
                return __double2uint_rn(v);
            #else
                return saturate_cast<uint>((float)v);
            #endif
            }
        }
    #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
        int iv = __double2int_rn(v); return saturate_cast<uchar>(iv);
    #else
        return saturate_cast<uchar>((float)v);
    #endif
    }
    }

    template<> static __device__ __forceinline__ schar saturate_cast<schar>(uchar v)
    { return (schar)min((int)v, SCHAR_MAX); }
    template<> static __device__ __forceinline__ schar saturate_cast<schar>(ushort v)
    { return (schar)min((uint)v, (uint)SCHAR_MAX); }
    template<> static __device__ __forceinline__ schar saturate_cast<schar>(int v)
    {
        return (schar)((uint)(v-SCHAR_MIN) <= (uint)UCHAR_MAX ?
            v : v > 0 ? SCHAR_MAX : SCHAR_MIN);
    }
    template<> static __device__ __forceinline__ schar saturate_cast<schar>(short v)
    { return saturate_cast<schar>((int)v); }
    template<> static __device__ __forceinline__ schar saturate_cast<schar>(uint v)
    { return (schar)min(v, (uint)SCHAR_MAX); }

    template<> static __device__ __forceinline__ schar saturate_cast<schar>(float v)
    { int iv = __float2int_rn(v); return saturate_cast<schar>(iv); }
    template<> static __device__ __forceinline__ schar saturate_cast<schar>(double v)
    {
    #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
        int iv = __double2int_rn(v); return saturate_cast<schar>(iv);
    #else
        return saturate_cast<schar>((float)v);
    #endif
    }

    template<> static __device__ __forceinline__ ushort saturate_cast<ushort>(schar v)
    { return (ushort)max((int)v, 0); }
    template<> static __device__ __forceinline__ ushort saturate_cast<ushort>(short v)
    { return (ushort)max((int)v, 0); }
    template<> static __device__ __forceinline__ ushort saturate_cast<ushort>(int v)
    { return (ushort)((uint)v <= (uint)USHRT_MAX ? v : v > 0 ? USHRT_MAX : 0); }
    template<> static __device__ __forceinline__ ushort saturate_cast<ushort>(uint v)
    { return (ushort)min(v, (uint)USHRT_MAX); }
    template<> static __device__ __forceinline__ ushort saturate_cast<ushort>(float v)
    { int iv = __float2int_rn(v); return saturate_cast<ushort>(iv); }
    template<> static __device__ __forceinline__ ushort saturate_cast<ushort>(double v)
    {
    #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
        int iv = __double2int_rn(v); return saturate_cast<ushort>(iv);
    #else
        return saturate_cast<ushort>((float)v);
    #endif
    }

    template<> static __device__ __forceinline__ short saturate_cast<short>(ushort v)
    { return (short)min((int)v, SHRT_MAX); }
    template<> static __device__ __forceinline__ short saturate_cast<short>(int v)
    {
        return (short)((uint)(v - SHRT_MIN) <= (uint)USHRT_MAX ?
            v : v > 0 ? SHRT_MAX : SHRT_MIN);
    }
    template<> static __device__ __forceinline__ short saturate_cast<short>(uint v)
    { return (short)min(v, (uint)SHRT_MAX); }
    template<> static __device__ __forceinline__ short saturate_cast<short>(float v)
    { int iv = __float2int_rn(v); return saturate_cast<short>(iv); }
    template<> static __device__ __forceinline__ short saturate_cast<short>(double v)
    {
    #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
        int iv = __double2int_rn(v); return saturate_cast<short>(iv);
    #else
        return saturate_cast<short>((float)v);
    #endif
    }

    template<> static __device__ __forceinline__ int saturate_cast<int>(float v) { return __float2int_rn(v); }
    template<> static __device__ __forceinline__ int saturate_cast<int>(double v)
    {
    #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
        return __double2int_rn(v);
    #else
        return saturate_cast<int>((float)v);
    #endif
    }

    template<> static __device__ __forceinline__ uint saturate_cast<uint>(float v){ return __float2uint_rn(v); }
    template<> static __device__ __forceinline__ uint saturate_cast<uint>(double v)
    {
    #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 130
        return __double2uint_rn(v);
    #else
        return saturate_cast<uint>((float)v);
    #endif
    }
}}}

#endif /* __OPENCV_GPU_SATURATE_CAST_HPP__ */
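
The move into the flat cv::gpu::device namespace does not change the semantics: saturate_cast clamps to the destination range instead of wrapping the way a plain C cast would. A hedged sketch, with an illustrative kernel name:

__global__ void pack_to_u8(const int* src, uchar* dst, int n)
{
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
    {
        // e.g. 300 -> 255 and -7 -> 0, per the <uchar>(int) specialization above.
        dst[i] = cv::gpu::device::saturate_cast<uchar>(src[i]);
    }
}
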
@ -43,421 +43,34 @@

#ifndef __OPENCV_GPU_TRANSFORM_HPP__
#define __OPENCV_GPU_TRANSFORM_HPP__

#include "internal_shared.hpp"
#include "vecmath.hpp"
#include "detail/transform.hpp"

namespace cv { namespace gpu { namespace device
{
    //! Mask accessor

    class MaskReader
    template <typename T, typename D, typename UnOp>
    static void transform(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, UnOp op, cudaStream_t stream = 0)
    {
    public:
        explicit MaskReader(const PtrStep& mask_): mask(mask_) {}

        __device__ __forceinline__ bool operator()(int y, int x) const { return mask.ptr(y)[x]; }

    private:
        PtrStep mask;
    };

    struct NoMask
        detail::transform_caller(src, dst, op, detail::NoMask(), stream);
    }
    template <typename T, typename D, typename UnOp>
    static void transform(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, const PtrStep& mask, UnOp op,
        cudaStream_t stream = 0)
    {
        __device__ __forceinline__ bool operator()(int y, int x) const { return true; }
    };

    //! Read Write Traits

    template <size_t src_elem_size, size_t dst_elem_size>
    struct UnReadWriteTraits_
    {
        enum { shift = 1 };
    };
    template <size_t src_elem_size>
    struct UnReadWriteTraits_<src_elem_size, 1>
    {
        enum { shift = 4 };
    };
    template <size_t src_elem_size>
    struct UnReadWriteTraits_<src_elem_size, 2>
    {
        enum { shift = 2 };
    };
    template <typename T, typename D> struct UnReadWriteTraits
    {
        enum { shift = UnReadWriteTraits_<sizeof(T), sizeof(D)>::shift };

        typedef typename TypeVec<T, shift>::vec_t read_type;
        typedef typename TypeVec<D, shift>::vec_t write_type;
    };

    template <size_t src_elem_size1, size_t src_elem_size2, size_t dst_elem_size>
    struct BinReadWriteTraits_
    {
        enum { shift = 1 };
    };
    template <size_t src_elem_size1, size_t src_elem_size2>
    struct BinReadWriteTraits_<src_elem_size1, src_elem_size2, 1>
    {
        enum { shift = 4 };
    };
    template <size_t src_elem_size1, size_t src_elem_size2>
    struct BinReadWriteTraits_<src_elem_size1, src_elem_size2, 2>
    {
        enum { shift = 2 };
    };
    template <typename T1, typename T2, typename D> struct BinReadWriteTraits
    {
        enum { shift = BinReadWriteTraits_<sizeof(T1), sizeof(T2), sizeof(D)>::shift };

        typedef typename TypeVec<T1, shift>::vec_t read_type1;
        typedef typename TypeVec<T2, shift>::vec_t read_type2;
        typedef typename TypeVec<D , shift>::vec_t write_type;
    };

    //! Transform kernels

    template <int shift> struct OpUnroller;
    template <> struct OpUnroller<1>
    {
        template <typename T, typename D, typename UnOp, typename Mask>
        static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
        {
            if (mask(y, x_shifted))
                dst.x = op(src.x);
        }

        template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
        static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
        {
            if (mask(y, x_shifted))
                dst.x = op(src1.x, src2.x);
        }
    };
    template <> struct OpUnroller<2>
    {
        template <typename T, typename D, typename UnOp, typename Mask>
        static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
        {
            if (mask(y, x_shifted))
                dst.x = op(src.x);
            if (mask(y, x_shifted + 1))
                dst.y = op(src.y);
        }

        template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
        static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
        {
            if (mask(y, x_shifted))
                dst.x = op(src1.x, src2.x);
            if (mask(y, x_shifted + 1))
                dst.y = op(src1.y, src2.y);
        }
    };
    template <> struct OpUnroller<3>
    {
        template <typename T, typename D, typename UnOp, typename Mask>
        static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
        {
            if (mask(y, x_shifted))
                dst.x = op(src.x);
            if (mask(y, x_shifted + 1))
                dst.y = op(src.y);
            if (mask(y, x_shifted + 2))
                dst.z = op(src.z);
        }

        template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
        static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
        {
            if (mask(y, x_shifted))
                dst.x = op(src1.x, src2.x);
            if (mask(y, x_shifted + 1))
                dst.y = op(src1.y, src2.y);
            if (mask(y, x_shifted + 2))
                dst.z = op(src1.z, src2.z);
        }
    };
    template <> struct OpUnroller<4>
    {
        template <typename T, typename D, typename UnOp, typename Mask>
        static __device__ __forceinline__ void unroll(const T& src, D& dst, const Mask& mask, UnOp& op, int x_shifted, int y)
        {
            if (mask(y, x_shifted))
                dst.x = op(src.x);
            if (mask(y, x_shifted + 1))
                dst.y = op(src.y);
            if (mask(y, x_shifted + 2))
                dst.z = op(src.z);
            if (mask(y, x_shifted + 3))
                dst.w = op(src.w);
        }

        template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
        static __device__ __forceinline__ void unroll(const T1& src1, const T2& src2, D& dst, const Mask& mask, BinOp& op, int x_shifted, int y)
        {
            if (mask(y, x_shifted))
                dst.x = op(src1.x, src2.x);
            if (mask(y, x_shifted + 1))
                dst.y = op(src1.y, src2.y);
            if (mask(y, x_shifted + 2))
                dst.z = op(src1.z, src2.z);
            if (mask(y, x_shifted + 3))
                dst.w = op(src1.w, src2.w);
        }
    };

    template <typename T, typename D, typename UnOp, typename Mask>
    __global__ static void transformSmart(const DevMem2D_<T> src_, PtrStep_<D> dst_, const Mask mask, UnOp op)
    {
        typedef typename UnReadWriteTraits<T, D>::read_type read_type;
        typedef typename UnReadWriteTraits<T, D>::write_type write_type;
        const int shift = UnReadWriteTraits<T, D>::shift;

        const int x = threadIdx.x + blockIdx.x * blockDim.x;
        const int y = threadIdx.y + blockIdx.y * blockDim.y;
        const int x_shifted = x * shift;

        if (y < src_.rows)
        {
            const T* src = src_.ptr(y);
            D* dst = dst_.ptr(y);

            if (x_shifted + shift - 1 < src_.cols)
            {
                read_type src_n_el = ((const read_type*)src)[x];
                write_type dst_n_el;

                OpUnroller<shift>::unroll(src_n_el, dst_n_el, mask, op, x_shifted, y);

                ((write_type*)dst)[x] = dst_n_el;
            }
            else
            {
                for (int real_x = x_shifted; real_x < src_.cols; ++real_x)
                {
                    if (mask(y, real_x))
                        dst[real_x] = op(src[real_x]);
                }
            }
        }
        detail::transform_caller(src, dst, op, detail::MaskReader(mask), stream);
    }

    template <typename T, typename D, typename UnOp, typename Mask>
    static __global__ void transformSimple(const DevMem2D_<T> src, PtrStep_<D> dst, const Mask mask, UnOp op)
    template <typename T1, typename T2, typename D, typename BinOp>
    static void transform(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst,
        BinOp op, cudaStream_t stream = 0)
    {
        const int x = blockDim.x * blockIdx.x + threadIdx.x;
        const int y = blockDim.y * blockIdx.y + threadIdx.y;

        if (x < src.cols && y < src.rows && mask(y, x))
        {
            dst.ptr(y)[x] = op(src.ptr(y)[x]);
        }
        detail::transform_caller(src1, src2, dst, op, detail::NoMask(), stream);
    }

    template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
    __global__ static void transformSmart(const DevMem2D_<T1> src1_, const PtrStep_<T2> src2_, PtrStep_<D> dst_,
        const Mask mask, BinOp op)
    template <typename T1, typename T2, typename D, typename BinOp>
    static void transform(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst,
        const PtrStep& mask, BinOp op, cudaStream_t stream = 0)
    {
        typedef typename BinReadWriteTraits<T1, T2, D>::read_type1 read_type1;
        typedef typename BinReadWriteTraits<T1, T2, D>::read_type2 read_type2;
        typedef typename BinReadWriteTraits<T1, T2, D>::write_type write_type;
        const int shift = BinReadWriteTraits<T1, T2, D>::shift;

        const int x = threadIdx.x + blockIdx.x * blockDim.x;
        const int y = threadIdx.y + blockIdx.y * blockDim.y;
        const int x_shifted = x * shift;

        if (y < src1_.rows)
        {
            const T1* src1 = src1_.ptr(y);
            const T2* src2 = src2_.ptr(y);
            D* dst = dst_.ptr(y);

            if (x_shifted + shift - 1 < src1_.cols)
            {
                read_type1 src1_n_el = ((const read_type1*)src1)[x];
                read_type2 src2_n_el = ((const read_type2*)src2)[x];
                write_type dst_n_el;

                OpUnroller<shift>::unroll(src1_n_el, src2_n_el, dst_n_el, mask, op, x_shifted, y);

                ((write_type*)dst)[x] = dst_n_el;
            }
            else
            {
                for (int real_x = x_shifted; real_x < src1_.cols; ++real_x)
                {
                    if (mask(y, real_x))
                        dst[real_x] = op(src1[real_x], src2[real_x]);
                }
            }
        }
        detail::transform_caller(src1, src2, dst, op, detail::MaskReader(mask), stream);
    }

    template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
    static __global__ void transformSimple(const DevMem2D_<T1> src1, const PtrStep_<T2> src2, PtrStep_<D> dst,
        const Mask mask, BinOp op)
    {
        const int x = blockDim.x * blockIdx.x + threadIdx.x;
        const int y = blockDim.y * blockIdx.y + threadIdx.y;

        if (x < src1.cols && y < src1.rows && mask(y, x))
        {
            T1 src1_data = src1.ptr(y)[x];
            T2 src2_data = src2.ptr(y)[x];
            dst.ptr(y)[x] = op(src1_data, src2_data);
        }
    }
}}}

namespace cv
{
    namespace gpu
    {
        template <bool UseSmart> struct TransformDispatcher;
        template<> struct TransformDispatcher<false>
        {
            template <typename T, typename D, typename UnOp, typename Mask>
            static void call(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, UnOp op, const Mask& mask,
                cudaStream_t stream = 0)
            {
                dim3 threads(16, 16, 1);
                dim3 grid(1, 1, 1);

                grid.x = divUp(src.cols, threads.x);
                grid.y = divUp(src.rows, threads.y);

                device::transformSimple<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
                cudaSafeCall( cudaGetLastError() );

                if (stream == 0)
                    cudaSafeCall( cudaDeviceSynchronize() );
            }

            template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
            static void call(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst,
                BinOp op, const Mask& mask, cudaStream_t stream = 0)
            {
                dim3 threads(16, 16, 1);
                dim3 grid(1, 1, 1);

                grid.x = divUp(src1.cols, threads.x);
                grid.y = divUp(src1.rows, threads.y);

                device::transformSimple<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
                cudaSafeCall( cudaGetLastError() );

                if (stream == 0)
                    cudaSafeCall( cudaDeviceSynchronize() );
            }
        };
        template<> struct TransformDispatcher<true>
        {
            template <typename T, typename D, typename UnOp, typename Mask>
            static void call(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, UnOp op, const Mask& mask,
                cudaStream_t stream = 0)
            {
                const int shift = device::UnReadWriteTraits<T, D>::shift;

                dim3 threads(16, 16, 1);
                dim3 grid(1, 1, 1);

                grid.x = divUp(src.cols, threads.x * shift);
                grid.y = divUp(src.rows, threads.y);

                device::transformSmart<T, D><<<grid, threads, 0, stream>>>(src, dst, mask, op);
                cudaSafeCall( cudaGetLastError() );

                if (stream == 0)
                    cudaSafeCall( cudaDeviceSynchronize() );
            }

            template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
            static void call(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst,
                BinOp op, const Mask& mask, cudaStream_t stream = 0)
            {
                const int shift = device::BinReadWriteTraits<T1, T2, D>::shift;

                dim3 threads(16, 16, 1);
                dim3 grid(1, 1, 1);

                grid.x = divUp(src1.cols, threads.x * shift);
                grid.y = divUp(src1.rows, threads.y);

                device::transformSmart<T1, T2, D><<<grid, threads, 0, stream>>>(src1, src2, dst, mask, op);
                cudaSafeCall( cudaGetLastError() );

                if (stream == 0)
                    cudaSafeCall( cudaDeviceSynchronize() );
            }
        };

        template <typename T, typename D, int scn, int dcn> struct UseSmartUn_
        {
            static const bool value = false;
        };
        template <typename T, typename D> struct UseSmartUn_<T, D, 1, 1>
        {
            static const bool value = device::UnReadWriteTraits<T, D>::shift != 1;
        };
        template <typename T, typename D> struct UseSmartUn
        {
            static const bool value = UseSmartUn_<T, D, device::VecTraits<T>::cn, device::VecTraits<D>::cn>::value;
        };

        template <typename T1, typename T2, typename D, int src1cn, int src2cn, int dstcn> struct UseSmartBin_
        {
            static const bool value = false;
        };
        template <typename T1, typename T2, typename D> struct UseSmartBin_<T1, T2, D, 1, 1, 1>
        {
            static const bool value = device::BinReadWriteTraits<T1, T2, D>::shift != 1;
        };
        template <typename T1, typename T2, typename D> struct UseSmartBin
        {
            static const bool value = UseSmartBin_<T1, T2, D, device::VecTraits<T1>::cn, device::VecTraits<T2>::cn, device::VecTraits<D>::cn>::value;
        };

        template <typename T, typename D, typename UnOp, typename Mask>
        static void transform_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, UnOp op, const Mask& mask,
            cudaStream_t stream = 0)
        {
            TransformDispatcher< UseSmartUn<T, D>::value >::call(src, dst, op, mask, stream);
        }

        template <typename T, typename D, typename UnOp>
        static void transform(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, UnOp op, cudaStream_t stream = 0)
        {
            transform_caller(src, dst, op, device::NoMask(), stream);
        }
        template <typename T, typename D, typename UnOp>
        static void transform(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, const PtrStep& mask, UnOp op,
            cudaStream_t stream = 0)
        {
            transform_caller(src, dst, op, device::MaskReader(mask), stream);
        }

        template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
        static void transform_caller(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst,
            BinOp op, const Mask& mask, cudaStream_t stream = 0)
        {
            TransformDispatcher< UseSmartBin<T1, T2, D>::value >::call(src1, src2, dst, op, mask, stream);
        }

        template <typename T1, typename T2, typename D, typename BinOp>
        static void transform(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst,
            BinOp op, cudaStream_t stream = 0)
        {
            transform_caller(src1, src2, dst, op, device::NoMask(), stream);
        }
        template <typename T1, typename T2, typename D, typename BinOp>
        static void transform(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst,
            const PtrStep& mask, BinOp op, cudaStream_t stream = 0)
        {
            transform_caller(src1, src2, dst, op, device::MaskReader(mask), stream);
        }
    }
}

#endif // __OPENCV_GPU_TRANSFORM_HPP__
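For context, a sketch of how a .cu translation unit might now drive the slimmed-down wrapper (illustrative only; the functor and function names are assumptions, and the actual kernels live in detail/transform.hpp after this refactoring):

    // Hypothetical unary functor; saturate_cast keeps the result in uchar range.
    struct AddScalar
    {
        __device__ __forceinline__ uchar operator()(uchar v) const
        {
            return cv::gpu::device::saturate_cast<uchar>(v + 10);
        }
    };

    void addScalarCaller(const DevMem2D_<uchar>& src, const DevMem2D_<uchar>& dst, cudaStream_t stream)
    {
        // Unmasked unary overload; the dispatch between the simple and the
        // vectorized "smart" kernel now happens inside detail::transform_caller.
        cv::gpu::device::transform(src, dst, AddScalar(), stream);
    }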
modules/gpu/src/opencv2/gpu/device/utility.hpp (new file, 206 lines)
@ -0,0 +1,206 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#ifndef __OPENCV_GPU_UTILITY_HPP__
#define __OPENCV_GPU_UTILITY_HPP__

#include "internal_shared.hpp"
#include "saturate_cast.hpp"

#ifndef __CUDA_ARCH__
    #define __CUDA_ARCH__ 0
#endif

#define OPENCV_GPU_LOG_WARP_SIZE (5)
#define OPENCV_GPU_WARP_SIZE     (1 << OPENCV_GPU_LOG_WARP_SIZE)
#define OPENCV_GPU_LOG_MEM_BANKS ((__CUDA_ARCH__ >= 200) ? 5 : 4) // 32 banks on fermi, 16 on tesla
#define OPENCV_GPU_MEM_BANKS     (1 << OPENCV_GPU_LOG_MEM_BANKS)

#if defined(_WIN64) || defined(__LP64__)
    // 64-bit register modifier for inlined asm
    #define OPENCV_GPU_ASM_PTR "l"
#else
    // 32-bit register modifier for inlined asm
    #define OPENCV_GPU_ASM_PTR "r"
#endif
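// Usage sketch (editorial illustration, not in the original header): the bank
// macros are commonly used to pad shared-memory indices so that a strided walk
// does not repeatedly hit the same bank, e.g.:
//
//     __shared__ float smem[256 + 256 / OPENCV_GPU_MEM_BANKS];
//     int padded = idx + idx / OPENCV_GPU_MEM_BANKS; // conflict-free index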

namespace cv { namespace gpu { namespace device
{
    template <typename T> void __host__ __device__ __forceinline__ swap(T& a, T& b)
    {
        T temp = a;
        a = b;
        b = temp;
    }

    // warp-synchronous 32 elements reduction
    template <typename T, typename Op> __device__ __forceinline__ void warpReduce32(volatile T* data, volatile T& partial_reduction, int tid, Op op)
    {
        data[tid] = partial_reduction;

        if (tid < 16)
        {
            data[tid] = partial_reduction = op(partial_reduction, data[tid + 16]);
            data[tid] = partial_reduction = op(partial_reduction, data[tid + 8 ]);
            data[tid] = partial_reduction = op(partial_reduction, data[tid + 4 ]);
            data[tid] = partial_reduction = op(partial_reduction, data[tid + 2 ]);
            data[tid] = partial_reduction = op(partial_reduction, data[tid + 1 ]);
        }
    }

    // warp-synchronous 16 elements reduction
    template <typename T, typename Op> __device__ __forceinline__ void warpReduce16(volatile T* data, volatile T& partial_reduction, int tid, Op op)
    {
        data[tid] = partial_reduction;

        if (tid < 8)
        {
            data[tid] = partial_reduction = op(partial_reduction, (T)data[tid + 8 ]);
            data[tid] = partial_reduction = op(partial_reduction, (T)data[tid + 4 ]);
            data[tid] = partial_reduction = op(partial_reduction, (T)data[tid + 2 ]);
            data[tid] = partial_reduction = op(partial_reduction, (T)data[tid + 1 ]);
        }
    }

    // warp-synchronous reduction
    template <int n, typename T, typename Op> __device__ __forceinline__ void warpReduce(volatile T* data, volatile T& partial_reduction, int tid, Op op)
    {
        if (tid < n)
            data[tid] = partial_reduction;

        if (n > 16)
        {
            if (tid < n - 16)
                data[tid] = partial_reduction = op(partial_reduction, data[tid + 16]);
            if (tid < 8)
            {
                data[tid] = partial_reduction = op(partial_reduction, data[tid + 8]);
                data[tid] = partial_reduction = op(partial_reduction, data[tid + 4]);
                data[tid] = partial_reduction = op(partial_reduction, data[tid + 2]);
                data[tid] = partial_reduction = op(partial_reduction, data[tid + 1]);
            }
        }
        else if (n > 8)
        {
            if (tid < n - 8)
                data[tid] = partial_reduction = op(partial_reduction, data[tid + 8]);
            if (tid < 4)
            {
                data[tid] = partial_reduction = op(partial_reduction, data[tid + 4]);
                data[tid] = partial_reduction = op(partial_reduction, data[tid + 2]);
                data[tid] = partial_reduction = op(partial_reduction, data[tid + 1]);
            }
        }
        else if (n > 4)
        {
            if (tid < n - 4)
                data[tid] = partial_reduction = op(partial_reduction, data[tid + 4]);
            if (tid < 2)
            {
                data[tid] = partial_reduction = op(partial_reduction, data[tid + 2]);
                data[tid] = partial_reduction = op(partial_reduction, data[tid + 1]);
            }
        }
        else if (n > 2)
        {
            if (tid < n - 2)
                data[tid] = partial_reduction = op(partial_reduction, data[tid + 2]);
            if (tid < 2)
            {
                data[tid] = partial_reduction = op(partial_reduction, data[tid + 1]);
            }
        }
    }

    // solve 2x2 linear system Ax=b
    template <typename T> __device__ __forceinline__ bool solve2x2(const T A[2][2], const T b[2], T x[2])
    {
        T det = A[0][0] * A[1][1] - A[1][0] * A[0][1];

        if (det != 0)
        {
            double invdet = 1.0 / det;

            x[0] = saturate_cast<T>(invdet * (b[0] * A[1][1] - b[1] * A[0][1]));
            x[1] = saturate_cast<T>(invdet * (A[0][0] * b[1] - A[1][0] * b[0]));

            return true;
        }

        return false;
    }

    // solve 3x3 linear system Ax=b
    template <typename T> __device__ __forceinline__ bool solve3x3(const T A[3][3], const T b[3], T x[3])
    {
        T det = A[0][0] * (A[1][1] * A[2][2] - A[1][2] * A[2][1])
              - A[0][1] * (A[1][0] * A[2][2] - A[1][2] * A[2][0])
              + A[0][2] * (A[1][0] * A[2][1] - A[1][1] * A[2][0]);

        if (det != 0)
        {
            double invdet = 1.0 / det;

            x[0] = saturate_cast<T>(invdet *
                (b[0]    * (A[1][1] * A[2][2] - A[1][2] * A[2][1]) -
                 A[0][1] * (b[1]    * A[2][2] - A[1][2] * b[2]   ) +
                 A[0][2] * (b[1]    * A[2][1] - A[1][1] * b[2]   )));

            x[1] = saturate_cast<T>(invdet *
                (A[0][0] * (b[1]    * A[2][2] - A[1][2] * b[2]   ) -
                 b[0]    * (A[1][0] * A[2][2] - A[1][2] * A[2][0]) +
                 A[0][2] * (A[1][0] * b[2]    - b[1]    * A[2][0])));

            x[2] = saturate_cast<T>(invdet *
                (A[0][0] * (A[1][1] * b[2]    - b[1]    * A[2][1]) -
                 A[0][1] * (A[1][0] * b[2]    - b[1]    * A[2][0]) +
                 b[0]    * (A[1][0] * A[2][1] - A[1][1] * A[2][0])));

            return true;
        }

        return false;
    }
}}}

#endif // __OPENCV_GPU_UTILITY_HPP__
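A usage sketch for the warp-synchronous reduction above (illustrative only; the kernel, the launch shape, and the plus functor from functional.hpp are assumptions, not part of the commit). With one 32-thread warp per block, the folding needs no __syncthreads because every step stays inside a single warp and the shared memory is volatile:

    __global__ void blockSum32(const float* src, float* dst)
    {
        __shared__ volatile float smem[32];

        // One warp per block: each thread contributes one element.
        float partial = src[blockIdx.x * 32 + threadIdx.x];
        cv::gpu::device::warpReduce32(smem, partial, threadIdx.x, cv::gpu::device::plus<float>());

        if (threadIdx.x == 0)
            dst[blockIdx.x] = smem[0]; // warp-wide sum
    }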
modules/gpu/src/opencv2/gpu/device/vec_math.hpp (new file, 287 lines)
@ -0,0 +1,287 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  (standard OpenCV BSD license header, identical to utility.hpp above)
//
//M*/

#ifndef __OPENCV_GPU_VECMATH_HPP__
#define __OPENCV_GPU_VECMATH_HPP__

#include "internal_shared.hpp"
#include "saturate_cast.hpp"
#include "vec_traits.hpp"
#include "functional.hpp"

namespace cv { namespace gpu { namespace device
{
    namespace detail
    {
        template <int cn, typename VecD> struct SatCastHelper;
        template <typename VecD> struct SatCastHelper<1, VecD>
        {
            template <typename VecS> static __device__ VecD cast(const VecS& v)
            {
                typedef typename VecTraits<VecD>::elem_type D;
                return VecTraits<VecD>::make(saturate_cast<D>(v.x));
            }
        };
        template <typename VecD> struct SatCastHelper<2, VecD>
        {
            template <typename VecS> static __device__ VecD cast(const VecS& v)
            {
                typedef typename VecTraits<VecD>::elem_type D;
                return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y));
            }
        };
        template <typename VecD> struct SatCastHelper<3, VecD>
        {
            template <typename VecS> static __device__ VecD cast(const VecS& v)
            {
                typedef typename VecTraits<VecD>::elem_type D;
                return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z));
            }
        };
        template <typename VecD> struct SatCastHelper<4, VecD>
        {
            template <typename VecS> static __device__ VecD cast(const VecS& v)
            {
                typedef typename VecTraits<VecD>::elem_type D;
                return VecTraits<VecD>::make(saturate_cast<D>(v.x), saturate_cast<D>(v.y), saturate_cast<D>(v.z), saturate_cast<D>(v.w));
            }
        };

        template <typename VecD, typename VecS> static __device__ VecD saturate_cast_caller(const VecS& v)
        {
            return SatCastHelper<VecTraits<VecD>::cn, VecD>::cast(v);
        }
    }

    template<typename _Tp> static __device__ _Tp saturate_cast(const uchar1& v)  {return detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ _Tp saturate_cast(const char1& v)   {return detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ _Tp saturate_cast(const ushort1& v) {return detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ _Tp saturate_cast(const short1& v)  {return detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ _Tp saturate_cast(const uint1& v)   {return detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ _Tp saturate_cast(const int1& v)    {return detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ _Tp saturate_cast(const float1& v)  {return detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ _Tp saturate_cast(const double1& v) {return detail::saturate_cast_caller<_Tp>(v);}

    template<typename _Tp> static __device__ _Tp saturate_cast(const uchar2& v)  {return detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ _Tp saturate_cast(const char2& v)   {return detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ _Tp saturate_cast(const ushort2& v) {return detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ _Tp saturate_cast(const short2& v)  {return detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ _Tp saturate_cast(const uint2& v)   {return detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ _Tp saturate_cast(const int2& v)    {return detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ _Tp saturate_cast(const float2& v)  {return detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ _Tp saturate_cast(const double2& v) {return detail::saturate_cast_caller<_Tp>(v);}

    template<typename _Tp> static __device__ _Tp saturate_cast(const uchar3& v)  {return detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ _Tp saturate_cast(const char3& v)   {return detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ _Tp saturate_cast(const ushort3& v) {return detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ _Tp saturate_cast(const short3& v)  {return detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ _Tp saturate_cast(const uint3& v)   {return detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ _Tp saturate_cast(const int3& v)    {return detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ _Tp saturate_cast(const float3& v)  {return detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ _Tp saturate_cast(const double3& v) {return detail::saturate_cast_caller<_Tp>(v);}

    template<typename _Tp> static __device__ _Tp saturate_cast(const uchar4& v)  {return detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ _Tp saturate_cast(const char4& v)   {return detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ _Tp saturate_cast(const ushort4& v) {return detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ _Tp saturate_cast(const short4& v)  {return detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ _Tp saturate_cast(const uint4& v)   {return detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ _Tp saturate_cast(const int4& v)    {return detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ _Tp saturate_cast(const float4& v)  {return detail::saturate_cast_caller<_Tp>(v);}
    template<typename _Tp> static __device__ _Tp saturate_cast(const double4& v) {return detail::saturate_cast_caller<_Tp>(v);}

    #define OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, op, func) \
        static __device__ TypeVec<func<type>::result_type, 1>::vec_type op(const type ## 1 & a) \
        { \
            func<type> f; \
            return VecTraits<TypeVec<func<type>::result_type, 1>::vec_type>::make(f(a.x)); \
        } \
        static __device__ TypeVec<func<type>::result_type, 2>::vec_type op(const type ## 2 & a) \
        { \
            func<type> f; \
            return VecTraits<TypeVec<func<type>::result_type, 2>::vec_type>::make(f(a.x), f(a.y)); \
        } \
        static __device__ TypeVec<func<type>::result_type, 3>::vec_type op(const type ## 3 & a) \
        { \
            func<type> f; \
            return VecTraits<TypeVec<func<type>::result_type, 3>::vec_type>::make(f(a.x), f(a.y), f(a.z)); \
        } \
        static __device__ TypeVec<func<type>::result_type, 4>::vec_type op(const type ## 4 & a) \
        { \
            func<type> f; \
            return VecTraits<TypeVec<func<type>::result_type, 4>::vec_type>::make(f(a.x), f(a.y), f(a.z), f(a.w)); \
        }

    #define OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, op, func) \
        static __device__ TypeVec<func<type>::result_type, 1>::vec_type op(const type ## 1 & a, const type ## 1 & b) \
        { \
            func<type> f; \
            return VecTraits<TypeVec<func<type>::result_type, 1>::vec_type>::make(f(a.x, b.x)); \
        } \
        template <typename T> \
        static __device__ typename TypeVec<typename func<typename BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type op(const type ## 1 & v, T s) \
        { \
            func<typename BinOpTraits<type, T>::argument_type> f; \
            return VecTraits<typename TypeVec<typename func<typename BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type>::make(f(v.x, s)); \
        } \
        template <typename T> \
        static __device__ typename TypeVec<typename func<typename BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type op(T s, const type ## 1 & v) \
        { \
            func<typename BinOpTraits<type, T>::argument_type> f; \
            return VecTraits<typename TypeVec<typename func<typename BinOpTraits<type, T>::argument_type>::result_type, 1>::vec_type>::make(f(s, v.x)); \
        } \
        static __device__ TypeVec<func<type>::result_type, 2>::vec_type op(const type ## 2 & a, const type ## 2 & b) \
        { \
            func<type> f; \
            return VecTraits<TypeVec<func<type>::result_type, 2>::vec_type>::make(f(a.x, b.x), f(a.y, b.y)); \
        } \
        template <typename T> \
        static __device__ typename TypeVec<typename func<typename BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type op(const type ## 2 & v, T s) \
        { \
            func<typename BinOpTraits<type, T>::argument_type> f; \
            return VecTraits<typename TypeVec<typename func<typename BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type>::make(f(v.x, s), f(v.y, s)); \
        } \
        template <typename T> \
        static __device__ typename TypeVec<typename func<typename BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type op(T s, const type ## 2 & v) \
        { \
            func<typename BinOpTraits<type, T>::argument_type> f; \
            return VecTraits<typename TypeVec<typename func<typename BinOpTraits<type, T>::argument_type>::result_type, 2>::vec_type>::make(f(s, v.x), f(s, v.y)); \
        } \
        static __device__ TypeVec<func<type>::result_type, 3>::vec_type op(const type ## 3 & a, const type ## 3 & b) \
        { \
            func<type> f; \
            return VecTraits<TypeVec<func<type>::result_type, 3>::vec_type>::make(f(a.x, b.x), f(a.y, b.y), f(a.z, b.z)); \
        } \
        template <typename T> \
        static __device__ typename TypeVec<typename func<typename BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type op(const type ## 3 & v, T s) \
        { \
            func<typename BinOpTraits<type, T>::argument_type> f; \
            return VecTraits<typename TypeVec<typename func<typename BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type>::make(f(v.x, s), f(v.y, s), f(v.z, s)); \
        } \
        template <typename T> \
        static __device__ typename TypeVec<typename func<typename BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type op(T s, const type ## 3 & v) \
        { \
            func<typename BinOpTraits<type, T>::argument_type> f; \
            return VecTraits<typename TypeVec<typename func<typename BinOpTraits<type, T>::argument_type>::result_type, 3>::vec_type>::make(f(s, v.x), f(s, v.y), f(s, v.z)); \
        } \
        static __device__ TypeVec<func<type>::result_type, 4>::vec_type op(const type ## 4 & a, const type ## 4 & b) \
        { \
            func<type> f; \
            return VecTraits<TypeVec<func<type>::result_type, 4>::vec_type>::make(f(a.x, b.x), f(a.y, b.y), f(a.z, b.z), f(a.w, b.w)); \
        } \
        template <typename T> \
        static __device__ typename TypeVec<typename func<typename BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type op(const type ## 4 & v, T s) \
        { \
            func<typename BinOpTraits<type, T>::argument_type> f; \
            return VecTraits<typename TypeVec<typename func<typename BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type>::make(f(v.x, s), f(v.y, s), f(v.z, s), f(v.w, s)); \
        } \
        template <typename T> \
        static __device__ typename TypeVec<typename func<typename BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type op(T s, const type ## 4 & v) \
        { \
            func<typename BinOpTraits<type, T>::argument_type> f; \
            return VecTraits<typename TypeVec<typename func<typename BinOpTraits<type, T>::argument_type>::result_type, 4>::vec_type>::make(f(s, v.x), f(s, v.y), f(s, v.z), f(s, v.w)); \
        }

    #define OPENCV_GPU_IMPLEMENT_VEC_OP(type) \
        OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator +, plus) \
        OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator -, minus) \
        OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator *, multiplies) \
        OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator /, divides) \
        OPENCV_GPU_IMPLEMENT_VEC_UNOP (type, operator -, negate) \
        OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator ==, equal_to) \
        OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator !=, not_equal_to) \
        OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator > , greater) \
        OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator < , less) \
        OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator >=, greater_equal) \
        OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator <=, less_equal) \
        OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator &&, logical_and) \
        OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator ||, logical_or) \
        OPENCV_GPU_IMPLEMENT_VEC_UNOP (type, operator ! , logical_not) \
        OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, max, maximum) \
        OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, min, minimum) \
        OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, fabs, fabs_func) \
        OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, sqrt, sqrt_func) \
        OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, exp, exp_func) \
        OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, exp2, exp2_func) \
        OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, exp10, exp10_func) \
        OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, log, log_func) \
        OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, log2, log2_func) \
        OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, log10, log10_func) \
        OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, sin, sin_func) \
        OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, cos, cos_func) \
        OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, tan, tan_func) \
        OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, asin, asin_func) \
        OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, acos, acos_func) \
        OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, atan, atan_func) \
        OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, sinh, sinh_func) \
        OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, cosh, cosh_func) \
        OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, tanh, tanh_func) \
        OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, asinh, asinh_func) \
        OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, acosh, acosh_func) \
        OPENCV_GPU_IMPLEMENT_VEC_UNOP(type, atanh, atanh_func) \
        OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, hypot, hypot_func) \
        OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, atan2, atan2_func) \
        OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, pow, pow_func) \
        OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, hypot_sqr, hypot_sqr_func)

    #define OPENCV_GPU_IMPLEMENT_VEC_INT_OP(type) \
        OPENCV_GPU_IMPLEMENT_VEC_OP(type) \
        OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator &, bit_and) \
        OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator |, bit_or) \
        OPENCV_GPU_IMPLEMENT_VEC_BINOP(type, operator ^, bit_xor) \
        OPENCV_GPU_IMPLEMENT_VEC_UNOP (type, operator ~, bit_not)

    OPENCV_GPU_IMPLEMENT_VEC_INT_OP(uchar)
    OPENCV_GPU_IMPLEMENT_VEC_INT_OP(char)
    OPENCV_GPU_IMPLEMENT_VEC_INT_OP(ushort)
    OPENCV_GPU_IMPLEMENT_VEC_INT_OP(short)
    OPENCV_GPU_IMPLEMENT_VEC_INT_OP(int)
    OPENCV_GPU_IMPLEMENT_VEC_INT_OP(uint)
    OPENCV_GPU_IMPLEMENT_VEC_OP(float)
    OPENCV_GPU_IMPLEMENT_VEC_OP(double)

    #undef OPENCV_GPU_IMPLEMENT_VEC_UNOP
    #undef OPENCV_GPU_IMPLEMENT_VEC_BINOP
    #undef OPENCV_GPU_IMPLEMENT_VEC_OP
    #undef OPENCV_GPU_IMPLEMENT_VEC_INT_OP
}}}

#endif // __OPENCV_GPU_VECMATH_HPP__
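A sketch of what the generated operators permit in device code (editorial illustration; the values are arbitrary). The macros stamp out component-wise operators plus scalar broadcasts for every built-in vector type, and the overload set above makes saturate_cast work per component:

    __device__ void vec_math_demo()
    {
        using namespace cv::gpu::device;

        float4 a = make_float4(1.f, 2.f, 3.f, 4.f);
        float4 b = make_float4(4.f, 3.f, 2.f, 1.f);

        float4 sum    = a + b;                          // component-wise: (5, 5, 5, 5)
        float4 scaled = a * 2.f;                        // scalar broadcast through BinOpTraits
        uchar4 packed = saturate_cast<uchar4>(scaled);  // per-component saturating cast
        (void)sum; (void)packed;
    }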
modules/gpu/src/opencv2/gpu/device/vec_traits.hpp (new file, 142 lines)
@ -0,0 +1,142 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  (standard OpenCV BSD license header, identical to utility.hpp above)
//
//M*/

#ifndef __OPENCV_GPU_VEC_TRAITS_HPP__
#define __OPENCV_GPU_VEC_TRAITS_HPP__

#include "internal_shared.hpp"

namespace cv { namespace gpu { namespace device
{
    template<typename T, int N> struct TypeVec;

    #define OPENCV_GPU_IMPLEMENT_TYPE_VEC(type) \
        template<> struct TypeVec<type, 1> { typedef type vec_type; }; \
        template<> struct TypeVec<type ## 1, 1> { typedef type ## 1 vec_type; }; \
        template<> struct TypeVec<type, 2> { typedef type ## 2 vec_type; }; \
        template<> struct TypeVec<type ## 2, 2> { typedef type ## 2 vec_type; }; \
        template<> struct TypeVec<type, 3> { typedef type ## 3 vec_type; }; \
        template<> struct TypeVec<type ## 3, 3> { typedef type ## 3 vec_type; }; \
        template<> struct TypeVec<type, 4> { typedef type ## 4 vec_type; }; \
        template<> struct TypeVec<type ## 4, 4> { typedef type ## 4 vec_type; };

    OPENCV_GPU_IMPLEMENT_TYPE_VEC(uchar)
    OPENCV_GPU_IMPLEMENT_TYPE_VEC(char)
    OPENCV_GPU_IMPLEMENT_TYPE_VEC(ushort)
    OPENCV_GPU_IMPLEMENT_TYPE_VEC(short)
    OPENCV_GPU_IMPLEMENT_TYPE_VEC(int)
    OPENCV_GPU_IMPLEMENT_TYPE_VEC(uint)
    OPENCV_GPU_IMPLEMENT_TYPE_VEC(float)
    OPENCV_GPU_IMPLEMENT_TYPE_VEC(double)

    #undef OPENCV_GPU_IMPLEMENT_TYPE_VEC

    template<> struct TypeVec<schar, 1> { typedef schar vec_type; };
    template<> struct TypeVec<schar, 2> { typedef char2 vec_type; };
    template<> struct TypeVec<schar, 3> { typedef char3 vec_type; };
    template<> struct TypeVec<schar, 4> { typedef char4 vec_type; };

    template<> struct TypeVec<bool, 1> { typedef uchar vec_type; };
    template<> struct TypeVec<bool, 2> { typedef uchar2 vec_type; };
    template<> struct TypeVec<bool, 3> { typedef uchar3 vec_type; };
    template<> struct TypeVec<bool, 4> { typedef uchar4 vec_type; };

    template<typename T> struct VecTraits;

    #define OPENCV_GPU_IMPLEMENT_VEC_TRAITS(type) \
        template<> struct VecTraits<type> \
        { \
            typedef type elem_type; \
            enum {cn=1}; \
            static __device__ __host__ type all(type v) {return v;} \
            static __device__ __host__ type make(type x) {return x;} \
        }; \
        template<> struct VecTraits<type ## 1> \
        { \
            typedef type elem_type; \
            enum {cn=1}; \
            static __device__ __host__ type ## 1 all(type v) {return make_ ## type ## 1(v);} \
            static __device__ __host__ type ## 1 make(type x) {return make_ ## type ## 1(x);} \
        }; \
        template<> struct VecTraits<type ## 2> \
        { \
            typedef type elem_type; \
            enum {cn=2}; \
            static __device__ __host__ type ## 2 all(type v) {return make_ ## type ## 2(v, v);} \
            static __device__ __host__ type ## 2 make(type x, type y) {return make_ ## type ## 2(x, y);} \
        }; \
        template<> struct VecTraits<type ## 3> \
        { \
            typedef type elem_type; \
            enum {cn=3}; \
            static __device__ __host__ type ## 3 all(type v) {return make_ ## type ## 3(v, v, v);} \
            static __device__ __host__ type ## 3 make(type x, type y, type z) {return make_ ## type ## 3(x, y, z);} \
        }; \
        template<> struct VecTraits<type ## 4> \
        { \
            typedef type elem_type; \
            enum {cn=4}; \
            static __device__ __host__ type ## 4 all(type v) {return make_ ## type ## 4(v, v, v, v);} \
            static __device__ __host__ type ## 4 make(type x, type y, type z, type w) {return make_ ## type ## 4(x, y, z, w);} \
        };

    OPENCV_GPU_IMPLEMENT_VEC_TRAITS(uchar)
    OPENCV_GPU_IMPLEMENT_VEC_TRAITS(char)
    OPENCV_GPU_IMPLEMENT_VEC_TRAITS(ushort)
    OPENCV_GPU_IMPLEMENT_VEC_TRAITS(short)
    OPENCV_GPU_IMPLEMENT_VEC_TRAITS(int)
    OPENCV_GPU_IMPLEMENT_VEC_TRAITS(uint)
    OPENCV_GPU_IMPLEMENT_VEC_TRAITS(float)
    OPENCV_GPU_IMPLEMENT_VEC_TRAITS(double)

    #undef OPENCV_GPU_IMPLEMENT_VEC_TRAITS

    template<> struct VecTraits<schar>
    {
        typedef schar elem_type;
        enum {cn=1};
        static __device__ __host__ schar all(schar v) {return v;}
        static __device__ __host__ schar make(schar x) {return x;}
    };
}}}

#endif // __OPENCV_GPU_VEC_TRAITS_HPP__
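And the traits in use (editorial illustration): TypeVec maps an element type and a channel count to the corresponding CUDA vector type, while VecTraits gives uniform construction and introspection across 1-4 channels:

    __host__ __device__ void vec_traits_demo()
    {
        using namespace cv::gpu::device;

        typedef TypeVec<float, 3>::vec_type F3;       // float3
        F3 zero = VecTraits<F3>::all(0.f);            // (0.f, 0.f, 0.f)
        F3 pt   = VecTraits<F3>::make(1.f, 2.f, 3.f); // (1.f, 2.f, 3.f)
        int cn  = VecTraits<F3>::cn;                  // 3 channels
        (void)zero; (void)pt; (void)cn;
    }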
File diff suppressed because it is too large
Load Diff
@ -95,9 +95,7 @@ namespace
        img_cols(img.cols), img_rows(img.rows),
        use_mask(!mask.empty()),
        upright(surf.upright)
        use_mask(!mask.empty())
    {
        CV_Assert(!img.empty() && img.type() == CV_8UC1);
        CV_Assert(mask.empty() || (mask.size() == img.size() && mask.type() == CV_8UC1));
@ -224,8 +222,6 @@ namespace

    bool use_mask;

    bool upright;

    int maxCandidates;
    int maxFeatures;

@ -869,7 +869,7 @@ TEST(GaussianBlur)

TEST(pyrDown)
{
    gpu::PyrDownBuf buf;
    gpu::PyrDownBuf buf(Size(4000, 4000), CV_16SC3);

    for (int size = 4000; size >= 1000; size -= 1000)
    {
@ -893,7 +893,7 @@ TEST(pyrDown)

TEST(pyrUp)
{
    gpu::PyrUpBuf buf;
    gpu::PyrUpBuf buf(Size(4000, 4000), CV_16SC3);

    for (int size = 4000; size >= 1000; size -= 1000)
    {
@ -914,3 +914,26 @@ TEST(pyrUp)
        GPU_OFF;
    }
}

TEST(equalizeHist)
{
    for (int size = 1000; size < 4000; size += 1000)
    {
        SUBTEST << "size " << size;

        Mat src; gen(src, size, size, CV_8UC1, 0, 256);
        Mat dst(src.size(), src.type());

        CPU_ON;
        equalizeHist(src, dst);
        CPU_OFF;

        gpu::GpuMat d_src(src);
        gpu::GpuMat d_dst(src.size(), src.type());

        GPU_ON;
        gpu::equalizeHist(d_src, d_dst);
        GPU_OFF;
    }
}