added registerPageLocked/unregisterPageLocked functions
added convert functions to BruteForceMatcher_GPU, other minor fixes
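Neither new API appears in the hunks below (they live in the gpu module headers and matcher source), so for context, here is a minimal host-side sketch of how the additions are typically used. The signatures follow the OpenCV 2.x gpu module and should be read as assumptions, not as part of this commit:

#include <opencv2/gpu/gpu.hpp>

int main()
{
    cv::Mat frame(480, 640, CV_8UC1);

    // Page-lock existing host memory so upload()/download() can use fast DMA transfers.
    cv::gpu::registerPageLocked(frame);

    cv::gpu::GpuMat d_frame;
    d_frame.upload(frame);

    // ... run GPU work, e.g. BruteForceMatcher_GPU; its new *Convert functions
    // turn already-downloaded result matrices into std::vector<cv::DMatch>.

    cv::gpu::unregisterPageLocked(frame);
    return 0;
}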
@@ -56,9 +56,8 @@ namespace cv { namespace gpu { namespace bfmatcher
     ///////////////////////////////////////////////////////////////////////////////
     // Mask strategy
 
-    class SingleMask
+    struct SingleMask
     {
-    public:
         explicit SingleMask(const PtrStep& mask_) : mask(mask_) {}
 
         __device__ __forceinline__ bool operator()(int queryIdx, int trainIdx) const
@@ -66,13 +65,11 @@ namespace cv { namespace gpu { namespace bfmatcher
             return mask.ptr(queryIdx)[trainIdx] != 0;
         }
 
-    private:
-        PtrStep mask;
+        const PtrStep mask;
     };
 
-    class MaskCollection
+    struct MaskCollection
     {
-    public:
         explicit MaskCollection(PtrStep* maskCollection_) : maskCollection(maskCollection_) {}
 
         __device__ __forceinline__ void nextMask()
@@ -86,15 +83,14 @@ namespace cv { namespace gpu { namespace bfmatcher
             return curMask.data == 0 || (ForceGlob<uchar>::Load(curMask.ptr(queryIdx), trainIdx, val), (val != 0));
         }
 
-    private:
-        PtrStep* maskCollection;
+        const PtrStep* maskCollection;
         PtrStep curMask;
     };
 
     class WithOutMask
     {
    public:
-        __device__ __forceinline__ void nextMask()
+        __device__ __forceinline__ void nextMask() const
        {
        }
        __device__ __forceinline__ bool operator()(int queryIdx, int trainIdx) const
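The three mask policies above deliberately share one call shape so the match kernels can be templated on the policy; with WithOutMask the check folds to a compile-time constant and the branch disappears. A simplified host-side analogue of the pattern (not from the diff, names are illustrative):

struct NoMaskSketch
{
    // Always allow the pair; inlined away when instantiated into the loop below.
    bool operator()(int, int) const { return true; }
};

template <typename Mask>
int countAllowedPairs(const Mask& mask, int queryCount, int trainCount)
{
    int allowed = 0;
    for (int q = 0; q < queryCount; ++q)
        for (int t = 0; t < trainCount; ++t)
            if (mask(q, t)) // same call shape as SingleMask / MaskCollection
                ++allowed;
    return allowed;
}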
@@ -128,9 +124,8 @@ namespace cv { namespace gpu { namespace bfmatcher
     ///////////////////////////////////////////////////////////////////////////////
     // Distance
 
-    template <typename T> class L1Dist
+    template <typename T> struct L1Dist
     {
-    public:
         typedef int ResultType;
         typedef int ValueType;
 
@@ -151,12 +146,10 @@ namespace cv { namespace gpu { namespace bfmatcher
             return mySum;
         }
 
-    private:
         int mySum;
     };
-    template <> class L1Dist<float>
+    template <> struct L1Dist<float>
     {
-    public:
         typedef float ResultType;
         typedef float ValueType;
 
@@ -177,13 +170,11 @@ namespace cv { namespace gpu { namespace bfmatcher
             return mySum;
         }
 
-    private:
         float mySum;
     };
 
-    class L2Dist
+    struct L2Dist
     {
-    public:
         typedef float ResultType;
         typedef float ValueType;
 
@@ -205,13 +196,11 @@ namespace cv { namespace gpu { namespace bfmatcher
             return sqrtf(mySum);
         }
 
-    private:
         float mySum;
     };
 
-    class HammingDist
+    struct HammingDist
     {
-    public:
         typedef int ResultType;
         typedef int ValueType;
 
@@ -232,7 +221,6 @@ namespace cv { namespace gpu { namespace bfmatcher
             return mySum;
         }
 
-    private:
         int mySum;
     };
 
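Each distance class pairs a running accumulator (mySum, now a public struct member) with typedefs the kernels use to size shared memory (typename Dist::ResultType further below). A host-side analogue of the accumulate-then-read usage, with member names chosen for illustration rather than taken from the device interface:

#include <cstdlib>

struct L1DistSketch
{
    typedef int ResultType;

    L1DistSketch() : mySum(0) {}

    // Accumulate one coordinate pair; the device code does this per thread and
    // then reduces the partial sums across the block.
    void reduceIter(int v1, int v2) { mySum += std::abs(v1 - v2); }

    operator ResultType() const { return mySum; }

    int mySum;
};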
@@ -425,10 +413,8 @@ namespace cv { namespace gpu { namespace bfmatcher
     ///////////////////////////////////////////////////////////////////////////////
     // ReduceDescCalculator
 
-    template <int BLOCK_DIM_X, typename T>
-    class ReduceDescCalculatorSimple
+    template <int BLOCK_DIM_X, typename T> struct ReduceDescCalculatorSimple
     {
-    public:
         __device__ __forceinline__ void prepare(const T* queryDescs_, int, void*)
         {
             queryDescs = queryDescs_;
@@ -440,14 +426,12 @@ namespace cv { namespace gpu { namespace bfmatcher
             reduceDescDiff<BLOCK_DIM_X>(queryDescs, trainDescs, desc_len, dist, sdiff_row);
         }
 
-    private:
         const T* queryDescs;
     };
 
     template <int BLOCK_DIM_X, int MAX_DESCRIPTORS_LEN, bool DESC_LEN_EQ_MAX_LEN, typename T, typename U>
-    class ReduceDescCalculatorCached
+    struct ReduceDescCalculatorCached
     {
-    public:
         __device__ __forceinline__ void prepare(const T* queryDescs, int desc_len, U* smem)
         {
             loadDescsVals<BLOCK_DIM_X, MAX_DESCRIPTORS_LEN>(queryDescs, desc_len, queryVals, smem);
@@ -459,7 +443,6 @@ namespace cv { namespace gpu { namespace bfmatcher
             reduceDescDiffCached<BLOCK_DIM_X, MAX_DESCRIPTORS_LEN, DESC_LEN_EQ_MAX_LEN>(queryVals, trainDescs, desc_len, dist, sdiff_row);
         }
 
-    private:
         U queryVals[MAX_DESCRIPTORS_LEN / BLOCK_DIM_X];
     };
 
@@ -497,10 +480,8 @@ namespace cv { namespace gpu { namespace bfmatcher
     ///////////////////////////////////////////////////////////////////////////////
     // Train collection loop strategy
 
-    template <typename T>
-    class SingleTrain
+    template <typename T> struct SingleTrain
     {
-    public:
         explicit SingleTrain(const DevMem2D_<T>& trainDescs_) : trainDescs(trainDescs_)
         {
         }
@@ -517,14 +498,11 @@ namespace cv { namespace gpu { namespace bfmatcher
             return trainDescs.cols;
         }
 
-    private:
-        DevMem2D_<T> trainDescs;
+        const DevMem2D_<T> trainDescs;
     };
 
-    template <typename T>
-    class TrainCollection
+    template <typename T> struct TrainCollection
     {
-    public:
         TrainCollection(const DevMem2D_<T>* trainCollection_, int nImg_, int desclen_) :
             trainCollection(trainCollection_), nImg(nImg_), desclen(desclen_)
         {
@@ -536,7 +514,7 @@ namespace cv { namespace gpu { namespace bfmatcher
         {
             for (int imgIdx = 0; imgIdx < nImg; ++imgIdx)
             {
-                DevMem2D_<T> trainDescs = trainCollection[imgIdx];
+                const DevMem2D_<T> trainDescs = trainCollection[imgIdx];
                 m.nextMask();
                 matchDescs<Dist>(queryIdx, imgIdx, trainDescs, m, reduceDescCalc, myMin, myBestTrainIdx, myBestImgIdx, sdiff_row);
             }
@@ -547,7 +525,6 @@ namespace cv { namespace gpu { namespace bfmatcher
             return desclen;
         }
 
-    private:
         const DevMem2D_<T>* trainCollection;
         int nImg;
         int desclen;
@@ -806,7 +783,7 @@ namespace cv { namespace gpu { namespace bfmatcher
     // Calc distance kernel
 
     template <int BLOCK_DIM_X, int BLOCK_DIM_Y, typename Dist, typename T, typename Mask>
-    __global__ void calcDistance(PtrStep_<T> queryDescs_, DevMem2D_<T> trainDescs_, Mask mask, PtrStepf distance)
+    __global__ void calcDistance(const PtrStep_<T> queryDescs_, const DevMem2D_<T> trainDescs_, const Mask mask, PtrStepf distance)
     {
         __shared__ typename Dist::ResultType sdiff[BLOCK_DIM_X * BLOCK_DIM_Y];
 
@@ -989,8 +966,7 @@ namespace cv { namespace gpu { namespace bfmatcher
     ///////////////////////////////////////////////////////////////////////////////
     // find knn match kernel
 
-    template <int BLOCK_SIZE>
-    __global__ void findBestMatch(DevMem2Df allDist_, int i, PtrStepi trainIdx_, PtrStepf distance_)
+    template <int BLOCK_SIZE> __global__ void findBestMatch(DevMem2Df allDist_, int i, PtrStepi trainIdx_, PtrStepf distance_)
     {
         const int SMEM_SIZE = BLOCK_SIZE > 64 ? BLOCK_SIZE : 64;
         __shared__ float sdist[SMEM_SIZE];
@@ -1130,8 +1106,8 @@ namespace cv { namespace gpu { namespace bfmatcher
     // Radius Match kernel
 
     template <int BLOCK_DIM_X, int BLOCK_DIM_Y, typename Dist, typename T, typename Mask>
-    __global__ void radiusMatch(PtrStep_<T> queryDescs_, DevMem2D_<T> trainDescs_,
-        float maxDistance, Mask mask, DevMem2Di trainIdx_, unsigned int* nMatches, PtrStepf distance)
+    __global__ void radiusMatch(const PtrStep_<T> queryDescs_, const DevMem2D_<T> trainDescs_,
+        float maxDistance, const Mask mask, DevMem2Di trainIdx_, unsigned int* nMatches, PtrStepf distance)
     {
     #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 110
 
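The kernel body above stays wrapped in the compute-capability guard because radiusMatch tallies results through a shared counter (nMatches), which on this hardware generation means global atomics that sm_10 devices lack; on older targets the body compiles to an empty kernel. The same guard pattern in isolation, as a hypothetical kernel not taken from the diff:

__global__ void countAboveThreshold(const float* data, int n, float thresh, unsigned int* counter)
{
#if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 110
    // atomicAdd on global memory requires compute capability 1.1 or higher,
    // so this body is compiled out entirely for sm_10 builds.
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n && data[i] > thresh)
        atomicAdd(counter, 1u);
#endif
}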
@@ -42,6 +42,7 @@
 
 #include "internal_shared.hpp"
 #include "opencv2/gpu/device/transform.hpp"
+#include "opencv2/gpu/device/functional.hpp"
 
 #define SOLVE_PNP_RANSAC_MAX_NUM_ITERS 200
 
@@ -56,9 +57,9 @@ namespace cv { namespace gpu
     __constant__ float3 crot2;
     __constant__ float3 ctransl;
 
-    struct TransformOp
+    struct TransformOp : unary_function<float3, float3>
     {
-        __device__ __forceinline__ float3 operator()(float3 p) const
+        __device__ __forceinline__ float3 operator()(const float3& p) const
         {
             return make_float3(
                 crot0.x * p.x + crot0.y * p.y + crot0.z * p.z + ctransl.x,
@@ -89,9 +90,9 @@ namespace cv { namespace gpu
     __constant__ float3 cproj0;
     __constant__ float3 cproj1;
 
-    struct ProjectOp
+    struct ProjectOp : unary_function<float3, float3>
     {
-        __device__ __forceinline__ float2 operator()(float3 p) const
+        __device__ __forceinline__ float2 operator()(const float3& p) const
         {
             // Rotate and translate in 3D
             float3 t = make_float3(
@@ -49,7 +49,7 @@ using namespace cv::gpu::device;
 
 namespace cv { namespace gpu { namespace canny
 {
-    __global__ void calcSobelRowPass(PtrStep src, PtrStepi dx_buf, PtrStepi dy_buf, int rows, int cols)
+    __global__ void calcSobelRowPass(const PtrStep src, PtrStepi dx_buf, PtrStepi dy_buf, int rows, int cols)
     {
         __shared__ int smem[16][18];
 
@@ -100,7 +100,8 @@ namespace cv { namespace gpu { namespace canny
         }
     };
 
-    template <typename Norm> __global__ void calcMagnitude(PtrStepi dx_buf, PtrStepi dy_buf, PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols)
+    template <typename Norm> __global__ void calcMagnitude(const PtrStepi dx_buf, const PtrStepi dy_buf,
+        PtrStepi dx, PtrStepi dy, PtrStepf mag, int rows, int cols)
     {
         __shared__ int sdx[18][16];
         __shared__ int sdy[18][16];
@@ -179,7 +180,7 @@ namespace cv { namespace gpu { namespace canny
 #define CANNY_SHIFT 15
 #define TG22 (int)(0.4142135623730950488016887242097*(1<<CANNY_SHIFT) + 0.5)
 
-    __global__ void calcMap(PtrStepi dx, PtrStepi dy, PtrStepf mag, PtrStepi map, int rows, int cols, float low_thresh, float high_thresh)
+    __global__ void calcMap(const PtrStepi dx, const PtrStepi dy, const PtrStepf mag, PtrStepi map, int rows, int cols, float low_thresh, float high_thresh)
     {
         __shared__ float smem[18][18];
 
@@ -56,10 +56,9 @@ namespace cv { namespace gpu { namespace mathfunc
     //////////////////////////////////////////////////////////////////////////////////////
     // Compare
 
-    template <typename T1, typename T2>
-    struct NotEqual
+    template <typename T1, typename T2> struct NotEqual : binary_function<T1, T2, uchar>
     {
-        __device__ __forceinline__ uchar operator()(const T1& src1, const T2& src2)
+        __device__ __forceinline__ uchar operator()(const T1& src1, const T2& src2) const
         {
             return static_cast<uchar>(static_cast<int>(src1 != src2) * 255);
         }
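Deriving the functors from unary_function/binary_function (pulled in via the new functional.hpp include) gives them the argument/result typedefs that the device transform machinery inspects when wiring functors into generic kernels. The idea in plain C++, reduced to its essentials and using sketch names rather than the library's own:

template <typename Arg, typename Res> struct unary_function_sketch
{
    typedef Arg argument_type;
    typedef Res result_type;
};

struct NegateSketch : unary_function_sketch<int, int>
{
    int operator()(int x) const { return -x; }
};

// Generic code can now name the functor's signature without knowing its type.
template <typename Op>
typename Op::result_type applyOnce(Op op, typename Op::argument_type x)
{
    return op(x);
}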
@@ -467,8 +466,7 @@ namespace cv { namespace gpu { namespace mathfunc
     //////////////////////////////////////////////////////////////////////////
     // pow
 
-    template<typename T, bool Signed = device::numeric_limits<T>::is_signed>
-    struct PowOp
+    template<typename T, bool Signed = device::numeric_limits<T>::is_signed> struct PowOp : unary_function<T, T>
     {
         float power;
         PowOp(float power_) : power(power_) {}
@@ -479,13 +477,12 @@ namespace cv { namespace gpu { namespace mathfunc
         }
     };
 
-    template<typename T>
-    struct PowOp<T, true>
+    template<typename T> struct PowOp<T, true> : unary_function<T, T>
     {
         float power;
         PowOp(float power_) : power(power_) {}
 
-        __device__ __forceinline__ float operator()(const T& e)
+        __device__ __forceinline__ float operator()(const T& e) const
         {
             T res = saturate_cast<T>(__powf((float)e, power));
 
@@ -495,13 +492,12 @@ namespace cv { namespace gpu { namespace mathfunc
         }
     };
 
-    template<>
-    struct PowOp<float>
+    template<> struct PowOp<float> : unary_function<float, float>
     {
         float power;
         PowOp(float power_) : power(power_) {}
 
-        __device__ __forceinline__ float operator()(const float& e)
+        __device__ __forceinline__ float operator()(const float& e) const
         {
             return __powf(fabs(e), power);
         }
@@ -105,7 +105,7 @@ namespace cv { namespace gpu { namespace histograms
         if (x + 3 < cols) addByte(s_WarpHist, (data >> 24) & 0xFFU, tag);
     }
 
-    __global__ void histogram256(PtrStep_<uint> d_Data, uint* d_PartialHistograms, uint dataCount, uint cols)
+    __global__ void histogram256(const PtrStep_<uint> d_Data, uint* d_PartialHistograms, uint dataCount, uint cols)
     {
         //Per-warp subhistogram storage
         __shared__ uint s_Hist[HISTOGRAM256_THREADBLOCK_MEMORY];
@@ -189,21 +189,18 @@ namespace cv { namespace gpu { namespace histograms
         cudaSafeCall( cudaDeviceSynchronize() );
     }
 
-    __global__ void equalizeHist(DevMem2D src, PtrStep dst, const int* lut)
+    __constant__ int c_lut[256];
+
+    __global__ void equalizeHist(const DevMem2D src, PtrStep dst)
     {
-        __shared__ int s_lut[256];
-
-        const int tid = threadIdx.y * blockDim.x + threadIdx.x;
-
-        s_lut[tid] = lut[tid];
-        __syncthreads();
-
         const int x = blockIdx.x * blockDim.x + threadIdx.x;
         const int y = blockIdx.y * blockDim.y + threadIdx.y;
 
         if (x < src.cols && y < src.rows)
         {
-            dst.ptr(y)[x] = __float2int_rn(255.0f * s_lut[src.ptr(y)[x]] / (src.cols * src.rows));
+            const uchar val = src.ptr(y)[x];
+            const int lut = c_lut[val];
+            dst.ptr(y)[x] = __float2int_rn(255.0f / (src.cols * src.rows) * lut);
         }
     }
 
@@ -212,7 +209,9 @@ namespace cv { namespace gpu { namespace histograms
         dim3 block(16, 16);
         dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
 
-        equalizeHist<<<grid, block, 0, stream>>>(src, dst, lut);
+        cudaSafeCall( cudaMemcpyToSymbol(cv::gpu::histograms::c_lut, lut, 256 * sizeof(int), 0, cudaMemcpyDeviceToDevice) );
+
+        equalizeHist<<<grid, block, 0, stream>>>(src, dst);
         cudaSafeCall( cudaGetLastError() );
 
         if (stream == 0)
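Two things changed in equalizeHist: the LUT moved from a kernel argument staged through shared memory into __constant__ memory (one cached copy visible to every block, with no per-block copy loop or __syncthreads), and because the LUT is itself computed on the device, the upload is a device-to-device cudaMemcpyToSymbol. A self-contained sketch of the pattern, with illustrative names:

#include <cuda_runtime.h>

__constant__ int c_lut_sketch[256];

__global__ void applyLutSketch(const unsigned char* src, unsigned char* dst, int n)
{
    const int i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i < n)
        dst[i] = static_cast<unsigned char>(c_lut_sketch[src[i]]);
}

void uploadLutFromDevice(const int* d_lut)
{
    // The source pointer lives in device memory, hence cudaMemcpyDeviceToDevice.
    cudaMemcpyToSymbol(c_lut_sketch, d_lut, 256 * sizeof(int), 0, cudaMemcpyDeviceToDevice);
}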
@@ -49,7 +49,7 @@ using namespace cv::gpu::device;
 /////////////////////////////////// Remap ///////////////////////////////////////////////
 namespace cv { namespace gpu { namespace imgproc
 {
-    texture<unsigned char, 2, cudaReadModeNormalizedFloat> tex_remap;
+    texture<unsigned char, 2, cudaReadModeNormalizedFloat> tex_remap(0, cudaFilterModeLinear, cudaAddressModeWrap);
 
     __global__ void remap_1c(const float* mapx, const float* mapy, size_t map_step, uchar* out, size_t out_step, int width, int height)
     {
@@ -131,16 +131,12 @@ namespace cv { namespace gpu { namespace imgproc
         grid.x = divUp(dst.cols, threads.x);
         grid.y = divUp(dst.rows, threads.y);
 
-        tex_remap.filterMode = cudaFilterModeLinear;
-        tex_remap.addressMode[0] = tex_remap.addressMode[1] = cudaAddressModeWrap;
-        cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
-        cudaSafeCall( cudaBindTexture2D(0, tex_remap, src.data, desc, src.cols, src.rows, src.step) );
+        TextureBinder tex_remap(&tex_remap, src);
 
         remap_1c<<<grid, threads>>>(xmap.data, ymap.data, xmap.step, dst.data, dst.step, dst.cols, dst.rows);
         cudaSafeCall( cudaGetLastError() );
 
         cudaSafeCall( cudaDeviceSynchronize() );
-        cudaSafeCall( cudaUnbindTexture(tex_remap) );
     }
 
     void remap_gpu_3c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst)
@@ -151,8 +147,8 @@ namespace cv { namespace gpu { namespace imgproc
         grid.y = divUp(dst.rows, threads.y);
 
         remap_3c<<<grid, threads>>>(src.data, src.step, xmap.data, ymap.data, xmap.step, dst.data, dst.step, dst.cols, dst.rows);
-
         cudaSafeCall( cudaGetLastError() );
+
         cudaSafeCall( cudaDeviceSynchronize() );
     }
 
@@ -77,7 +77,6 @@ namespace cv
     // Returns true if the GPU analogue exists, false otherwise.
     bool tryConvertToGpuBorderType(int cpuBorderType, int& gpuBorderType);
 
-
     static inline int divUp(int total, int grain) { return (total + grain - 1) / grain; }
 
     template<class T> static inline void uploadConstant(const char* name, const T& value)
@@ -117,6 +116,49 @@ namespace cv
         cudaSafeCall( cudaUnbindTexture(tex) );
     }
 
+    class TextureBinder
+    {
+    public:
+        TextureBinder() : tex_(0) {}
+        template <typename T> TextureBinder(const textureReference* tex, const DevMem2D_<T>& img) : tex_(0)
+        {
+            bind(tex, img);
+        }
+        template <typename T> TextureBinder(const char* tex_name, const DevMem2D_<T>& img) : tex_(0)
+        {
+            bind(tex_name, img);
+        }
+        ~TextureBinder() { unbind(); }
+
+        template <typename T> void bind(const textureReference* tex, const DevMem2D_<T>& img)
+        {
+            unbind();
+
+            cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
+            cudaSafeCall( cudaBindTexture2D(0, tex, img.ptr(), &desc, img.cols, img.rows, img.step) );
+
+            tex_ = tex;
+        }
+        template <typename T> void bind(const char* tex_name, const DevMem2D_<T>& img)
+        {
+            const textureReference* tex;
+            cudaSafeCall( cudaGetTextureReference(&tex, tex_name) );
+            bind(tex, img);
+        }
+
+        void unbind()
+        {
+            if (tex_)
+            {
+                cudaUnbindTexture(tex_);
+                tex_ = 0;
+            }
+        }
+
+    private:
+        const textureReference* tex_;
+    };
+
     class NppStreamHandler
     {
     public:
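TextureBinder is the RAII helper the remap change above relies on: binding happens in the constructor and unbind() in the destructor, so an early return or error path can no longer leak a bound texture. A hedged usage sketch; the caller-side names here are hypothetical, only the class itself comes from this commit:

texture<unsigned char, 2, cudaReadModeNormalizedFloat> tex_sketch(0, cudaFilterModeLinear, cudaAddressModeWrap);

void processSketch(const cv::gpu::DevMem2D& src)
{
    cv::gpu::TextureBinder binder(&tex_sketch, src);

    // ... launch kernels that read tex_sketch ...

}   // ~TextureBinder() calls unbind() here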
@@ -43,6 +43,7 @@
 #include "internal_shared.hpp"
 #include "opencv2/gpu/device/saturate_cast.hpp"
 #include "opencv2/gpu/device/transform.hpp"
+#include "opencv2/gpu/device/functional.hpp"
 
 using namespace cv::gpu::device;
 
@@ -62,7 +63,7 @@ namespace cv { namespace gpu { namespace matrix_operations {
     ///////////////////////////////////////////////////////////////////////////
 
     template<typename T>
-    __global__ void copy_to_with_mask(T * mat_src, T * mat_dst, const unsigned char * mask, int cols, int rows, size_t step_mat, size_t step_mask, int channels)
+    __global__ void copy_to_with_mask(const T* mat_src, T* mat_dst, const uchar* mask, int cols, int rows, size_t step_mat, size_t step_mask, int channels)
     {
         size_t x = blockIdx.x * blockDim.x + threadIdx.x;
         size_t y = blockIdx.y * blockDim.y + threadIdx.y;
@@ -162,7 +163,7 @@ namespace cv { namespace gpu { namespace matrix_operations {
     }
 
     template<typename T>
-    __global__ void set_to_without_mask(T * mat, int cols, int rows, size_t step, int channels)
+    __global__ void set_to_without_mask(T* mat, int cols, int rows, size_t step, int channels)
     {
         size_t x = blockIdx.x * blockDim.x + threadIdx.x;
         size_t y = blockIdx.y * blockDim.y + threadIdx.y;
@@ -175,7 +176,7 @@ namespace cv { namespace gpu { namespace matrix_operations {
     }
 
     template<typename T>
-    __global__ void set_to_with_mask(T * mat, const unsigned char * mask, int cols, int rows, size_t step, int channels, size_t step_mask)
+    __global__ void set_to_with_mask(T* mat, const uchar* mask, int cols, int rows, size_t step, int channels, size_t step_mask)
     {
         size_t x = blockIdx.x * blockDim.x + threadIdx.x;
         size_t y = blockIdx.y * blockDim.y + threadIdx.y;
@@ -237,19 +238,16 @@ namespace cv { namespace gpu { namespace matrix_operations {
     //////////////////////////////// ConvertTo ////////////////////////////////
     ///////////////////////////////////////////////////////////////////////////
 
-    template <typename T, typename D>
-    class Convertor
+    template <typename T, typename D> struct Convertor : unary_function<T, D>
     {
-    public:
         Convertor(double alpha_, double beta_) : alpha(alpha_), beta(beta_) {}
 
-        __device__ __forceinline__ D operator()(const T& src)
+        __device__ __forceinline__ D operator()(const T& src) const
         {
             return saturate_cast<D>(alpha * src + beta);
         }
 
-    private:
-        double alpha, beta;
+        const double alpha, beta;
     };
 
     template<typename T, typename D>
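Convertor encodes the convertTo rule dst = saturate_cast<D>(alpha * src + beta); making operator() const and the coefficients const members lets the transform framework treat it as a plain value functor. The same arithmetic on the host, with the uchar saturation written out by hand as an approximation of saturate_cast:

#include <algorithm>

static unsigned char convertScaleSketch(float src, double alpha, double beta)
{
    const double v = alpha * src + beta;
    // Clamp to [0, 255] and round to nearest, approximating saturate_cast<uchar>.
    return static_cast<unsigned char>(std::min(255.0, std::max(0.0, v)) + 0.5);
}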
@@ -225,7 +225,7 @@ namespace cv { namespace gpu { namespace surf
     };
 
     template <typename Mask>
-    __global__ void icvFindMaximaInLayer(PtrStepf det, PtrStepf trace, int4* maxPosBuffer, unsigned int* maxCounter)
+    __global__ void icvFindMaximaInLayer(const PtrStepf det, const PtrStepf trace, int4* maxPosBuffer, unsigned int* maxCounter)
     {
     #if defined (__CUDA_ARCH__) && __CUDA_ARCH__ >= 110
 
@@ -346,7 +346,7 @@ namespace cv { namespace gpu { namespace surf
     ////////////////////////////////////////////////////////////////////////
     // INTERPOLATION
 
-    __global__ void icvInterpolateKeypoint(PtrStepf det, const int4* maxPosBuffer,
+    __global__ void icvInterpolateKeypoint(const PtrStepf det, const int4* maxPosBuffer,
         float* featureX, float* featureY, int* featureLaplacian, float* featureSize, float* featureHessian,
         unsigned int* featureCounter)
     {