optimized gpu pyrDown and pyrUp
This commit is contained in:
parent
3a3bc18381
commit
173ac5a64b
@ -762,48 +762,10 @@ namespace cv
|
||||
|
||||
//! smoothes the source image and downsamples it
|
||||
CV_EXPORTS void pyrDown(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());
|
||||
|
||||
struct CV_EXPORTS PyrDownBuf;
|
||||
|
||||
CV_EXPORTS void pyrDown(const GpuMat& src, GpuMat& dst, PyrDownBuf& buf, Stream& stream = Stream::Null());
|
||||
|
||||
struct CV_EXPORTS PyrDownBuf
|
||||
{
|
||||
PyrDownBuf() : image_type(-1) {}
|
||||
PyrDownBuf(Size image_size, int image_type_) : image_type(-1) { create(image_size, image_type_); }
|
||||
void create(Size image_size, int image_type_);
|
||||
|
||||
private:
|
||||
friend void pyrDown(const GpuMat&, GpuMat&, PyrDownBuf&, Stream& stream);
|
||||
|
||||
static Mat ker;
|
||||
GpuMat buf;
|
||||
Ptr<FilterEngine_GPU> filter;
|
||||
int image_type;
|
||||
};
|
||||
|
||||
//! upsamples the source image and then smoothes it
|
||||
CV_EXPORTS void pyrUp(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());
|
||||
|
||||
struct CV_EXPORTS PyrUpBuf;
|
||||
|
||||
CV_EXPORTS void pyrUp(const GpuMat& src, GpuMat& dst, PyrUpBuf& buf, Stream& stream = Stream::Null());
|
||||
|
||||
struct CV_EXPORTS PyrUpBuf
|
||||
{
|
||||
PyrUpBuf() : image_type(-1) {}
|
||||
PyrUpBuf(Size image_size, int image_type_) : image_type(-1) { create(image_size, image_type_); }
|
||||
void create(Size image_size, int image_type_);
|
||||
|
||||
private:
|
||||
friend void pyrUp(const GpuMat&, GpuMat&, PyrUpBuf&, Stream& stream);
|
||||
|
||||
static Mat ker;
|
||||
GpuMat buf;
|
||||
Ptr<FilterEngine_GPU> filter;
|
||||
int image_type;
|
||||
};
|
||||
|
||||
//! performs linear blending of two images
|
||||
//! to avoid accuracy errors sum of weigths shouldn't be very close to zero
|
||||
CV_EXPORTS void blendLinear(const GpuMat& img1, const GpuMat& img2, const GpuMat& weights1, const GpuMat& weights2,
|
||||
|
@ -42,10 +42,8 @@
|
||||
|
||||
#include <utility>
|
||||
#include "internal_shared.hpp"
|
||||
#include "opencv2/gpu/device/utility.hpp"
|
||||
|
||||
using namespace cv::gpu;
|
||||
using namespace cv::gpu::device;
|
||||
|
||||
namespace cv { namespace gpu { namespace canny
|
||||
{
|
||||
|
@ -42,6 +42,8 @@
|
||||
|
||||
#include "internal_shared.hpp"
|
||||
#include "opencv2/gpu/device/border_interpolate.hpp"
|
||||
#include "opencv2/gpu/device/vec_traits.hpp"
|
||||
#include "opencv2/gpu/device/vec_math.hpp"
|
||||
|
||||
using namespace cv::gpu;
|
||||
using namespace cv::gpu::device;
|
||||
@ -976,6 +978,250 @@ namespace cv { namespace gpu { namespace imgproc
|
||||
template void upsampleCaller<float,3>(const DevMem2D src, DevMem2D dst, cudaStream_t stream);
|
||||
template void upsampleCaller<float,4>(const DevMem2D src, DevMem2D dst, cudaStream_t stream);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// pyrDown
|
||||
|
||||
template <typename T> __global__ void pyrDown(const PtrStep_<T> src, PtrStep_<T> dst, const BrdReflect101 rowBrd, const BrdReflect101 colBrd, int dst_cols)
|
||||
{
|
||||
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type value_type;
|
||||
|
||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
const int y = blockIdx.y;
|
||||
|
||||
__shared__ value_type smem[256 + 4];
|
||||
|
||||
value_type sum;
|
||||
|
||||
const int src_y = 2*y;
|
||||
|
||||
sum = VecTraits<value_type>::all(0);
|
||||
|
||||
sum = sum + 0.0625f * src.ptr(colBrd.idx(src_y - 2))[rowBrd.idx(x)];
|
||||
sum = sum + 0.25f * src.ptr(colBrd.idx(src_y - 1))[rowBrd.idx(x)];
|
||||
sum = sum + 0.375f * src.ptr(colBrd.idx(src_y ))[rowBrd.idx(x)];
|
||||
sum = sum + 0.25f * src.ptr(colBrd.idx(src_y + 1))[rowBrd.idx(x)];
|
||||
sum = sum + 0.0625f * src.ptr(colBrd.idx(src_y + 2))[rowBrd.idx(x)];
|
||||
|
||||
smem[2 + threadIdx.x] = sum;
|
||||
|
||||
if (threadIdx.x < 2)
|
||||
{
|
||||
const int left_x = x - 2 + threadIdx.x;
|
||||
|
||||
sum = VecTraits<value_type>::all(0);
|
||||
|
||||
sum = sum + 0.0625f * src.ptr(colBrd.idx(src_y - 2))[rowBrd.idx(left_x)];
|
||||
sum = sum + 0.25f * src.ptr(colBrd.idx(src_y - 1))[rowBrd.idx(left_x)];
|
||||
sum = sum + 0.375f * src.ptr(colBrd.idx(src_y ))[rowBrd.idx(left_x)];
|
||||
sum = sum + 0.25f * src.ptr(colBrd.idx(src_y + 1))[rowBrd.idx(left_x)];
|
||||
sum = sum + 0.0625f * src.ptr(colBrd.idx(src_y + 2))[rowBrd.idx(left_x)];
|
||||
|
||||
smem[threadIdx.x] = sum;
|
||||
}
|
||||
|
||||
if (threadIdx.x > 253)
|
||||
{
|
||||
const int right_x = x + threadIdx.x + 2;
|
||||
|
||||
sum = VecTraits<value_type>::all(0);
|
||||
|
||||
sum = sum + 0.0625f * src.ptr(colBrd.idx(src_y - 2))[rowBrd.idx(right_x)];
|
||||
sum = sum + 0.25f * src.ptr(colBrd.idx(src_y - 1))[rowBrd.idx(right_x)];
|
||||
sum = sum + 0.375f * src.ptr(colBrd.idx(src_y ))[rowBrd.idx(right_x)];
|
||||
sum = sum + 0.25f * src.ptr(colBrd.idx(src_y + 1))[rowBrd.idx(right_x)];
|
||||
sum = sum + 0.0625f * src.ptr(colBrd.idx(src_y + 2))[rowBrd.idx(right_x)];
|
||||
|
||||
smem[4 + threadIdx.x] = sum;
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
|
||||
if (threadIdx.x < 128)
|
||||
{
|
||||
const int tid2 = threadIdx.x * 2;
|
||||
|
||||
sum = VecTraits<value_type>::all(0);
|
||||
|
||||
sum = sum + 0.0625f * smem[2 + tid2 - 2];
|
||||
sum = sum + 0.25f * smem[2 + tid2 - 1];
|
||||
sum = sum + 0.375f * smem[2 + tid2 ];
|
||||
sum = sum + 0.25f * smem[2 + tid2 + 1];
|
||||
sum = sum + 0.0625f * smem[2 + tid2 + 2];
|
||||
|
||||
const int dst_x = (blockIdx.x * blockDim.x + tid2) / 2;
|
||||
|
||||
if (dst_x < dst_cols)
|
||||
dst.ptr(y)[dst_x] = saturate_cast<T>(sum);
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T, int cn> void pyrDown_gpu(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream)
|
||||
{
|
||||
const dim3 block(256);
|
||||
const dim3 grid(divUp(src.cols, block.x), dst.rows);
|
||||
|
||||
BrdReflect101 rowBrd(src.cols);
|
||||
BrdReflect101 colBrd(src.rows);
|
||||
|
||||
pyrDown<typename TypeVec<T, cn>::vec_type><<<grid, block, 0, stream>>>(
|
||||
static_cast< DevMem2D_<typename TypeVec<T, cn>::vec_type> >(src),
|
||||
static_cast< DevMem2D_<typename TypeVec<T, cn>::vec_type> >(dst),
|
||||
rowBrd, colBrd, dst.cols);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
template void pyrDown_gpu<uchar, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
template void pyrDown_gpu<uchar, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
template void pyrDown_gpu<uchar, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
template void pyrDown_gpu<uchar, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
|
||||
template void pyrDown_gpu<schar, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
template void pyrDown_gpu<schar, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
template void pyrDown_gpu<schar, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
template void pyrDown_gpu<schar, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
|
||||
template void pyrDown_gpu<ushort, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
template void pyrDown_gpu<ushort, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
template void pyrDown_gpu<ushort, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
template void pyrDown_gpu<ushort, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
|
||||
template void pyrDown_gpu<short, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
template void pyrDown_gpu<short, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
template void pyrDown_gpu<short, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
template void pyrDown_gpu<short, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
|
||||
template void pyrDown_gpu<int, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
template void pyrDown_gpu<int, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
template void pyrDown_gpu<int, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
template void pyrDown_gpu<int, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
|
||||
template void pyrDown_gpu<float, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
template void pyrDown_gpu<float, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
template void pyrDown_gpu<float, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
template void pyrDown_gpu<float, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// pyrUp
|
||||
|
||||
template <typename T> __global__ void pyrUp(const PtrStep_<T> src, DevMem2D_<T> dst, const BrdReflect101 rowBrd, const BrdReflect101 colBrd)
|
||||
{
|
||||
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type value_type;
|
||||
|
||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
__shared__ T smem1[10][10];
|
||||
__shared__ value_type smem2[20][16];
|
||||
|
||||
value_type sum;
|
||||
|
||||
if (threadIdx.x < 10 && threadIdx.y < 10)
|
||||
smem1[threadIdx.y][threadIdx.x] = src.ptr(colBrd.idx(blockIdx.y * blockDim.y / 2 + threadIdx.y - 1))[rowBrd.idx(blockIdx.x * blockDim.x / 2 + threadIdx.x - 1)];
|
||||
|
||||
__syncthreads();
|
||||
|
||||
const int tidx = threadIdx.x;
|
||||
|
||||
sum = VecTraits<value_type>::all(0);
|
||||
|
||||
sum = sum + 0.0625f * smem1[1 + threadIdx.y / 2][1 + ((tidx - 2) >> 1)];
|
||||
sum = sum + 0.25f * smem1[1 + threadIdx.y / 2][1 + ((tidx - 1) >> 1)];
|
||||
sum = sum + 0.375f * smem1[1 + threadIdx.y / 2][1 + ((tidx ) >> 1)];
|
||||
sum = sum + 0.25f * smem1[1 + threadIdx.y / 2][1 + ((tidx + 1) >> 1)];
|
||||
sum = sum + 0.0625f * smem1[1 + threadIdx.y / 2][1 + ((tidx + 2) >> 1)];
|
||||
|
||||
smem2[2 + threadIdx.y][tidx] = sum;
|
||||
|
||||
if (threadIdx.y < 2)
|
||||
{
|
||||
sum = VecTraits<value_type>::all(0);
|
||||
|
||||
sum = sum + 0.0625f * smem1[0][1 + ((tidx - 2) >> 1)];
|
||||
sum = sum + 0.25f * smem1[0][1 + ((tidx - 1) >> 1)];
|
||||
sum = sum + 0.375f * smem1[0][1 + ((tidx ) >> 1)];
|
||||
sum = sum + 0.25f * smem1[0][1 + ((tidx + 1) >> 1)];
|
||||
sum = sum + 0.0625f * smem1[0][1 + ((tidx + 2) >> 1)];
|
||||
|
||||
smem2[threadIdx.y][tidx] = sum;
|
||||
}
|
||||
|
||||
if (threadIdx.y > 13)
|
||||
{
|
||||
sum = VecTraits<value_type>::all(0);
|
||||
|
||||
sum = sum + 0.0625f * smem1[9][1 + ((tidx - 2) >> 1)];
|
||||
sum = sum + 0.25f * smem1[9][1 + ((tidx - 1) >> 1)];
|
||||
sum = sum + 0.375f * smem1[9][1 + ((tidx ) >> 1)];
|
||||
sum = sum + 0.25f * smem1[9][1 + ((tidx + 1) >> 1)];
|
||||
sum = sum + 0.0625f * smem1[9][1 + ((tidx + 2) >> 1)];
|
||||
|
||||
smem2[4 + threadIdx.y][tidx] = sum;
|
||||
}
|
||||
|
||||
__syncthreads();
|
||||
|
||||
sum = VecTraits<value_type>::all(0);
|
||||
|
||||
sum = sum + 0.0625f * smem2[2 + threadIdx.y - 2][tidx];
|
||||
sum = sum + 0.25f * smem2[2 + threadIdx.y - 1][tidx];
|
||||
sum = sum + 0.375f * smem2[2 + threadIdx.y ][tidx];
|
||||
sum = sum + 0.25f * smem2[2 + threadIdx.y + 1][tidx];
|
||||
sum = sum + 0.0625f * smem2[2 + threadIdx.y + 2][tidx];
|
||||
|
||||
if (x < dst.cols && y < dst.rows)
|
||||
dst.ptr(y)[x] = saturate_cast<T>(sum);
|
||||
}
|
||||
|
||||
template <typename T, int cn> void pyrUp_gpu(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream)
|
||||
{
|
||||
const dim3 block(16, 16);
|
||||
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
|
||||
|
||||
BrdReflect101 rowBrd(src.cols);
|
||||
BrdReflect101 colBrd(src.rows);
|
||||
|
||||
pyrUp<typename TypeVec<T, cn>::vec_type><<<grid, block, 0, stream>>>(
|
||||
static_cast< DevMem2D_<typename TypeVec<T, cn>::vec_type> >(src),
|
||||
static_cast< DevMem2D_<typename TypeVec<T, cn>::vec_type> >(dst),
|
||||
rowBrd, colBrd);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
template void pyrUp_gpu<uchar, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
template void pyrUp_gpu<uchar, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
template void pyrUp_gpu<uchar, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
template void pyrUp_gpu<uchar, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
|
||||
template void pyrUp_gpu<schar, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
template void pyrUp_gpu<schar, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
template void pyrUp_gpu<schar, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
template void pyrUp_gpu<schar, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
|
||||
template void pyrUp_gpu<ushort, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
template void pyrUp_gpu<ushort, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
template void pyrUp_gpu<ushort, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
template void pyrUp_gpu<ushort, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
|
||||
template void pyrUp_gpu<short, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
template void pyrUp_gpu<short, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
template void pyrUp_gpu<short, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
template void pyrUp_gpu<short, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
|
||||
template void pyrUp_gpu<int, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
template void pyrUp_gpu<int, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
template void pyrUp_gpu<int, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
template void pyrUp_gpu<int, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
|
||||
template void pyrUp_gpu<float, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
template void pyrUp_gpu<float, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
template void pyrUp_gpu<float, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
template void pyrUp_gpu<float, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// buildWarpMaps
|
||||
|
@ -93,11 +93,7 @@ void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool, ConvolveBuf&
|
||||
void cv::gpu::downsample(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::upsample(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::pyrDown(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::PyrDownBuf::create(Size, int) { throw_nogpu(); }
|
||||
void cv::gpu::pyrDown(const GpuMat&, GpuMat&, PyrDownBuf&, Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::pyrUp(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::PyrUpBuf::create(Size, int) { throw_nogpu(); }
|
||||
void cv::gpu::pyrUp(const GpuMat&, GpuMat&, PyrUpBuf&, Stream&) { throw_nogpu(); }
|
||||
void cv::gpu::Canny(const GpuMat&, GpuMat&, double, double, int, bool) { throw_nogpu(); }
|
||||
void cv::gpu::Canny(const GpuMat&, CannyBuf&, GpuMat&, double, double, int, bool) { throw_nogpu(); }
|
||||
void cv::gpu::Canny(const GpuMat&, const GpuMat&, GpuMat&, double, double, bool) { throw_nogpu(); }
|
||||
@ -1598,66 +1594,64 @@ void cv::gpu::upsample(const GpuMat& src, GpuMat& dst, Stream& stream)
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// pyrDown
|
||||
|
||||
namespace cv { namespace gpu { namespace imgproc
|
||||
{
|
||||
template <typename T, int cn> void pyrDown_gpu(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
}}}
|
||||
|
||||
void cv::gpu::pyrDown(const GpuMat& src, GpuMat& dst, Stream& stream)
|
||||
{
|
||||
PyrDownBuf buf;
|
||||
pyrDown(src, dst, buf, stream);
|
||||
}
|
||||
using namespace cv::gpu::imgproc;
|
||||
|
||||
cv::Mat cv::gpu::PyrDownBuf::ker;
|
||||
typedef void (*func_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
|
||||
void cv::gpu::PyrDownBuf::create(Size image_size, int image_type_)
|
||||
{
|
||||
if (ker.empty() || image_type_ != image_type)
|
||||
ker = getGaussianKernel(5, 0, std::max(CV_32F, CV_MAT_DEPTH(image_type_)));
|
||||
|
||||
ensureSizeIsEnough(image_size.height, image_size.width, image_type_, buf);
|
||||
|
||||
if (filter.empty() || image_type_ != image_type)
|
||||
static const func_t funcs[6][4] =
|
||||
{
|
||||
image_type = image_type_;
|
||||
filter = createSeparableLinearFilter_GPU(image_type, image_type, ker, ker);
|
||||
}
|
||||
}
|
||||
{pyrDown_gpu<uchar, 1>, pyrDown_gpu<uchar, 2>, pyrDown_gpu<uchar, 3>, pyrDown_gpu<uchar, 4>},
|
||||
{pyrDown_gpu<schar, 1>, pyrDown_gpu<schar, 2>, pyrDown_gpu<schar, 3>, pyrDown_gpu<schar, 4>},
|
||||
{pyrDown_gpu<ushort, 1>, pyrDown_gpu<ushort, 2>, pyrDown_gpu<ushort, 3>, pyrDown_gpu<ushort, 4>},
|
||||
{pyrDown_gpu<short, 1>, pyrDown_gpu<short, 2>, pyrDown_gpu<short, 3>, pyrDown_gpu<short, 4>},
|
||||
{pyrDown_gpu<int, 1>, pyrDown_gpu<int, 2>, pyrDown_gpu<int, 3>, pyrDown_gpu<int, 4>},
|
||||
{pyrDown_gpu<float, 1>, pyrDown_gpu<float, 2>, pyrDown_gpu<float, 3>, pyrDown_gpu<float, 4>},
|
||||
};
|
||||
|
||||
void cv::gpu::pyrDown(const GpuMat& src, GpuMat& dst, PyrDownBuf& buf, Stream& stream)
|
||||
{
|
||||
buf.create(src.size(), src.type());
|
||||
buf.filter->apply(src, buf.buf, Rect(0, 0, src.cols, src.rows), stream);
|
||||
downsample(buf.buf, dst, stream);
|
||||
CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
|
||||
|
||||
dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type());
|
||||
|
||||
funcs[src.depth()][src.channels() - 1](src, dst, StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////////
|
||||
// pyrUp
|
||||
|
||||
namespace cv { namespace gpu { namespace imgproc
|
||||
{
|
||||
template <typename T, int cn> void pyrUp_gpu(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
}}}
|
||||
|
||||
void cv::gpu::pyrUp(const GpuMat& src, GpuMat& dst, Stream& stream)
|
||||
{
|
||||
PyrUpBuf buf;
|
||||
pyrUp(src, dst, buf, stream);
|
||||
}
|
||||
using namespace cv::gpu::imgproc;
|
||||
|
||||
cv::Mat cv::gpu::PyrUpBuf::ker;
|
||||
typedef void (*func_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
|
||||
|
||||
void cv::gpu::PyrUpBuf::create(Size image_size, int image_type_)
|
||||
{
|
||||
if (ker.empty() || image_type_ != image_type)
|
||||
ker = getGaussianKernel(5, 0, std::max(CV_32F, CV_MAT_DEPTH(image_type_))) * 2;
|
||||
|
||||
ensureSizeIsEnough(image_size.height * 2, image_size.width * 2, image_type_, buf);
|
||||
|
||||
if (filter.empty() || image_type_ != image_type)
|
||||
static const func_t funcs[6][4] =
|
||||
{
|
||||
image_type = image_type_;
|
||||
filter = createSeparableLinearFilter_GPU(image_type, image_type, ker, ker);
|
||||
}
|
||||
}
|
||||
{pyrUp_gpu<uchar, 1>, pyrUp_gpu<uchar, 2>, pyrUp_gpu<uchar, 3>, pyrUp_gpu<uchar, 4>},
|
||||
{pyrUp_gpu<schar, 1>, pyrUp_gpu<schar, 2>, pyrUp_gpu<schar, 3>, pyrUp_gpu<schar, 4>},
|
||||
{pyrUp_gpu<ushort, 1>, pyrUp_gpu<ushort, 2>, pyrUp_gpu<ushort, 3>, pyrUp_gpu<ushort, 4>},
|
||||
{pyrUp_gpu<short, 1>, pyrUp_gpu<short, 2>, pyrUp_gpu<short, 3>, pyrUp_gpu<short, 4>},
|
||||
{pyrUp_gpu<int, 1>, pyrUp_gpu<int, 2>, pyrUp_gpu<int, 3>, pyrUp_gpu<int, 4>},
|
||||
{pyrUp_gpu<float, 1>, pyrUp_gpu<float, 2>, pyrUp_gpu<float, 3>, pyrUp_gpu<float, 4>},
|
||||
};
|
||||
|
||||
void cv::gpu::pyrUp(const GpuMat& src, GpuMat& dst, PyrUpBuf& buf, Stream& stream)
|
||||
{
|
||||
buf.create(src.size(), src.type());
|
||||
upsample(src, buf.buf, stream);
|
||||
buf.filter->apply(buf.buf, dst, Rect(0, 0, buf.buf.cols, buf.buf.rows), stream);
|
||||
CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
|
||||
|
||||
dst.create(src.rows*2, src.cols*2, src.type());
|
||||
|
||||
funcs[src.depth()][src.channels() - 1](src, dst, StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
||||
|
||||
|
@ -50,7 +50,7 @@ namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
struct BrdReflect101
|
||||
{
|
||||
explicit BrdReflect101(int len): last(len - 1) {}
|
||||
explicit __host__ __device__ __forceinline__ BrdReflect101(int len): last(len - 1) {}
|
||||
|
||||
__device__ __forceinline__ int idx_low(int i) const
|
||||
{
|
||||
@ -67,17 +67,17 @@ namespace cv { namespace gpu { namespace device
|
||||
return idx_low(idx_high(i));
|
||||
}
|
||||
|
||||
bool is_range_safe(int mini, int maxi) const
|
||||
__host__ __device__ __forceinline__ bool is_range_safe(int mini, int maxi) const
|
||||
{
|
||||
return -last <= mini && maxi <= 2 * last;
|
||||
}
|
||||
|
||||
int last;
|
||||
const int last;
|
||||
};
|
||||
|
||||
template <typename D> struct BrdRowReflect101 : BrdReflect101
|
||||
{
|
||||
explicit BrdRowReflect101(int len): BrdReflect101(len) {}
|
||||
explicit __host__ __device__ __forceinline__ BrdRowReflect101(int len): BrdReflect101(len) {}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_low(int i, const T* data) const
|
||||
{
|
||||
@ -92,7 +92,7 @@ namespace cv { namespace gpu { namespace device
|
||||
|
||||
template <typename D> struct BrdColReflect101 : BrdReflect101
|
||||
{
|
||||
BrdColReflect101(int len, size_t step): BrdReflect101(len), step(step) {}
|
||||
__host__ __device__ __forceinline__ BrdColReflect101(int len, size_t step): BrdReflect101(len), step(step) {}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_low(int i, const T* data) const
|
||||
{
|
||||
@ -104,12 +104,12 @@ namespace cv { namespace gpu { namespace device
|
||||
return saturate_cast<D>(*(const D*)((const char*)data + idx_high(i)*step));
|
||||
}
|
||||
|
||||
size_t step;
|
||||
const size_t step;
|
||||
};
|
||||
|
||||
struct BrdReplicate
|
||||
{
|
||||
explicit BrdReplicate(int len): last(len - 1) {}
|
||||
explicit __host__ __device__ __forceinline__ BrdReplicate(int len): last(len - 1) {}
|
||||
|
||||
__device__ __forceinline__ int idx_low(int i) const
|
||||
{
|
||||
@ -131,12 +131,12 @@ namespace cv { namespace gpu { namespace device
|
||||
return true;
|
||||
}
|
||||
|
||||
int last;
|
||||
const int last;
|
||||
};
|
||||
|
||||
template <typename D> struct BrdRowReplicate : BrdReplicate
|
||||
{
|
||||
explicit BrdRowReplicate(int len): BrdReplicate(len) {}
|
||||
explicit __host__ __device__ __forceinline__ BrdRowReplicate(int len): BrdReplicate(len) {}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_low(int i, const T* data) const
|
||||
{
|
||||
@ -152,7 +152,7 @@ namespace cv { namespace gpu { namespace device
|
||||
|
||||
template <typename D> struct BrdColReplicate : BrdReplicate
|
||||
{
|
||||
BrdColReplicate(int len, size_t step): BrdReplicate(len), step(step) {}
|
||||
__host__ __device__ __forceinline__ BrdColReplicate(int len, size_t step): BrdReplicate(len), step(step) {}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_low(int i, const T* data) const
|
||||
{
|
||||
@ -164,12 +164,12 @@ namespace cv { namespace gpu { namespace device
|
||||
return saturate_cast<D>(*(const D*)((const char*)data + idx_high(i)*step));
|
||||
}
|
||||
|
||||
size_t step;
|
||||
const size_t step;
|
||||
};
|
||||
|
||||
template <typename D> struct BrdRowConstant
|
||||
{
|
||||
explicit BrdRowConstant(int len_, const D& val_ = VecTraits<D>::all(0)): len(len_), val(val_) {}
|
||||
explicit __host__ __device__ __forceinline__ BrdRowConstant(int len_, const D& val_ = VecTraits<D>::all(0)): len(len_), val(val_) {}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_low(int i, const T* data) const
|
||||
{
|
||||
@ -181,18 +181,18 @@ namespace cv { namespace gpu { namespace device
|
||||
return i < len ? saturate_cast<D>(data[i]) : val;
|
||||
}
|
||||
|
||||
bool is_range_safe(int mini, int maxi) const
|
||||
__host__ __device__ __forceinline__ bool is_range_safe(int mini, int maxi) const
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
int len;
|
||||
D val;
|
||||
const int len;
|
||||
const D val;
|
||||
};
|
||||
|
||||
template <typename D> struct BrdColConstant
|
||||
{
|
||||
BrdColConstant(int len_, size_t step_, const D& val_ = VecTraits<D>::all(0)): len(len_), step(step_), val(val_) {}
|
||||
__host__ __device__ __forceinline__ BrdColConstant(int len_, size_t step_, const D& val_ = VecTraits<D>::all(0)): len(len_), step(step_), val(val_) {}
|
||||
|
||||
template <typename T> __device__ __forceinline__ D at_low(int i, const T* data) const
|
||||
{
|
||||
@ -204,19 +204,19 @@ namespace cv { namespace gpu { namespace device
|
||||
return i < len ? saturate_cast<D>(*(const D*)((const char*)data + i*step)) : val;
|
||||
}
|
||||
|
||||
bool is_range_safe(int mini, int maxi) const
|
||||
__host__ __device__ __forceinline__ bool is_range_safe(int mini, int maxi) const
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
int len;
|
||||
size_t step;
|
||||
D val;
|
||||
const int len;
|
||||
const size_t step;
|
||||
const D val;
|
||||
};
|
||||
|
||||
template <typename OutT> struct BrdConstant
|
||||
{
|
||||
BrdConstant(int w, int h, const OutT &val = VecTraits<OutT>::all(0)) : w(w), h(h), val(val) {}
|
||||
__host__ __device__ __forceinline__ BrdConstant(int w, int h, const OutT &val = VecTraits<OutT>::all(0)) : w(w), h(h), val(val) {}
|
||||
|
||||
__device__ __forceinline__ OutT at(int x, int y, const uchar* data, int step) const
|
||||
{
|
||||
@ -225,7 +225,8 @@ namespace cv { namespace gpu { namespace device
|
||||
return val;
|
||||
}
|
||||
|
||||
int w, h;
|
||||
const int w;
|
||||
const int h;
|
||||
OutT val;
|
||||
};
|
||||
}}}
|
||||
|
@ -312,7 +312,7 @@ TEST_P(BruteForceMatcher, MatchAdd)
|
||||
ASSERT_EQ(0, badCount);
|
||||
}
|
||||
|
||||
TEST_P(BruteForceMatcher, KnnMatch)
|
||||
TEST_P(BruteForceMatcher, KnnMatch2)
|
||||
{
|
||||
const char* distStr = dists[distType];
|
||||
|
||||
@ -352,7 +352,47 @@ TEST_P(BruteForceMatcher, KnnMatch)
|
||||
ASSERT_EQ(0, badCount);
|
||||
}
|
||||
|
||||
TEST_P(BruteForceMatcher, KnnMatchAdd)
|
||||
TEST_P(BruteForceMatcher, KnnMatch3)
|
||||
{
|
||||
const char* distStr = dists[distType];
|
||||
|
||||
PRINT_PARAM(devInfo);
|
||||
PRINT_PARAM(distStr);
|
||||
PRINT_PARAM(dim);
|
||||
|
||||
const int knn = 3;
|
||||
|
||||
std::vector< std::vector<cv::DMatch> > matches;
|
||||
|
||||
ASSERT_NO_THROW(
|
||||
cv::gpu::BruteForceMatcher_GPU_base matcher(distType);
|
||||
matcher.knnMatch(cv::gpu::GpuMat(query), cv::gpu::GpuMat(train), matches, knn);
|
||||
);
|
||||
|
||||
ASSERT_EQ(queryDescCount, matches.size());
|
||||
|
||||
int badCount = 0;
|
||||
for (size_t i = 0; i < matches.size(); i++)
|
||||
{
|
||||
if ((int)matches[i].size() != knn)
|
||||
badCount++;
|
||||
else
|
||||
{
|
||||
int localBadCount = 0;
|
||||
for (int k = 0; k < knn; k++)
|
||||
{
|
||||
cv::DMatch match = matches[i][k];
|
||||
if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor + k) || (match.imgIdx != 0))
|
||||
localBadCount++;
|
||||
}
|
||||
badCount += localBadCount > 0 ? 1 : 0;
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT_EQ(0, badCount);
|
||||
}
|
||||
|
||||
TEST_P(BruteForceMatcher, KnnMatchAdd2)
|
||||
{
|
||||
const char* distStr = dists[distType];
|
||||
|
||||
@ -422,6 +462,76 @@ TEST_P(BruteForceMatcher, KnnMatchAdd)
|
||||
ASSERT_EQ(0, badCount);
|
||||
}
|
||||
|
||||
TEST_P(BruteForceMatcher, KnnMatchAdd3)
|
||||
{
|
||||
const char* distStr = dists[distType];
|
||||
|
||||
PRINT_PARAM(devInfo);
|
||||
PRINT_PARAM(distStr);
|
||||
PRINT_PARAM(dim);
|
||||
|
||||
const int knn = 3;
|
||||
std::vector< std::vector<cv::DMatch> > matches;
|
||||
|
||||
bool isMaskSupported;
|
||||
|
||||
ASSERT_NO_THROW(
|
||||
cv::gpu::BruteForceMatcher_GPU_base matcher(distType);
|
||||
|
||||
cv::gpu::GpuMat d_train(train);
|
||||
|
||||
// make add() twice to test such case
|
||||
matcher.add(std::vector<cv::gpu::GpuMat>(1, d_train.rowRange(0, train.rows / 2)));
|
||||
matcher.add(std::vector<cv::gpu::GpuMat>(1, d_train.rowRange(train.rows / 2, train.rows)));
|
||||
|
||||
// prepare masks (make first nearest match illegal)
|
||||
std::vector<cv::gpu::GpuMat> masks(2);
|
||||
for (int mi = 0; mi < 2; mi++ )
|
||||
{
|
||||
masks[mi] = cv::gpu::GpuMat(query.rows, train.rows / 2, CV_8UC1, cv::Scalar::all(1));
|
||||
for (int di = 0; di < queryDescCount / 2; di++)
|
||||
masks[mi].col(di * countFactor).setTo(cv::Scalar::all(0));
|
||||
}
|
||||
|
||||
matcher.knnMatch(cv::gpu::GpuMat(query), matches, knn, masks);
|
||||
|
||||
isMaskSupported = matcher.isMaskSupported();
|
||||
);
|
||||
|
||||
ASSERT_EQ(queryDescCount, matches.size());
|
||||
|
||||
int badCount = 0;
|
||||
int shift = isMaskSupported ? 1 : 0;
|
||||
for (size_t i = 0; i < matches.size(); i++)
|
||||
{
|
||||
if ((int)matches[i].size() != knn)
|
||||
badCount++;
|
||||
else
|
||||
{
|
||||
int localBadCount = 0;
|
||||
for (int k = 0; k < knn; k++)
|
||||
{
|
||||
cv::DMatch match = matches[i][k];
|
||||
{
|
||||
if (i < queryDescCount / 2)
|
||||
{
|
||||
if ((match.queryIdx != (int)i) || (match.trainIdx != (int)i * countFactor + k + shift) || (match.imgIdx != 0) )
|
||||
localBadCount++;
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((match.queryIdx != (int)i) || (match.trainIdx != ((int)i - queryDescCount / 2) * countFactor + k + shift) || (match.imgIdx != 1) )
|
||||
localBadCount++;
|
||||
}
|
||||
}
|
||||
}
|
||||
badCount += localBadCount > 0 ? 1 : 0;
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT_EQ(0, badCount);
|
||||
}
|
||||
|
||||
TEST_P(BruteForceMatcher, RadiusMatch)
|
||||
{
|
||||
if (!supportFeature(devInfo, cv::gpu::GLOBAL_ATOMICS))
|
||||
|
@ -3959,23 +3959,28 @@ INSTANTIATE_TEST_CASE_P(ImgProc, Blend, testing::Combine(
|
||||
////////////////////////////////////////////////////////
|
||||
// pyrDown
|
||||
|
||||
struct PyrDown : testing::TestWithParam<cv::gpu::DeviceInfo>
|
||||
struct PyrDown : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >
|
||||
{
|
||||
cv::gpu::DeviceInfo devInfo;
|
||||
int type;
|
||||
|
||||
cv::Size size;
|
||||
cv::Mat src;
|
||||
|
||||
cv::Mat dst_gold;
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
devInfo = GetParam();
|
||||
devInfo = std::tr1::get<0>(GetParam());
|
||||
type = std::tr1::get<1>(GetParam());
|
||||
|
||||
cv::gpu::setDevice(devInfo.deviceID());
|
||||
|
||||
cv::Mat img = readImage("stereobm/aloe-L.png");
|
||||
ASSERT_FALSE(img.empty());
|
||||
|
||||
img.convertTo(src, CV_16S);
|
||||
|
||||
cv::RNG& rng = cvtest::TS::ptr()->get_rng();
|
||||
|
||||
size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
|
||||
|
||||
src = cvtest::randomMat(rng, size, type, 0.0, 255.0, false);
|
||||
|
||||
cv::pyrDown(src, dst_gold);
|
||||
}
|
||||
@ -3984,6 +3989,8 @@ struct PyrDown : testing::TestWithParam<cv::gpu::DeviceInfo>
|
||||
TEST_P(PyrDown, Accuracy)
|
||||
{
|
||||
PRINT_PARAM(devInfo);
|
||||
PRINT_TYPE(type);
|
||||
PRINT_PARAM(size);
|
||||
|
||||
cv::Mat dst;
|
||||
|
||||
@ -3998,34 +4005,43 @@ TEST_P(PyrDown, Accuracy)
|
||||
ASSERT_EQ(dst_gold.cols, dst.cols);
|
||||
ASSERT_EQ(dst_gold.rows, dst.rows);
|
||||
ASSERT_EQ(dst_gold.type(), dst.type());
|
||||
|
||||
double err = cvtest::crossCorr(dst_gold, dst) /
|
||||
(cv::norm(dst_gold,cv::NORM_L2)*cv::norm(dst,cv::NORM_L2));
|
||||
|
||||
double err = cvtest::crossCorr(dst_gold, dst) / (cv::norm(dst_gold,cv::NORM_L2)*cv::norm(dst,cv::NORM_L2));
|
||||
ASSERT_NEAR(err, 1., 1e-2);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(ImgProc, PyrDown, testing::ValuesIn(devices()));
|
||||
INSTANTIATE_TEST_CASE_P(ImgProc, PyrDown, testing::Combine(
|
||||
testing::ValuesIn(devices()),
|
||||
testing::Values(CV_8UC1, CV_8UC2, CV_8UC3, CV_8UC4,
|
||||
CV_16UC1, CV_16UC2, CV_16UC3, CV_16UC4,
|
||||
CV_16SC1, CV_16SC2, CV_16SC3, CV_16SC4,
|
||||
CV_32FC1, CV_32FC2, CV_32FC3, CV_32FC4)));
|
||||
|
||||
////////////////////////////////////////////////////////
|
||||
// pyrUp
|
||||
|
||||
struct PyrUp: testing::TestWithParam<cv::gpu::DeviceInfo>
|
||||
struct PyrUp: testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >
|
||||
{
|
||||
cv::gpu::DeviceInfo devInfo;
|
||||
int type;
|
||||
|
||||
cv::Size size;
|
||||
cv::Mat src;
|
||||
|
||||
cv::Mat dst_gold;
|
||||
|
||||
virtual void SetUp()
|
||||
{
|
||||
devInfo = GetParam();
|
||||
devInfo = std::tr1::get<0>(GetParam());
|
||||
type = std::tr1::get<1>(GetParam());
|
||||
|
||||
cv::gpu::setDevice(devInfo.deviceID());
|
||||
|
||||
cv::Mat img = readImage("stereobm/aloe-L.png");
|
||||
ASSERT_FALSE(img.empty());
|
||||
|
||||
img.convertTo(src, CV_16S);
|
||||
cv::RNG& rng = cvtest::TS::ptr()->get_rng();
|
||||
|
||||
size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
|
||||
|
||||
src = cvtest::randomMat(rng, size, type, 0.0, 255.0, false);
|
||||
|
||||
cv::pyrUp(src, dst_gold);
|
||||
}
|
||||
@ -4034,6 +4050,8 @@ struct PyrUp: testing::TestWithParam<cv::gpu::DeviceInfo>
|
||||
TEST_P(PyrUp, Accuracy)
|
||||
{
|
||||
PRINT_PARAM(devInfo);
|
||||
PRINT_TYPE(type);
|
||||
PRINT_PARAM(size);
|
||||
|
||||
cv::Mat dst;
|
||||
|
||||
@ -4049,12 +4067,17 @@ TEST_P(PyrUp, Accuracy)
|
||||
ASSERT_EQ(dst_gold.rows, dst.rows);
|
||||
ASSERT_EQ(dst_gold.type(), dst.type());
|
||||
|
||||
double err = cvtest::crossCorr(dst_gold, dst) /
|
||||
(cv::norm(dst_gold,cv::NORM_L2)*cv::norm(dst,cv::NORM_L2));
|
||||
double err = cvtest::crossCorr(dst_gold, dst) / (cv::norm(dst_gold,cv::NORM_L2)*cv::norm(dst,cv::NORM_L2));
|
||||
ASSERT_NEAR(err, 1., 1e-2);
|
||||
}
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(ImgProc, PyrUp, testing::ValuesIn(devices()));
|
||||
|
||||
INSTANTIATE_TEST_CASE_P(ImgProc, PyrUp, testing::Combine(
|
||||
testing::ValuesIn(devices()),
|
||||
testing::Values(CV_8UC1, CV_8UC2, CV_8UC3, CV_8UC4,
|
||||
CV_16UC1, CV_16UC2, CV_16UC3, CV_16UC4,
|
||||
CV_16SC1, CV_16SC2, CV_16SC3, CV_16SC4,
|
||||
CV_32FC1, CV_32FC2, CV_32FC3, CV_32FC4)));
|
||||
|
||||
////////////////////////////////////////////////////////
|
||||
// Canny
|
||||
|
@ -304,8 +304,8 @@ TEST(BruteForceMatcher)
|
||||
gpu::GpuMat d_train(train);
|
||||
|
||||
// Output
|
||||
vector< vector<DMatch> > matches(1);
|
||||
vector< vector<DMatch> > d_matches(1);
|
||||
vector< vector<DMatch> > matches(2);
|
||||
vector< vector<DMatch> > d_matches(2);
|
||||
|
||||
SUBTEST << "match";
|
||||
|
||||
@ -875,49 +875,269 @@ TEST(GaussianBlur)
|
||||
|
||||
TEST(pyrDown)
|
||||
{
|
||||
gpu::PyrDownBuf buf(Size(4000, 4000), CV_16SC3);
|
||||
|
||||
for (int size = 4000; size >= 1000; size -= 1000)
|
||||
{
|
||||
SUBTEST << "size " << size;
|
||||
for (int size = 4000; size >= 1000; size -= 1000)
|
||||
{
|
||||
SUBTEST << "8UC1, size " << size;
|
||||
|
||||
Mat src; gen(src, size, size, CV_16SC3, 0, 256);
|
||||
Mat dst(Size(src.cols / 2, src.rows / 2), src.type());
|
||||
Mat src; gen(src, size, size, CV_8UC1, 0, 256);
|
||||
Mat dst(Size(src.cols / 2, src.rows / 2), src.type());
|
||||
|
||||
CPU_ON;
|
||||
pyrDown(src, dst);
|
||||
CPU_OFF;
|
||||
CPU_ON;
|
||||
pyrDown(src, dst);
|
||||
CPU_OFF;
|
||||
|
||||
gpu::GpuMat d_src(src);
|
||||
gpu::GpuMat d_dst(Size(src.cols / 2, src.rows / 2), src.type());
|
||||
gpu::GpuMat d_src(src);
|
||||
gpu::GpuMat d_dst(Size(src.cols / 2, src.rows / 2), src.type());
|
||||
|
||||
GPU_ON;
|
||||
gpu::pyrDown(d_src, d_dst, buf);
|
||||
GPU_OFF;
|
||||
GPU_ON;
|
||||
gpu::pyrDown(d_src, d_dst);
|
||||
GPU_OFF;
|
||||
}
|
||||
}
|
||||
{
|
||||
for (int size = 4000; size >= 1000; size -= 1000)
|
||||
{
|
||||
SUBTEST << "8UC3, size " << size;
|
||||
|
||||
Mat src; gen(src, size, size, CV_8UC3, 0, 256);
|
||||
Mat dst(Size(src.cols / 2, src.rows / 2), src.type());
|
||||
|
||||
CPU_ON;
|
||||
pyrDown(src, dst);
|
||||
CPU_OFF;
|
||||
|
||||
gpu::GpuMat d_src(src);
|
||||
gpu::GpuMat d_dst(Size(src.cols / 2, src.rows / 2), src.type());
|
||||
|
||||
GPU_ON;
|
||||
gpu::pyrDown(d_src, d_dst);
|
||||
GPU_OFF;
|
||||
}
|
||||
}
|
||||
{
|
||||
for (int size = 4000; size >= 1000; size -= 1000)
|
||||
{
|
||||
SUBTEST << "8UC4, size " << size;
|
||||
|
||||
Mat src; gen(src, size, size, CV_8UC4, 0, 256);
|
||||
Mat dst(Size(src.cols / 2, src.rows / 2), src.type());
|
||||
|
||||
CPU_ON;
|
||||
pyrDown(src, dst);
|
||||
CPU_OFF;
|
||||
|
||||
gpu::GpuMat d_src(src);
|
||||
gpu::GpuMat d_dst(Size(src.cols / 2, src.rows / 2), src.type());
|
||||
|
||||
GPU_ON;
|
||||
gpu::pyrDown(d_src, d_dst);
|
||||
GPU_OFF;
|
||||
}
|
||||
}
|
||||
{
|
||||
for (int size = 4000; size >= 1000; size -= 1000)
|
||||
{
|
||||
SUBTEST << "16SC3, size " << size;
|
||||
|
||||
Mat src; gen(src, size, size, CV_16SC3, 0, 256);
|
||||
Mat dst(Size(src.cols / 2, src.rows / 2), src.type());
|
||||
|
||||
CPU_ON;
|
||||
pyrDown(src, dst);
|
||||
CPU_OFF;
|
||||
|
||||
gpu::GpuMat d_src(src);
|
||||
gpu::GpuMat d_dst(Size(src.cols / 2, src.rows / 2), src.type());
|
||||
|
||||
GPU_ON;
|
||||
gpu::pyrDown(d_src, d_dst);
|
||||
GPU_OFF;
|
||||
}
|
||||
}
|
||||
{
|
||||
for (int size = 4000; size >= 1000; size -= 1000)
|
||||
{
|
||||
SUBTEST << "32FC1, size " << size;
|
||||
|
||||
Mat src; gen(src, size, size, CV_32FC1, 0, 256);
|
||||
Mat dst(Size(src.cols / 2, src.rows / 2), src.type());
|
||||
|
||||
CPU_ON;
|
||||
pyrDown(src, dst);
|
||||
CPU_OFF;
|
||||
|
||||
gpu::GpuMat d_src(src);
|
||||
gpu::GpuMat d_dst(Size(src.cols / 2, src.rows / 2), src.type());
|
||||
|
||||
GPU_ON;
|
||||
gpu::pyrDown(d_src, d_dst);
|
||||
GPU_OFF;
|
||||
}
|
||||
}
|
||||
{
|
||||
for (int size = 4000; size >= 1000; size -= 1000)
|
||||
{
|
||||
SUBTEST << "32FC3, size " << size;
|
||||
|
||||
Mat src; gen(src, size, size, CV_32FC3, 0, 256);
|
||||
Mat dst(Size(src.cols / 2, src.rows / 2), src.type());
|
||||
|
||||
CPU_ON;
|
||||
pyrDown(src, dst);
|
||||
CPU_OFF;
|
||||
|
||||
gpu::GpuMat d_src(src);
|
||||
gpu::GpuMat d_dst(Size(src.cols / 2, src.rows / 2), src.type());
|
||||
|
||||
GPU_ON;
|
||||
gpu::pyrDown(d_src, d_dst);
|
||||
GPU_OFF;
|
||||
}
|
||||
}
|
||||
{
|
||||
for (int size = 4000; size >= 1000; size -= 1000)
|
||||
{
|
||||
SUBTEST << "32FC4, size " << size;
|
||||
|
||||
Mat src; gen(src, size, size, CV_32FC4, 0, 256);
|
||||
Mat dst(Size(src.cols / 2, src.rows / 2), src.type());
|
||||
|
||||
CPU_ON;
|
||||
pyrDown(src, dst);
|
||||
CPU_OFF;
|
||||
|
||||
gpu::GpuMat d_src(src);
|
||||
gpu::GpuMat d_dst(Size(src.cols / 2, src.rows / 2), src.type());
|
||||
|
||||
GPU_ON;
|
||||
gpu::pyrDown(d_src, d_dst);
|
||||
GPU_OFF;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TEST(pyrUp)
|
||||
{
|
||||
gpu::PyrUpBuf buf(Size(4000, 4000), CV_16SC3);
|
||||
|
||||
for (int size = 4000; size >= 1000; size -= 1000)
|
||||
{
|
||||
SUBTEST << "size " << size;
|
||||
for (int size = 2000; size >= 1000; size -= 1000)
|
||||
{
|
||||
SUBTEST << "8UC1, size " << size;
|
||||
|
||||
Mat src; gen(src, size, size, CV_16SC3, 0, 256);
|
||||
Mat dst(Size(src.cols * 2, src.rows * 2), src.type());
|
||||
Mat src; gen(src, size, size, CV_8UC1, 0, 256);
|
||||
Mat dst(Size(src.cols * 2, src.rows * 2), src.type());
|
||||
|
||||
CPU_ON;
|
||||
pyrUp(src, dst);
|
||||
CPU_OFF;
|
||||
CPU_ON;
|
||||
pyrUp(src, dst);
|
||||
CPU_OFF;
|
||||
|
||||
gpu::GpuMat d_src(src);
|
||||
gpu::GpuMat d_dst(Size(src.cols * 2, src.rows * 2), src.type());
|
||||
gpu::GpuMat d_src(src);
|
||||
gpu::GpuMat d_dst(Size(src.cols * 2, src.rows * 2), src.type());
|
||||
|
||||
GPU_ON;
|
||||
gpu::pyrUp(d_src, d_dst, buf);
|
||||
GPU_OFF;
|
||||
GPU_ON;
|
||||
gpu::pyrUp(d_src, d_dst);
|
||||
GPU_OFF;
|
||||
}
|
||||
}
|
||||
{
|
||||
for (int size = 2000; size >= 1000; size -= 1000)
|
||||
{
|
||||
SUBTEST << "8UC3, size " << size;
|
||||
|
||||
Mat src; gen(src, size, size, CV_8UC3, 0, 256);
|
||||
Mat dst(Size(src.cols * 2, src.rows * 2), src.type());
|
||||
|
||||
CPU_ON;
|
||||
pyrUp(src, dst);
|
||||
CPU_OFF;
|
||||
|
||||
gpu::GpuMat d_src(src);
|
||||
gpu::GpuMat d_dst(Size(src.cols * 2, src.rows * 2), src.type());
|
||||
|
||||
GPU_ON;
|
||||
gpu::pyrUp(d_src, d_dst);
|
||||
GPU_OFF;
|
||||
}
|
||||
}
|
||||
{
|
||||
for (int size = 2000; size >= 1000; size -= 1000)
|
||||
{
|
||||
SUBTEST << "8UC4, size " << size;
|
||||
|
||||
Mat src; gen(src, size, size, CV_8UC4, 0, 256);
|
||||
Mat dst(Size(src.cols * 2, src.rows * 2), src.type());
|
||||
|
||||
CPU_ON;
|
||||
pyrUp(src, dst);
|
||||
CPU_OFF;
|
||||
|
||||
gpu::GpuMat d_src(src);
|
||||
gpu::GpuMat d_dst(Size(src.cols * 2, src.rows * 2), src.type());
|
||||
|
||||
GPU_ON;
|
||||
gpu::pyrUp(d_src, d_dst);
|
||||
GPU_OFF;
|
||||
}
|
||||
}
|
||||
{
|
||||
for (int size = 2000; size >= 1000; size -= 1000)
|
||||
{
|
||||
SUBTEST << "16SC3, size " << size;
|
||||
|
||||
Mat src; gen(src, size, size, CV_16SC3, 0, 256);
|
||||
Mat dst(Size(src.cols * 2, src.rows * 2), src.type());
|
||||
|
||||
CPU_ON;
|
||||
pyrUp(src, dst);
|
||||
CPU_OFF;
|
||||
|
||||
gpu::GpuMat d_src(src);
|
||||
gpu::GpuMat d_dst(Size(src.cols * 2, src.rows * 2), src.type());
|
||||
|
||||
GPU_ON;
|
||||
gpu::pyrUp(d_src, d_dst);
|
||||
GPU_OFF;
|
||||
}
|
||||
}
|
||||
{
|
||||
for (int size = 2000; size >= 1000; size -= 1000)
|
||||
{
|
||||
SUBTEST << "32FC1, size " << size;
|
||||
|
||||
Mat src; gen(src, size, size, CV_32FC1, 0, 256);
|
||||
Mat dst(Size(src.cols * 2, src.rows * 2), src.type());
|
||||
|
||||
CPU_ON;
|
||||
pyrUp(src, dst);
|
||||
CPU_OFF;
|
||||
|
||||
gpu::GpuMat d_src(src);
|
||||
gpu::GpuMat d_dst(Size(src.cols * 2, src.rows * 2), src.type());
|
||||
|
||||
GPU_ON;
|
||||
gpu::pyrUp(d_src, d_dst);
|
||||
GPU_OFF;
|
||||
}
|
||||
}
|
||||
{
|
||||
for (int size = 2000; size >= 1000; size -= 1000)
|
||||
{
|
||||
SUBTEST << "32FC3, size " << size;
|
||||
|
||||
Mat src; gen(src, size, size, CV_32FC3, 0, 256);
|
||||
Mat dst(Size(src.cols * 2, src.rows * 2), src.type());
|
||||
|
||||
CPU_ON;
|
||||
pyrUp(src, dst);
|
||||
CPU_OFF;
|
||||
|
||||
gpu::GpuMat d_src(src);
|
||||
gpu::GpuMat d_dst(Size(src.cols * 2, src.rows * 2), src.type());
|
||||
|
||||
GPU_ON;
|
||||
gpu::pyrUp(d_src, d_dst);
|
||||
GPU_OFF;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user