GPU resize with INTER_AREA
This commit is contained in:
@@ -46,6 +46,7 @@
|
||||
#include "opencv2/gpu/device/vec_math.hpp"
|
||||
#include "opencv2/gpu/device/saturate_cast.hpp"
|
||||
#include "opencv2/gpu/device/filters.hpp"
|
||||
# include <cfloat>
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
@@ -65,6 +66,17 @@ namespace cv { namespace gpu { namespace device
|
||||
}
|
||||
}
|
||||
|
||||
template <typename Ptr2D, typename T> __global__ void resize_area(const Ptr2D src, float fx, float fy, DevMem2D_<T> dst)
|
||||
{
|
||||
const int x = blockDim.x * blockIdx.x + threadIdx.x;
|
||||
const int y = blockDim.y * blockIdx.y + threadIdx.y;
|
||||
|
||||
if (x < dst.cols && y < dst.rows)
|
||||
{
|
||||
dst(y, x) = saturate_cast<T>(src(y, x));
|
||||
}
|
||||
}
|
||||
|
||||
template <template <typename> class Filter, typename T> struct ResizeDispatcherStream
|
||||
{
|
||||
static void call(DevMem2D_<T> src, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)
|
||||
@@ -74,13 +86,43 @@ namespace cv { namespace gpu { namespace device
|
||||
|
||||
BrdReplicate<T> brd(src.rows, src.cols);
|
||||
BorderReader< PtrStep<T>, BrdReplicate<T> > brdSrc(src, brd);
|
||||
Filter< BorderReader< PtrStep<T>, BrdReplicate<T> > > filteredSrc(brdSrc);
|
||||
Filter< BorderReader< PtrStep<T>, BrdReplicate<T> > > filteredSrc(brdSrc, fx, fy);
|
||||
|
||||
resize<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T> struct ResizeDispatcherStream<AreaFilter, T>
|
||||
{
|
||||
static void call(DevMem2D_<T> src, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)
|
||||
{
|
||||
dim3 block(32, 8);
|
||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
|
||||
|
||||
BrdConstant<T> brd(src.rows, src.cols);
|
||||
BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd);
|
||||
AreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy);
|
||||
resize_area<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T> struct ResizeDispatcherStream<IntegerAreaFilter, T>
|
||||
{
|
||||
static void call(DevMem2D_<T> src, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)
|
||||
{
|
||||
dim3 block(32, 8);
|
||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
|
||||
|
||||
BrdConstant<T> brd(src.rows, src.cols);
|
||||
BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd);
|
||||
IntegerAreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy);
|
||||
resize_area<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
}
|
||||
};
|
||||
|
||||
template <template <typename> class Filter, typename T> struct ResizeDispatcherNonStream
|
||||
{
|
||||
static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_<T> dst)
|
||||
@@ -169,14 +211,31 @@ namespace cv { namespace gpu { namespace device
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T> struct ResizeDispatcher<AreaFilter, T>
|
||||
{
|
||||
static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)
|
||||
{
|
||||
int iscale_x = round(fx);
|
||||
int iscale_y = round(fy);
|
||||
|
||||
if( std::abs(fx - iscale_x) < FLT_MIN && std::abs(fy - iscale_y) < FLT_MIN)
|
||||
ResizeDispatcherStream<IntegerAreaFilter, T>::call(src, fx, fy, dst, stream);
|
||||
else
|
||||
ResizeDispatcherStream<AreaFilter, T>::call(src, fx, fy, dst, stream);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T> void resize_gpu(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy,
|
||||
DevMem2Db dst, int interpolation, cudaStream_t stream)
|
||||
{
|
||||
typedef void (*caller_t)(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream);
|
||||
|
||||
static const caller_t callers[3] =
|
||||
static const caller_t callers[4] =
|
||||
{
|
||||
ResizeDispatcher<PointFilter, T>::call, ResizeDispatcher<LinearFilter, T>::call, ResizeDispatcher<CubicFilter, T>::call
|
||||
ResizeDispatcher<PointFilter, T>::call,
|
||||
ResizeDispatcher<LinearFilter, T>::call,
|
||||
ResizeDispatcher<CubicFilter, T>::call,
|
||||
ResizeDispatcher<AreaFilter, T>::call
|
||||
};
|
||||
|
||||
callers[interpolation](static_cast< DevMem2D_<T> >(src), static_cast< DevMem2D_<T> >(srcWhole), xoff, yoff, fx, fy,
|
||||
|
@@ -55,7 +55,7 @@ namespace cv { namespace gpu { namespace device
|
||||
typedef typename Ptr2D::elem_type elem_type;
|
||||
typedef float index_type;
|
||||
|
||||
explicit __host__ __device__ __forceinline__ PointFilter(const Ptr2D& src_) : src(src_) {}
|
||||
explicit __host__ __device__ __forceinline__ PointFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f) : src(src_) {}
|
||||
|
||||
__device__ __forceinline__ elem_type operator ()(float y, float x) const
|
||||
{
|
||||
@@ -70,7 +70,7 @@ namespace cv { namespace gpu { namespace device
|
||||
typedef typename Ptr2D::elem_type elem_type;
|
||||
typedef float index_type;
|
||||
|
||||
explicit __host__ __device__ __forceinline__ LinearFilter(const Ptr2D& src_) : src(src_) {}
|
||||
explicit __host__ __device__ __forceinline__ LinearFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f) : src(src_) {}
|
||||
|
||||
__device__ __forceinline__ elem_type operator ()(float y, float x) const
|
||||
{
|
||||
@@ -107,7 +107,7 @@ namespace cv { namespace gpu { namespace device
|
||||
typedef float index_type;
|
||||
typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
|
||||
|
||||
explicit __host__ __device__ __forceinline__ CubicFilter(const Ptr2D& src_) : src(src_) {}
|
||||
explicit __host__ __device__ __forceinline__ CubicFilter(const Ptr2D& src_, float fx = 0.f, float fy = 0.f) : src(src_) {}
|
||||
|
||||
static __device__ __forceinline__ float bicubicCoeff(float x_)
|
||||
{
|
||||
@@ -154,6 +154,111 @@ namespace cv { namespace gpu { namespace device
|
||||
|
||||
const Ptr2D src;
|
||||
};
|
||||
// for integer scaling
|
||||
template <typename Ptr2D> struct IntegerAreaFilter
|
||||
{
|
||||
typedef typename Ptr2D::elem_type elem_type;
|
||||
typedef float index_type;
|
||||
|
||||
explicit __host__ __device__ __forceinline__ IntegerAreaFilter(const Ptr2D& src_, float scale_x_, float scale_y_)
|
||||
: src(src_), scale_x(scale_x_), scale_y(scale_y_), scale(1.f / (scale_x * scale_y)) {}
|
||||
|
||||
__device__ __forceinline__ elem_type operator ()(float y, float x) const
|
||||
{
|
||||
float fsx1 = x * scale_x;
|
||||
float fsx2 = fsx1 + scale_x;
|
||||
|
||||
int sx1 = __float2int_ru(fsx1);
|
||||
int sx2 = __float2int_rd(fsx2);
|
||||
|
||||
float fsy1 = y * scale_y;
|
||||
float fsy2 = fsy1 + scale_y;
|
||||
|
||||
int sy1 = __float2int_ru(fsy1);
|
||||
int sy2 = __float2int_rd(fsy2);
|
||||
|
||||
typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
|
||||
work_type out = VecTraits<work_type>::all(0.f);
|
||||
|
||||
for(int dy = sy1; dy < sy2; ++dy)
|
||||
for(int dx = sx1; dx < sx2; ++dx)
|
||||
{
|
||||
out = out + src(dy, dx) * scale;
|
||||
}
|
||||
|
||||
return saturate_cast<elem_type>(out);
|
||||
}
|
||||
|
||||
const Ptr2D src;
|
||||
float scale_x, scale_y ,scale;
|
||||
};
|
||||
|
||||
template <typename Ptr2D> struct AreaFilter
|
||||
{
|
||||
typedef typename Ptr2D::elem_type elem_type;
|
||||
typedef float index_type;
|
||||
|
||||
explicit __host__ __device__ __forceinline__ AreaFilter(const Ptr2D& src_, float scale_x_, float scale_y_)
|
||||
: src(src_), scale_x(scale_x_), scale_y(scale_y_){}
|
||||
|
||||
__device__ __forceinline__ elem_type operator ()(float y, float x) const
|
||||
{
|
||||
float fsx1 = x * scale_x;
|
||||
float fsx2 = fsx1 + scale_x;
|
||||
|
||||
int sx1 = __float2int_ru(fsx1);
|
||||
int sx2 = __float2int_rd(fsx2);
|
||||
|
||||
float fsy1 = y * scale_y;
|
||||
float fsy2 = fsy1 + scale_y;
|
||||
|
||||
int sy1 = __float2int_ru(fsy1);
|
||||
int sy2 = __float2int_rd(fsy2);
|
||||
|
||||
float scale = 1.f / (fminf(scale_x, src.width - fsx1) * fminf(scale_y, src.height - fsy1));
|
||||
|
||||
typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
|
||||
work_type out = VecTraits<work_type>::all(0.f);
|
||||
|
||||
for (int dy = sy1; dy < sy2; ++dy)
|
||||
{
|
||||
for (int dx = sx1; dx < sx2; ++dx)
|
||||
out = out + src(dy, dx) * scale;
|
||||
|
||||
if (sx1 > fsx1)
|
||||
out = out + src(dy, (sx1 -1) ) * ((sx1 - fsx1) * scale);
|
||||
|
||||
if (sx2 < fsx2)
|
||||
out = out + src(dy, sx2) * ((fsx2 -sx2) * scale);
|
||||
}
|
||||
|
||||
if (sy1 > fsy1)
|
||||
for (int dx = sx1; dx < sx2; ++dx)
|
||||
out = out + src( (sy1 - 1) , dx) * ((sy1 -fsy1) * scale);
|
||||
|
||||
if (sy2 < fsy2)
|
||||
for (int dx = sx1; dx < sx2; ++dx)
|
||||
out = out + src(sy2, dx) * ((fsy2 -sy2) * scale);
|
||||
|
||||
if ((sy1 > fsy1) && (sx1 > fsx1))
|
||||
out = out + src( (sy1 - 1) , (sx1 - 1)) * ((sy1 -fsy1) * (sx1 -fsx1) * scale);
|
||||
|
||||
if ((sy1 > fsy1) && (sx2 < fsx2))
|
||||
out = out + src( (sy1 - 1) , sx2) * ((sy1 -fsy1) * (fsx2 -sx2) * scale);
|
||||
|
||||
if ((sy2 < fsy2) && (sx2 < fsx2))
|
||||
out = out + src(sy2, sx2) * ((fsy2 -sy2) * (fsx2 -sx2) * scale);
|
||||
|
||||
if ((sy2 < fsy2) && (sx1 > fsx1))
|
||||
out = out + src(sy2, (sx1 - 1)) * ((fsy2 -sy2) * (sx1 -fsx1) * scale);
|
||||
|
||||
return saturate_cast<elem_type>(out);
|
||||
}
|
||||
|
||||
const Ptr2D src;
|
||||
float scale_x, scale_y;
|
||||
int width, haight;
|
||||
};
|
||||
}}} // namespace cv { namespace gpu { namespace device
|
||||
|
||||
#endif // __OPENCV_GPU_FILTERS_HPP__
|
||||
|
@@ -221,7 +221,7 @@ namespace cv { namespace gpu { namespace device
|
||||
|
||||
template<> struct VecTraits<char>
|
||||
{
|
||||
typedef char elem_type;
|
||||
typedef char elem_type;
|
||||
enum {cn=1};
|
||||
static __device__ __host__ __forceinline__ char all(char v) {return v;}
|
||||
static __device__ __host__ __forceinline__ char make(char x) {return x;}
|
||||
@@ -229,7 +229,7 @@ namespace cv { namespace gpu { namespace device
|
||||
};
|
||||
template<> struct VecTraits<schar>
|
||||
{
|
||||
typedef schar elem_type;
|
||||
typedef schar elem_type;
|
||||
enum {cn=1};
|
||||
static __device__ __host__ __forceinline__ schar all(schar v) {return v;}
|
||||
static __device__ __host__ __forceinline__ schar make(schar x) {return x;}
|
||||
|
@@ -61,7 +61,8 @@ namespace cv { namespace gpu { namespace device
|
||||
void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, double fy, int interpolation, Stream& s)
|
||||
{
|
||||
CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
|
||||
CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR || interpolation == INTER_CUBIC);
|
||||
CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR
|
||||
|| interpolation == INTER_CUBIC || interpolation == INTER_AREA);
|
||||
CV_Assert(!(dsize == Size()) || (fx > 0 && fy > 0));
|
||||
|
||||
if (dsize == Size())
|
||||
|
Reference in New Issue
Block a user