used new device layer for cv::gpu::reduce
This commit is contained in:
@@ -54,12 +54,52 @@ namespace cv { namespace cudev {
|
||||
|
||||
namespace grid_reduce_to_vec_detail
|
||||
{
|
||||
// Primary template (declaration only). Specializations on the trailing `cn`
// parameter provide a static call() that forwards to blockReduce for a work
// type with that many channels.
template <int BLOCK_SIZE, typename work_type, typename work_elem_type, class Reductor, int cn> struct Reduce;
|
||||
|
||||
// Single-channel case: the per-thread value is handed to blockReduce as-is,
// using the one row of shared memory as scratch space.
template <int BLOCK_SIZE, typename work_type, typename work_elem_type, class Reductor>
struct Reduce<BLOCK_SIZE, work_type, work_elem_type, Reductor, 1>
{
    __device__ __forceinline__ static void call(work_elem_type smem[1][BLOCK_SIZE], work_type& myVal)
    {
        // Reductor re-targeted at the element type of the work type.
        typedef typename Reductor::template rebind<work_elem_type>::other ElemOp;

        ElemOp elemOp;
        const unsigned int tid = threadIdx.x;

        blockReduce<BLOCK_SIZE>(smem[0], myVal, tid, elemOp);
    }
};
|
||||
|
||||
// Two-channel case: the .x and .y components are reduced together, each with
// its own row of shared memory and its own copy of the element-wise operator.
template <int BLOCK_SIZE, typename work_type, typename work_elem_type, class Reductor>
struct Reduce<BLOCK_SIZE, work_type, work_elem_type, Reductor, 2>
{
    __device__ __forceinline__ static void call(work_elem_type smem[2][BLOCK_SIZE], work_type& myVal)
    {
        // Reductor re-targeted at the element type of the work type.
        typedef typename Reductor::template rebind<work_elem_type>::other ElemOp;

        ElemOp elemOp;
        const unsigned int tid = threadIdx.x;

        blockReduce<BLOCK_SIZE>(
            smem_tuple(smem[0], smem[1]),
            tie(myVal.x, myVal.y),
            tid,
            make_tuple(elemOp, elemOp));
    }
};
|
||||
|
||||
// Three-channel case: the .x, .y and .z components are reduced together, each
// with its own row of shared memory and its own copy of the element-wise operator.
template <int BLOCK_SIZE, typename work_type, typename work_elem_type, class Reductor>
struct Reduce<BLOCK_SIZE, work_type, work_elem_type, Reductor, 3>
{
    __device__ __forceinline__ static void call(work_elem_type smem[3][BLOCK_SIZE], work_type& myVal)
    {
        // Reductor re-targeted at the element type of the work type.
        typedef typename Reductor::template rebind<work_elem_type>::other ElemOp;

        ElemOp elemOp;
        const unsigned int tid = threadIdx.x;

        blockReduce<BLOCK_SIZE>(
            smem_tuple(smem[0], smem[1], smem[2]),
            tie(myVal.x, myVal.y, myVal.z),
            tid,
            make_tuple(elemOp, elemOp, elemOp));
    }
};
|
||||
|
||||
// Four-channel case: the .x, .y, .z and .w components are reduced together, each
// with its own row of shared memory and its own copy of the element-wise operator.
template <int BLOCK_SIZE, typename work_type, typename work_elem_type, class Reductor>
struct Reduce<BLOCK_SIZE, work_type, work_elem_type, Reductor, 4>
{
    __device__ __forceinline__ static void call(work_elem_type smem[4][BLOCK_SIZE], work_type& myVal)
    {
        // Reductor re-targeted at the element type of the work type.
        typedef typename Reductor::template rebind<work_elem_type>::other ElemOp;

        ElemOp elemOp;
        const unsigned int tid = threadIdx.x;

        blockReduce<BLOCK_SIZE>(
            smem_tuple(smem[0], smem[1], smem[2], smem[3]),
            tie(myVal.x, myVal.y, myVal.z, myVal.w),
            tid,
            make_tuple(elemOp, elemOp, elemOp, elemOp));
    }
};
|
||||
|
||||
template <class Reductor, int BLOCK_SIZE, class SrcPtr, typename ResType, class MaskPtr>
|
||||
__global__ void reduceToColumn(const SrcPtr src, ResType* dst, const MaskPtr mask, const int cols)
|
||||
{
|
||||
typedef typename Reductor::work_type work_type;
|
||||
typedef typename VecTraits<work_type>::elem_type work_elem_type;
|
||||
const int cn = VecTraits<work_type>::cn;
|
||||
|
||||
__shared__ work_type smem[BLOCK_SIZE];
|
||||
__shared__ work_elem_type smem[cn][BLOCK_SIZE];
|
||||
|
||||
const int y = blockIdx.x;
|
||||
|
||||
@@ -75,7 +115,7 @@ namespace grid_reduce_to_vec_detail
|
||||
}
|
||||
}
|
||||
|
||||
blockReduce<BLOCK_SIZE>(smem, myVal, threadIdx.x, op);
|
||||
Reduce<BLOCK_SIZE, work_type, work_elem_type, Reductor, cn>::call(smem, myVal);
|
||||
|
||||
if (threadIdx.x == 0)
|
||||
dst[y] = saturate_cast<ResType>(Reductor::result(myVal, cols));
|
||||
|
@@ -49,6 +49,7 @@
|
||||
#include "../common.hpp"
|
||||
#include "../util/vec_traits.hpp"
|
||||
#include "../util/limits.hpp"
|
||||
#include "../util/saturate_cast.hpp"
|
||||
#include "../ptr2d/traits.hpp"
|
||||
#include "../ptr2d/gpumat.hpp"
|
||||
#include "../ptr2d/mask.hpp"
|
||||
@@ -62,6 +63,11 @@ template <typename T> struct Sum : plus<T>
|
||||
{
|
||||
typedef T work_type;
|
||||
|
||||
// Maps this reductor onto another value type: rebind<U>::other is Sum<U>.
// Reduce<...>::call uses it to obtain an operator for the work type's element type.
template <typename U> struct rebind
|
||||
{
|
||||
typedef Sum<U> other;
|
||||
};
|
||||
|
||||
__device__ __forceinline__ static T initialValue()
|
||||
{
|
||||
return VecTraits<T>::all(0);
|
||||
@@ -77,14 +83,19 @@ template <typename T> struct Avg : plus<T>
|
||||
{
|
||||
typedef T work_type;
|
||||
|
||||
// Maps this reductor onto another value type: rebind<U>::other is Avg<U>.
// Reduce<...>::call uses it to obtain an operator for the work type's element type.
template <typename U> struct rebind
|
||||
{
|
||||
typedef Avg<U> other;
|
||||
};
|
||||
|
||||
// Returns the starting accumulator value, VecTraits<T>::all(0).
// NOTE(review): presumably this sets every channel of T to zero -- confirm against VecTraits.
__device__ __forceinline__ static T initialValue()
|
||||
{
|
||||
return VecTraits<T>::all(0);
|
||||
}
|
||||
|
||||
__device__ __forceinline__ static T result(T r, int sz)
|
||||
__device__ __forceinline__ static T result(T r, float sz)
|
||||
{
|
||||
return r / sz;
|
||||
return saturate_cast<T>(r / sz);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -92,6 +103,11 @@ template <typename T> struct Min : minimum<T>
|
||||
{
|
||||
typedef T work_type;
|
||||
|
||||
// Maps this reductor onto another value type: rebind<U>::other is Min<U>.
// Reduce<...>::call uses it to obtain an operator for the work type's element type.
template <typename U> struct rebind
|
||||
{
|
||||
typedef Min<U> other;
|
||||
};
|
||||
|
||||
__device__ __forceinline__ static T initialValue()
|
||||
{
|
||||
return VecTraits<T>::all(numeric_limits<typename VecTraits<T>::elem_type>::max());
|
||||
@@ -107,6 +123,11 @@ template <typename T> struct Max : maximum<T>
|
||||
{
|
||||
typedef T work_type;
|
||||
|
||||
// Maps this reductor onto another value type: rebind<U>::other is Max<U>.
// Reduce<...>::call uses it to obtain an operator for the work type's element type.
template <typename U> struct rebind
|
||||
{
|
||||
typedef Max<U> other;
|
||||
};
|
||||
|
||||
__device__ __forceinline__ static T initialValue()
|
||||
{
|
||||
return VecTraits<T>::all(-numeric_limits<typename VecTraits<T>::elem_type>::max());
|
||||
@@ -158,7 +179,7 @@ __host__ void gridReduceToColumn_(const SrcPtr& src, GpuMat_<ResType>& dst, cons
|
||||
|
||||
CV_Assert( getRows(mask) == rows && getCols(mask) == cols );
|
||||
|
||||
createContinuous(rows, 1, DataType<ResType>::type, dst);
|
||||
dst.create(1, rows);
|
||||
|
||||
grid_reduce_to_vec_detail::reduceToColumn<Reductor, Policy>(shrinkPtr(src),
|
||||
dst[0],
|
||||
@@ -173,7 +194,7 @@ __host__ void gridReduceToColumn_(const SrcPtr& src, GpuMat_<ResType>& dst, Stre
|
||||
const int rows = getRows(src);
|
||||
const int cols = getCols(src);
|
||||
|
||||
createContinuous(rows, 1, DataType<ResType>::type, dst);
|
||||
dst.create(1, rows);
|
||||
|
||||
grid_reduce_to_vec_detail::reduceToColumn<Reductor, Policy>(shrinkPtr(src),
|
||||
dst[0],
|
||||
|
Reference in New Issue
Block a user