Use the new device layer for cv::gpu::sum
This commit is contained in:
@@ -418,9 +418,7 @@ namespace grid_reduce_detail
|
||||
const dim3 block(Policy::block_size_x, Policy::block_size_y);
|
||||
const dim3 grid(divUp(cols, block.x * Policy::patch_size_x), divUp(rows, block.y * Policy::patch_size_y));
|
||||
|
||||
const int BLOCK_SIZE = Policy::block_size_x * Policy::block_size_y;
|
||||
|
||||
glob_reduce<Reductor, BLOCK_SIZE, Policy::patch_size_x, Policy::patch_size_y><<<grid, block, 0, stream>>>(src, result, mask, rows, cols);
|
||||
glob_reduce<Reductor, Policy::block_size_x * Policy::block_size_y, Policy::patch_size_x, Policy::patch_size_y><<<grid, block, 0, stream>>>(src, result, mask, rows, cols);
|
||||
CV_CUDEV_SAFE_CALL( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
@@ -433,10 +431,9 @@ namespace grid_reduce_detail
|
||||
__host__ void sum(const SrcPtr& src, ResType* result, const MaskPtr& mask, int rows, int cols, cudaStream_t stream)
|
||||
{
|
||||
typedef typename PtrTraits<SrcPtr>::value_type src_type;
|
||||
const int cn = VecTraits<src_type>::cn;
|
||||
typedef typename MakeVec<ResType, cn>::type work_type;
|
||||
typedef typename VecTraits<ResType>::elem_type res_elem_type;
|
||||
|
||||
glob_reduce<SumReductor<src_type, work_type>, Policy>(src, result, mask, rows, cols, stream);
|
||||
glob_reduce<SumReductor<src_type, ResType>, Policy>(src, (res_elem_type*) result, mask, rows, cols, stream);
|
||||
}
|
||||
|
||||
template <class Policy, class SrcPtr, typename ResType, class MaskPtr>
|
||||
|
@@ -59,6 +59,10 @@ namespace cv { namespace cudev {
|
||||
template <class Policy, class SrcPtr, typename ResType, class MaskPtr>
|
||||
__host__ void gridCalcSum_(const SrcPtr& src, GpuMat_<ResType>& dst, const MaskPtr& mask, Stream& stream = Stream::Null())
|
||||
{
|
||||
typedef typename PtrTraits<SrcPtr>::value_type src_type;
|
||||
|
||||
CV_StaticAssert( VecTraits<src_type>::cn == VecTraits<ResType>::cn, "" );
|
||||
|
||||
dst.create(1, 1);
|
||||
dst.setTo(0, stream);
|
||||
|
||||
@@ -77,6 +81,10 @@ __host__ void gridCalcSum_(const SrcPtr& src, GpuMat_<ResType>& dst, const MaskP
|
||||
template <class Policy, class SrcPtr, typename ResType>
|
||||
__host__ void gridCalcSum_(const SrcPtr& src, GpuMat_<ResType>& dst, Stream& stream = Stream::Null())
|
||||
{
|
||||
typedef typename PtrTraits<SrcPtr>::value_type src_type;
|
||||
|
||||
CV_StaticAssert( VecTraits<src_type>::cn == VecTraits<ResType>::cn, "" );
|
||||
|
||||
dst.create(1, 1);
|
||||
dst.setTo(0, stream);
|
||||
|
||||
|
@@ -194,10 +194,23 @@ CV_CUDEV_IMPLEMENT_VEC_UNARY_OP(~, uint, uint)
|
||||
return VecTraits<output_type ## 4>::make(func (a.x), func (a.y), func (a.z), func (a.w)); \
|
||||
}
|
||||
|
||||
namespace vec_math_detail
{
    // Narrow-integer absolute-value helpers. They are passed as the `func`
    // argument of CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC for the char and short
    // vector abs() overloads, in place of calling ::abs directly.
    //
    // NOTE(review): for the most negative input the int result does not fit
    // back into the narrow return type, so the final narrowing cast cannot
    // yield a positive value there — confirm callers accept this.

    // Absolute value of an schar: widen to int, take ::abs, narrow back.
    __device__ __forceinline__ schar abs_(schar val)
    {
        const int widened = static_cast<int>(val);
        return static_cast<schar>(::abs(widened));
    }

    // Absolute value of a short: widen to int, take ::abs, narrow back.
    __device__ __forceinline__ short abs_(short val)
    {
        const int widened = static_cast<int>(val);
        return static_cast<short>(::abs(widened));
    }
}
|
||||
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, uchar, uchar)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, char, char)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, vec_math_detail::abs_, char, char)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, ushort, ushort)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, short, short)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, vec_math_detail::abs_, short, short)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::abs, int, int)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, /*::abs*/, uint, uint)
|
||||
CV_CUDEV_IMPLEMENT_VEC_UNARY_FUNC(abs, ::fabsf, float, float)
|
||||
|
Reference in New Issue
Block a user