Fixed GPU version of multi-band blending in stitching module
This commit is contained in:
@@ -654,27 +654,18 @@ namespace cv { namespace gpu { namespace device
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// multiply
|
||||
|
||||
struct add_16sc4 : binary_function<short4, short4, short4>
|
||||
template <> struct TransformFunctorTraits< plus<short> > : DefaultTransformFunctorTraits< plus<short> >
|
||||
{
|
||||
__device__ __forceinline__ short4 operator ()(short4 a, short4 b) const
|
||||
{
|
||||
return make_short4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
|
||||
}
|
||||
};
|
||||
|
||||
template <> struct TransformFunctorTraits<add_16sc4> : DefaultTransformFunctorTraits<add_16sc4>
|
||||
{
|
||||
enum { smart_block_dim_x = 8 };
|
||||
enum { smart_block_dim_y = 8 };
|
||||
enum { smart_shift = 8 };
|
||||
enum { smart_shift = 4 };
|
||||
};
|
||||
|
||||
void add_gpu(const DevMem2D_<short4>& src1, const DevMem2D_<short4>& src2, const DevMem2D_<short4>& dst, cudaStream_t stream)
|
||||
template <typename T> void add_gpu(const DevMem2D src1, const DevMem2D src2, DevMem2D dst, cudaStream_t stream)
|
||||
{
|
||||
transform(static_cast< DevMem2D_<short4> >(src1), static_cast< DevMem2D_<short4> >(src2),
|
||||
static_cast< DevMem2D_<short4> >(dst), add_16sc4(), stream);
|
||||
transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<T>)dst, plus<T>(), stream);
|
||||
}
|
||||
|
||||
template void add_gpu<short>(const DevMem2D src1, const DevMem2D src2, DevMem2D dst, cudaStream_t stream);
|
||||
|
||||
//////////////////////////////////////////////////////////////////////////
|
||||
// multiply
|
||||
|
||||
@@ -177,16 +177,17 @@ namespace
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
void add_gpu(const DevMem2D_<short4>& src1, const DevMem2D_<short4>& src2, const DevMem2D_<short4>& dst, cudaStream_t stream);
|
||||
template <typename T>
|
||||
void add_gpu(const DevMem2D src1, const DevMem2D src2, DevMem2D dst, cudaStream_t stream);
|
||||
}}}
|
||||
|
||||
void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
|
||||
{
|
||||
if (src1.type() == CV_16SC4 && src2.type() == CV_16SC4)
|
||||
if (src1.depth() == CV_16S && src2.depth() == CV_16S)
|
||||
{
|
||||
CV_Assert(src1.size() == src2.size());
|
||||
dst.create(src1.size(), src1.type());
|
||||
device::add_gpu(src1, src2, dst, StreamAccessor::getStream(stream));
|
||||
device::add_gpu<short>(src1.reshape(1), src2.reshape(1), dst.reshape(1), StreamAccessor::getStream(stream));
|
||||
}
|
||||
else
|
||||
nppArithmCaller(src1, src2, dst, nppiAdd_8u_C1RSfs, nppiAdd_8u_C4RSfs, nppiAdd_32s_C1R, nppiAdd_32f_C1R, StreamAccessor::getStream(stream));
|
||||
|
||||
Reference in New Issue
Block a user