fixed bug in gpu::remap under win32
This commit is contained in:
@@ -66,6 +66,24 @@ namespace cv { namespace gpu { namespace imgproc
|
|||||||
dst.ptr(y)[x] = saturate_cast<T>(src(ycoo, xcoo));
|
dst.ptr(y)[x] = saturate_cast<T>(src(ycoo, xcoo));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Dispatcher that launches the remap kernel on a caller-supplied CUDA stream.
//
// Template parameters:
//   Filter - filter template wrapped around the border-aware source reader
//   B      - border policy template, instantiated with the float work type
//   T      - element type of the source and destination images
template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherStream
{
    // Wraps `src` in a BorderReader and a Filter, then enqueues remap on `stream`.
    //
    //   src         - source image; its rows/cols are passed to the border policy
    //   mapx, mapy  - float maps forwarded to the kernel unchanged
    //   dst         - destination image; its cols/rows size the launch grid
    //   borderValue - converted with VecTraits<work_type>::make and given to the border policy
    //   stream      - CUDA stream the kernel is launched on
    //
    // Only the launch status is checked (cudaGetLastError wrapped in cudaSafeCall);
    // this function itself issues no synchronization call.
    static void call(const DevMem2D_<T>& src, const DevMem2Df& mapx, const DevMem2Df& mapy, const DevMem2D_<T>& dst, const float* borderValue, cudaStream_t stream)
    {
        // Float vector type with the same channel count as T.
        typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;
        typedef B<work_type> border_type;
        typedef BorderReader< PtrStep_<T>, border_type > reader_type;
        typedef Filter<reader_type> filter_type;

        // Build the source accessor chain: border policy -> border reader -> filter.
        border_type border(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
        reader_type borderedSrc(src, border);
        filter_type filteredSrc(borderedSrc);

        // 32x8 threads per block; enough blocks to cover every destination pixel.
        dim3 threads(32, 8);
        dim3 blocks(divUp(dst.cols, threads.x), divUp(dst.rows, threads.y));

        remap<<<blocks, threads, 0, stream>>>(filteredSrc, mapx, mapy, dst);
        cudaSafeCall( cudaGetLastError() );
    }
};
|
||||||
|
|
||||||
template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherNonStream
|
template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherNonStream
|
||||||
{
|
{
|
||||||
@@ -163,22 +181,7 @@ namespace cv { namespace gpu { namespace imgproc
|
|||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
RemapDispatcherNonStream<Filter, B, T>::call(src, mapx, mapy, dst, borderValue);
|
RemapDispatcherNonStream<Filter, B, T>::call(src, mapx, mapy, dst, borderValue);
|
||||||
else
|
else
|
||||||
callStream(src, mapx, mapy, dst, borderValue, stream);
|
RemapDispatcherStream<Filter, B, T>::call(src, mapx, mapy, dst, borderValue, stream);
|
||||||
}
|
|
||||||
|
|
||||||
// Launches the remap kernel on the given CUDA stream.
//
//   src         - source image; its rows/cols are passed to the border policy
//   mapx, mapy  - float maps forwarded to the kernel unchanged
//   dst         - destination image; its cols/rows size the launch grid
//   borderValue - converted with VecTraits<work_type>::make and given to the border policy
//   stream      - CUDA stream the kernel is launched on
//
// Only the launch status is checked afterwards; no synchronization call is made here.
static void callStream(const DevMem2D_<T>& src, const DevMem2Df& mapx, const DevMem2Df& mapy, const DevMem2D_<T>& dst, const float* borderValue, cudaStream_t stream)
{
    // Float vector type with the same channel count as T.
    typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;
    typedef BorderReader< PtrStep_<T>, B<work_type> > reader_type;

    // 32x8 threads per block; the grid is rounded up to cover all of dst.
    dim3 threads(32, 8);
    dim3 blocks(divUp(dst.cols, threads.x), divUp(dst.rows, threads.y));

    // Source accessor chain: border policy -> border reader -> filter.
    B<work_type> border(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
    reader_type borderedSrc(src, border);
    Filter<reader_type> filteredSrc(borderedSrc);

    remap<<<blocks, threads, 0, stream>>>(filteredSrc, mapx, mapy, dst);
    cudaSafeCall( cudaGetLastError() );
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@@ -758,6 +758,29 @@ namespace cv { namespace gpu { namespace device
|
|||||||
const Ptr2D ptr;
|
const Ptr2D ptr;
|
||||||
const B b;
|
const B b;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// under win32 there is a bug with templated types that are passed as kernel parameters
|
||||||
|
// with this specialization everything works fine
|
||||||
|
// Partial specialization of BorderReader for the BrdConstant<D> border policy.
// Rather than holding a BrdConstant object, it copies that object's height,
// width and val into plain members and performs the bounds test itself.
template <typename Ptr2D, typename D> struct BorderReader< Ptr2D, BrdConstant<D> >
{
    typedef typename BrdConstant<D>::result_type elem_type;
    typedef typename Ptr2D::index_type index_type;

    // Stores the source accessor together with the border's dimensions and constant value.
    __host__ __device__ __forceinline__ BorderReader(const Ptr2D& source, const BrdConstant<D>& border)
        : src(source), height(border.height), width(border.width), val(border.val)
    {
    }

    // Returns src(y, x) converted with saturate_cast<D> when 0 <= x < width and
    // 0 <= y < height; for any other coordinate returns the constant val.
    __device__ __forceinline__ D operator ()(index_type y, index_type x) const
    {
        const bool inside = x >= 0 && x < width && y >= 0 && y < height;

        if (inside)
            return saturate_cast<D>(src(y, x));

        return val;
    }

    const Ptr2D src;   // underlying source accessor
    const int height;  // copied from the border object's height
    const int width;   // copied from the border object's width
    const D val;       // value returned for out-of-range coordinates
};
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
#endif // __OPENCV_GPU_BORDER_INTERPOLATE_HPP__
|
#endif // __OPENCV_GPU_BORDER_INTERPOLATE_HPP__
|
||||||
|
@@ -80,7 +80,7 @@ TEST(remap)
|
|||||||
gpu::GpuMat d_src, d_dst, d_xmap, d_ymap;
|
gpu::GpuMat d_src, d_dst, d_xmap, d_ymap;
|
||||||
|
|
||||||
int interpolation = INTER_LINEAR;
|
int interpolation = INTER_LINEAR;
|
||||||
int borderMode = BORDER_CONSTANT;
|
int borderMode = BORDER_REPLICATE;
|
||||||
|
|
||||||
for (int size = 1000; size <= 4000; size *= 2)
|
for (int size = 1000; size <= 4000; size *= 2)
|
||||||
{
|
{
|
||||||
|
Reference in New Issue
Block a user