fixed bug in gpu::remap under win32
This commit is contained in:
@@ -67,6 +67,24 @@ namespace cv { namespace gpu { namespace imgproc
|
||||
}
|
||||
}
|
||||
|
||||
// Dispatcher for the stream-based remap path.
//
// call() wraps `src` in a BorderReader built from a B<work_type> border object
// (constructed from src.rows, src.cols and a value made from `borderValue`),
// wraps that reader in Filter<...>, and launches the `remap` kernel on `stream`
// with 32x8 thread blocks and a grid sized by divUp over dst.cols x dst.rows.
// Only the launch status is checked via cudaGetLastError(); no stream or device
// synchronization is performed here.
template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherStream
{
    static void call(const DevMem2D_<T>& src, const DevMem2Df& mapx, const DevMem2Df& mapy, const DevMem2D_<T>& dst, const float* borderValue, cudaStream_t stream)
    {
        // Float vector type with the same channel count as T.
        typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;
        typedef B<work_type> border_type;
        typedef BorderReader< PtrStep_<T>, border_type > reader_type;

        // Source access chain: raw image -> border-aware reader -> filter.
        border_type border(src.rows, src.cols, VecTraits<work_type>::make(borderValue));
        reader_type borderedSrc(src, border);
        Filter<reader_type> filteredSrc(borderedSrc);

        // Launch geometry is derived from the destination size.
        dim3 threads(32, 8);
        dim3 blocks(divUp(dst.cols, threads.x), divUp(dst.rows, threads.y));

        remap<<<blocks, threads, 0, stream>>>(filteredSrc, mapx, mapy, dst);
        cudaSafeCall( cudaGetLastError() );
    }
};
|
||||
|
||||
template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherNonStream
|
||||
{
|
||||
static void call(const DevMem2D_<T>& src, const DevMem2Df& mapx, const DevMem2Df& mapy, const DevMem2D_<T>& dst, const float* borderValue)
|
||||
@@ -163,22 +181,7 @@ namespace cv { namespace gpu { namespace imgproc
|
||||
if (stream == 0)
|
||||
RemapDispatcherNonStream<Filter, B, T>::call(src, mapx, mapy, dst, borderValue);
|
||||
else
|
||||
callStream(src, mapx, mapy, dst, borderValue, stream);
|
||||
}
|
||||
|
||||
// Stream variant of the dispatch: forwards all arguments to
// RemapDispatcherStream<Filter, B, T>::call, which builds the border reader and
// filter and launches the `remap` kernel on `stream`.
//
// This function deliberately contains no kernel launch of its own:
// RemapDispatcherStream::call already performs the launch and the
// cudaGetLastError() check, so launching here as well would enqueue the same
// remap kernel twice for a single call.
static void callStream(const DevMem2D_<T>& src, const DevMem2Df& mapx, const DevMem2Df& mapy, const DevMem2D_<T>& dst, const float* borderValue, cudaStream_t stream)
{
    RemapDispatcherStream<Filter, B, T>::call(src, mapx, mapy, dst, borderValue, stream);
}
|
||||
};
|
||||
|
||||
|
@@ -758,6 +758,29 @@ namespace cv { namespace gpu { namespace device
|
||||
const Ptr2D ptr;
|
||||
const B b;
|
||||
};
|
||||
|
||||
// Under win32 there is a bug with templated types that are passed as kernel parameters;
|
||||
// with this specialization everything works fine.
|
||||
// Partial specialization of BorderReader for the BrdConstant<D> border type.
//
// Instead of holding a BrdConstant<D> object, it copies that object's height,
// width and val fields into plain members at construction time. operator()
// returns the source element converted with saturate_cast<D> when (y, x) lies
// inside [0, height) x [0, width), and the stored constant `val` otherwise.
template <typename Ptr2D, typename D> struct BorderReader< Ptr2D, BrdConstant<D> >
{
    typedef typename BrdConstant<D>::result_type elem_type;
    typedef typename Ptr2D::index_type index_type;

    // Captures the source accessor and the border object's fields by value.
    __host__ __device__ __forceinline__ BorderReader(const Ptr2D& src_, const BrdConstant<D>& b) :
        src(src_),
        height(b.height),
        width(b.width),
        val(b.val)
    {
    }

    // Reads element (y, x); coordinates outside the image yield `val`.
    __device__ __forceinline__ D operator ()(index_type y, index_type x) const
    {
        if (!(x >= 0 && x < width && y >= 0 && y < height))
            return val;

        return saturate_cast<D>(src(y, x));
    }

    const Ptr2D src;   // source accessor, invoked as src(y, x)
    const int height;  // copied from b.height
    const int width;   // copied from b.width
    const D val;       // value returned for out-of-range coordinates
};
|
||||
}}}
|
||||
|
||||
#endif // __OPENCV_GPU_BORDER_INTERPOLATE_HPP__
|
||||
|
@@ -80,7 +80,7 @@ TEST(remap)
|
||||
gpu::GpuMat d_src, d_dst, d_xmap, d_ymap;
|
||||
|
||||
int interpolation = INTER_LINEAR;
|
||||
int borderMode = BORDER_CONSTANT;
|
||||
int borderMode = BORDER_REPLICATE;
|
||||
|
||||
for (int size = 1000; size <= 4000; size *= 2)
|
||||
{
|
||||
|
Reference in New Issue
Block a user