fixed bug with submatrix in some gpu functions
update gpu tests
This commit is contained in:
@@ -74,7 +74,7 @@ namespace cv { namespace gpu { namespace device
|
||||
cudaSafeCall(cudaMemcpyToSymbol(crot1, rot + 3, sizeof(float) * 3));
|
||||
cudaSafeCall(cudaMemcpyToSymbol(crot2, rot + 6, sizeof(float) * 3));
|
||||
cudaSafeCall(cudaMemcpyToSymbol(ctransl, transl, sizeof(float) * 3));
|
||||
::cv::gpu::device::transform(src, dst, TransformOp(), stream);
|
||||
cv::gpu::device::transform(src, dst, TransformOp(), WithOutMask(), stream);
|
||||
}
|
||||
} // namespace transform_points
|
||||
|
||||
@@ -113,7 +113,7 @@ namespace cv { namespace gpu { namespace device
|
||||
cudaSafeCall(cudaMemcpyToSymbol(ctransl, transl, sizeof(float) * 3));
|
||||
cudaSafeCall(cudaMemcpyToSymbol(cproj0, proj, sizeof(float) * 3));
|
||||
cudaSafeCall(cudaMemcpyToSymbol(cproj1, proj + 3, sizeof(float) * 3));
|
||||
::cv::gpu::device::transform(src, dst, ProjectOp(), stream);
|
||||
cv::gpu::device::transform(src, dst, ProjectOp(), WithOutMask(), stream);
|
||||
}
|
||||
} // namespace project_points
|
||||
|
||||
|
@@ -226,7 +226,7 @@ namespace cv { namespace gpu { namespace device
|
||||
traits::functor_type functor = traits::create_functor(); \
|
||||
typedef typename traits::functor_type::argument_type src_t; \
|
||||
typedef typename traits::functor_type::result_type dst_t; \
|
||||
::cv::gpu::device::transform((DevMem2D_<src_t>)src, (DevMem2D_<dst_t>)dst, functor, stream); \
|
||||
cv::gpu::device::transform((DevMem2D_<src_t>)src, (DevMem2D_<dst_t>)dst, functor, WithOutMask(), stream); \
|
||||
}
|
||||
|
||||
#define OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(name) \
|
||||
|
@@ -84,9 +84,9 @@ namespace cv { namespace gpu { namespace device
|
||||
template <typename T, typename D> void add_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream)
|
||||
{
|
||||
if (mask.data)
|
||||
::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, mask, Add<T, D>(), stream);
|
||||
cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, Add<T, D>(), SingleMask(mask), stream);
|
||||
else
|
||||
::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, Add<T, D>(), stream);
|
||||
cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, Add<T, D>(), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void add_gpu<uchar, uchar>(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
||||
@@ -181,9 +181,9 @@ namespace cv { namespace gpu { namespace device
|
||||
cudaSafeCall( cudaSetDoubleForDevice(&val) );
|
||||
AddScalar<T, D> op(val);
|
||||
if (mask.data)
|
||||
::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, mask, op, stream);
|
||||
cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, op, SingleMask(mask), stream);
|
||||
else
|
||||
::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, op, stream);
|
||||
cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, op, WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void add_gpu<uchar, uchar>(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
||||
@@ -277,9 +277,9 @@ namespace cv { namespace gpu { namespace device
|
||||
template <typename T, typename D> void subtract_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream)
|
||||
{
|
||||
if (mask.data)
|
||||
::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, mask, Subtract<T, D>(), stream);
|
||||
cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, Subtract<T, D>(), SingleMask(mask), stream);
|
||||
else
|
||||
::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, Subtract<T, D>(), stream);
|
||||
cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, Subtract<T, D>(), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void subtract_gpu<uchar, uchar>(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
||||
@@ -374,9 +374,9 @@ namespace cv { namespace gpu { namespace device
|
||||
cudaSafeCall( cudaSetDoubleForDevice(&val) );
|
||||
SubtractScalar<T, D> op(val);
|
||||
if (mask.data)
|
||||
::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, mask, op, stream);
|
||||
cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, op, SingleMask(mask), stream);
|
||||
else
|
||||
::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, op, stream);
|
||||
cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, op, WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void subtract_gpu<uchar, uchar>(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
|
||||
@@ -462,7 +462,7 @@ namespace cv { namespace gpu { namespace device
|
||||
|
||||
void multiply_gpu(const DevMem2D_<uchar4>& src1, const DevMem2Df& src2, const DevMem2D_<uchar4>& dst, cudaStream_t stream)
|
||||
{
|
||||
::cv::gpu::device::transform(static_cast< DevMem2D_<uint> >(src1), src2, static_cast< DevMem2D_<uint> >(dst), multiply_8uc4_32f(), stream);
|
||||
cv::gpu::device::transform(static_cast< DevMem2D_<uint> >(src1), src2, static_cast< DevMem2D_<uint> >(dst), multiply_8uc4_32f(), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
struct multiply_16sc4_32f : binary_function<short4, float, short4>
|
||||
@@ -483,7 +483,7 @@ namespace cv { namespace gpu { namespace device
|
||||
|
||||
void multiply_gpu(const DevMem2D_<short4>& src1, const DevMem2Df& src2, const DevMem2D_<short4>& dst, cudaStream_t stream)
|
||||
{
|
||||
::cv::gpu::device::transform(static_cast< DevMem2D_<short4> >(src1), src2, static_cast< DevMem2D_<short4> >(dst), multiply_16sc4_32f(), stream);
|
||||
cv::gpu::device::transform(static_cast< DevMem2D_<short4> >(src1), src2, static_cast< DevMem2D_<short4> >(dst), multiply_16sc4_32f(), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template <typename T, typename D> struct Multiply : binary_function<T, T, D>
|
||||
@@ -521,7 +521,7 @@ namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
cudaSafeCall( cudaSetDoubleForDevice(&scale) );
|
||||
Multiply<T, D> op(scale);
|
||||
::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, op, stream);
|
||||
cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, op, WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void multiply_gpu<uchar, uchar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
||||
@@ -617,7 +617,7 @@ namespace cv { namespace gpu { namespace device
|
||||
cudaSafeCall( cudaSetDoubleForDevice(&val) );
|
||||
cudaSafeCall( cudaSetDoubleForDevice(&scale) );
|
||||
MultiplyScalar<T, D> op(val, scale);
|
||||
::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, op, stream);
|
||||
cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, op, WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void multiply_gpu<uchar, uchar >(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
||||
@@ -698,7 +698,7 @@ namespace cv { namespace gpu { namespace device
|
||||
|
||||
void divide_gpu(const DevMem2D_<uchar4>& src1, const DevMem2Df& src2, const DevMem2D_<uchar4>& dst, cudaStream_t stream)
|
||||
{
|
||||
transform(static_cast< DevMem2D_<uchar4> >(src1), src2, static_cast< DevMem2D_<uchar4> >(dst), divide_8uc4_32f(), stream);
|
||||
cv::gpu::device::transform(static_cast< DevMem2D_<uchar4> >(src1), src2, static_cast< DevMem2D_<uchar4> >(dst), divide_8uc4_32f(), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
|
||||
@@ -721,7 +721,7 @@ namespace cv { namespace gpu { namespace device
|
||||
|
||||
void divide_gpu(const DevMem2D_<short4>& src1, const DevMem2Df& src2, const DevMem2D_<short4>& dst, cudaStream_t stream)
|
||||
{
|
||||
transform(static_cast< DevMem2D_<short4> >(src1), src2, static_cast< DevMem2D_<short4> >(dst), divide_16sc4_32f(), stream);
|
||||
cv::gpu::device::transform(static_cast< DevMem2D_<short4> >(src1), src2, static_cast< DevMem2D_<short4> >(dst), divide_16sc4_32f(), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template <typename T, typename D> struct Divide : binary_function<T, T, D>
|
||||
@@ -759,7 +759,7 @@ namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
cudaSafeCall( cudaSetDoubleForDevice(&scale) );
|
||||
Divide<T, D> op(scale);
|
||||
::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, op, stream);
|
||||
cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, op, WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void divide_gpu<uchar, uchar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
||||
@@ -855,7 +855,7 @@ namespace cv { namespace gpu { namespace device
|
||||
cudaSafeCall( cudaSetDoubleForDevice(&val) );
|
||||
cudaSafeCall( cudaSetDoubleForDevice(&scale) );
|
||||
DivideScalar<T, D> op(val, scale);
|
||||
::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, op, stream);
|
||||
cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<D>)dst, op, WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void divide_gpu<uchar, uchar >(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream);
|
||||
@@ -949,7 +949,7 @@ namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
cudaSafeCall( cudaSetDoubleForDevice(&scalar) );
|
||||
Reciprocal<T, D> op(scalar);
|
||||
::cv::gpu::device::transform((DevMem2D_<T>)src2, (DevMem2D_<D>)dst, op, stream);
|
||||
cv::gpu::device::transform((DevMem2D_<T>)src2, (DevMem2D_<D>)dst, op, WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void divide_gpu<uchar, uchar >(double scalar, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||
@@ -1055,7 +1055,7 @@ namespace cv { namespace gpu { namespace device
|
||||
|
||||
template <typename T> void absdiff_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream)
|
||||
{
|
||||
::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<T>)dst, Absdiff<T>(), stream);
|
||||
cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<T>)dst, Absdiff<T>(), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void absdiff_gpu<uchar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||
@@ -1101,7 +1101,7 @@ namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
cudaSafeCall( cudaSetDoubleForDevice(&val) );
|
||||
AbsdiffScalar<T> op(val);
|
||||
::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)dst, op, stream);
|
||||
cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)dst, op, WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void absdiff_gpu<uchar >(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||
@@ -1188,7 +1188,7 @@ namespace cv { namespace gpu { namespace device
|
||||
template <template <typename> class Op, typename T> void compare(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream)
|
||||
{
|
||||
Op<T> op;
|
||||
::cv::gpu::device::transform(static_cast< DevMem2D_<T> >(src1), static_cast< DevMem2D_<T> >(src2), dst, op, stream);
|
||||
cv::gpu::device::transform(static_cast< DevMem2D_<T> >(src1), static_cast< DevMem2D_<T> >(src2), dst, op, WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template <typename T> void compare_eq(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream)
|
||||
@@ -1546,7 +1546,7 @@ namespace cv { namespace gpu { namespace device
|
||||
template <typename T>
|
||||
void min_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream)
|
||||
{
|
||||
::cv::gpu::device::transform(src1, src2, dst, minimum<T>(), stream);
|
||||
cv::gpu::device::transform(src1, src2, dst, minimum<T>(), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void min_gpu<uchar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||
@@ -1560,7 +1560,7 @@ namespace cv { namespace gpu { namespace device
|
||||
template <typename T>
|
||||
void max_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream)
|
||||
{
|
||||
::cv::gpu::device::transform(src1, src2, dst, maximum<T>(), stream);
|
||||
cv::gpu::device::transform(src1, src2, dst, maximum<T>(), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void max_gpu<uchar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||
@@ -1574,7 +1574,7 @@ namespace cv { namespace gpu { namespace device
|
||||
template <typename T>
|
||||
void min_gpu(const DevMem2D_<T>& src1, T src2, const DevMem2D_<T>& dst, cudaStream_t stream)
|
||||
{
|
||||
::cv::gpu::device::transform(src1, dst, device::bind2nd(minimum<T>(), src2), stream);
|
||||
cv::gpu::device::transform(src1, dst, device::bind2nd(minimum<T>(), src2), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void min_gpu<uchar >(const DevMem2Db& src1, uchar src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||
@@ -1588,7 +1588,7 @@ namespace cv { namespace gpu { namespace device
|
||||
template <typename T>
|
||||
void max_gpu(const DevMem2D_<T>& src1, T src2, const DevMem2D_<T>& dst, cudaStream_t stream)
|
||||
{
|
||||
::cv::gpu::device::transform(src1, dst, device::bind2nd(maximum<T>(), src2), stream);
|
||||
cv::gpu::device::transform(src1, dst, device::bind2nd(maximum<T>(), src2), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void max_gpu<uchar >(const DevMem2Db& src1, uchar src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||
@@ -1635,19 +1635,17 @@ namespace cv { namespace gpu { namespace device
|
||||
};
|
||||
|
||||
template <template <typename> class Op, typename T>
|
||||
void threshold_caller(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, T thresh, T maxVal,
|
||||
cudaStream_t stream)
|
||||
void threshold_caller(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, T thresh, T maxVal, cudaStream_t stream)
|
||||
{
|
||||
Op<T> op(thresh, maxVal);
|
||||
::cv::gpu::device::transform(src, dst, op, stream);
|
||||
cv::gpu::device::transform(src, dst, op, WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void threshold_gpu(const DevMem2Db& src, const DevMem2Db& dst, T thresh, T maxVal, int type,
|
||||
cudaStream_t stream)
|
||||
{
|
||||
typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, T thresh, T maxVal,
|
||||
cudaStream_t stream);
|
||||
typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, T thresh, T maxVal, cudaStream_t stream);
|
||||
|
||||
static const caller_t callers[] =
|
||||
{
|
||||
@@ -1737,7 +1735,7 @@ namespace cv { namespace gpu { namespace device
|
||||
template<typename T>
|
||||
void pow_caller(const DevMem2Db& src, float power, DevMem2Db dst, cudaStream_t stream)
|
||||
{
|
||||
::cv::gpu::device::transform((DevMem2D_<T>)src, (DevMem2D_<T>)dst, PowOp<T>(power), stream);
|
||||
cv::gpu::device::transform((DevMem2D_<T>)src, (DevMem2D_<T>)dst, PowOp<T>(power), WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void pow_caller<uchar>(const DevMem2Db& src, float power, DevMem2Db dst, cudaStream_t stream);
|
||||
@@ -1829,7 +1827,7 @@ namespace cv { namespace gpu { namespace device
|
||||
|
||||
AddWeighted<T1, T2, D> op(alpha, beta, gamma);
|
||||
|
||||
::cv::gpu::device::transform(static_cast< DevMem2D_<T1> >(src1), static_cast< DevMem2D_<T2> >(src2), static_cast< DevMem2D_<D> >(dst), op, stream);
|
||||
cv::gpu::device::transform(static_cast< DevMem2D_<T1> >(src1), static_cast< DevMem2D_<T2> >(src2), static_cast< DevMem2D_<D> >(dst), op, WithOutMask(), stream);
|
||||
}
|
||||
|
||||
template void addWeighted_gpu<uchar, uchar, uchar>(const DevMem2Db& src1, double alpha, const DevMem2Db& src2, double beta, double gamma, const DevMem2Db& dst, cudaStream_t stream);
|
||||
|
@@ -44,7 +44,6 @@
|
||||
#include "opencv2/gpu/device/limits.hpp"
|
||||
#include "opencv2/gpu/device/saturate_cast.hpp"
|
||||
#include "opencv2/gpu/device/vec_math.hpp"
|
||||
#include "opencv2/gpu/device/transform.hpp"
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
|
@@ -67,7 +67,7 @@ namespace cv { namespace gpu { namespace device
|
||||
|
||||
template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherStream
|
||||
{
|
||||
static void call(const DevMem2D_<T>& src, const DevMem2Df& mapx, const DevMem2Df& mapy, const DevMem2D_<T>& dst,
|
||||
static void call(DevMem2D_<T> src, DevMem2Df mapx, DevMem2Df mapy, DevMem2D_<T> dst,
|
||||
const float* borderValue, cudaStream_t stream, int)
|
||||
{
|
||||
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;
|
||||
@@ -86,7 +86,8 @@ namespace cv { namespace gpu { namespace device
|
||||
|
||||
template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcherNonStream
|
||||
{
|
||||
static void call(const DevMem2D_<T>& src, const DevMem2Df& mapx, const DevMem2Df& mapy, const DevMem2D_<T>& dst, const float* borderValue, int)
|
||||
static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, DevMem2Df mapx, DevMem2Df mapy,
|
||||
DevMem2D_<T> dst, const float* borderValue, int)
|
||||
{
|
||||
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;
|
||||
|
||||
@@ -110,20 +111,23 @@ namespace cv { namespace gpu { namespace device
|
||||
{ \
|
||||
typedef type elem_type; \
|
||||
typedef int index_type; \
|
||||
int xoff, yoff; \
|
||||
tex_remap_ ## type ## _reader (int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \
|
||||
__device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
|
||||
{ \
|
||||
return tex2D(tex_remap_ ## type , x, y); \
|
||||
return tex2D(tex_remap_ ## type , x + xoff, y + yoff); \
|
||||
} \
|
||||
}; \
|
||||
template <template <typename> class Filter, template <typename> class B> struct RemapDispatcherNonStream<Filter, B, type> \
|
||||
{ \
|
||||
static void call(const DevMem2D_< type >& src, const DevMem2Df& mapx, const DevMem2Df& mapy, const DevMem2D_< type >& dst, const float* borderValue, int cc) \
|
||||
static void call(DevMem2D_< type > src, DevMem2D_< type > srcWhole, int xoff, int yoff, DevMem2Df mapx, DevMem2Df mapy, \
|
||||
DevMem2D_< type > dst, const float* borderValue, int cc) \
|
||||
{ \
|
||||
typedef typename TypeVec<float, VecTraits< type >::cn>::vec_type work_type; \
|
||||
dim3 block(32, cc >= 20 ? 8 : 4); \
|
||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
|
||||
bindTexture(&tex_remap_ ## type , src); \
|
||||
tex_remap_ ## type ##_reader texSrc; \
|
||||
bindTexture(&tex_remap_ ## type , srcWhole); \
|
||||
tex_remap_ ## type ##_reader texSrc(xoff, yoff); \
|
||||
B<work_type> brd(src.rows, src.cols, VecTraits<work_type>::make(borderValue)); \
|
||||
BorderReader< tex_remap_ ## type ##_reader, B<work_type> > brdSrc(texSrc, brd); \
|
||||
Filter< BorderReader< tex_remap_ ## type ##_reader, B<work_type> > > filter_src(brdSrc); \
|
||||
@@ -134,14 +138,25 @@ namespace cv { namespace gpu { namespace device
|
||||
}; \
|
||||
template <template <typename> class Filter> struct RemapDispatcherNonStream<Filter, BrdReplicate, type> \
|
||||
{ \
|
||||
static void call(const DevMem2D_< type >& src, const DevMem2Df& mapx, const DevMem2Df& mapy, const DevMem2D_< type >& dst, const float*, int) \
|
||||
static void call(DevMem2D_< type > src, DevMem2D_< type > srcWhole, int xoff, int yoff, DevMem2Df mapx, DevMem2Df mapy, \
|
||||
DevMem2D_< type > dst, const float*, int) \
|
||||
{ \
|
||||
dim3 block(32, 8); \
|
||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
|
||||
bindTexture(&tex_remap_ ## type , src); \
|
||||
tex_remap_ ## type ##_reader texSrc; \
|
||||
Filter< tex_remap_ ## type ##_reader > filter_src(texSrc); \
|
||||
remap<<<grid, block>>>(filter_src, mapx, mapy, dst); \
|
||||
bindTexture(&tex_remap_ ## type , srcWhole); \
|
||||
tex_remap_ ## type ##_reader texSrc(xoff, yoff); \
|
||||
if (srcWhole.cols == src.cols && srcWhole.rows == src.rows) \
|
||||
{ \
|
||||
Filter< tex_remap_ ## type ##_reader > filter_src(texSrc); \
|
||||
remap<<<grid, block>>>(filter_src, mapx, mapy, dst); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
BrdReplicate<type> brd(src.rows, src.cols); \
|
||||
BorderReader< tex_remap_ ## type ##_reader, BrdReplicate<type> > brdSrc(texSrc, brd); \
|
||||
Filter< BorderReader< tex_remap_ ## type ##_reader, BrdReplicate<type> > > filter_src(brdSrc); \
|
||||
remap<<<grid, block>>>(filter_src, mapx, mapy, dst); \
|
||||
} \
|
||||
cudaSafeCall( cudaGetLastError() ); \
|
||||
cudaSafeCall( cudaDeviceSynchronize() ); \
|
||||
} \
|
||||
@@ -175,21 +190,21 @@ namespace cv { namespace gpu { namespace device
|
||||
|
||||
template <template <typename> class Filter, template <typename> class B, typename T> struct RemapDispatcher
|
||||
{
|
||||
static void call(const DevMem2D_<T>& src, const DevMem2Df& mapx, const DevMem2Df& mapy, const DevMem2D_<T>& dst,
|
||||
const float* borderValue, cudaStream_t stream, int cc)
|
||||
static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, DevMem2Df mapx, DevMem2Df mapy,
|
||||
DevMem2D_<T> dst, const float* borderValue, cudaStream_t stream, int cc)
|
||||
{
|
||||
if (stream == 0)
|
||||
RemapDispatcherNonStream<Filter, B, T>::call(src, mapx, mapy, dst, borderValue, cc);
|
||||
RemapDispatcherNonStream<Filter, B, T>::call(src, srcWhole, xoff, yoff, mapx, mapy, dst, borderValue, cc);
|
||||
else
|
||||
RemapDispatcherStream<Filter, B, T>::call(src, mapx, mapy, dst, borderValue, stream, cc);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T> void remap_gpu(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation,
|
||||
int borderMode, const float* borderValue, cudaStream_t stream, int cc)
|
||||
template <typename T> void remap_gpu(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap,
|
||||
DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc)
|
||||
{
|
||||
typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D_<T>& dst,
|
||||
const float* borderValue, cudaStream_t stream, int cc);
|
||||
typedef void (*caller_t)(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap,
|
||||
DevMem2D_<T> dst, const float* borderValue, cudaStream_t stream, int cc);
|
||||
|
||||
static const caller_t callers[3][5] =
|
||||
{
|
||||
@@ -216,37 +231,38 @@ namespace cv { namespace gpu { namespace device
|
||||
}
|
||||
};
|
||||
|
||||
callers[interpolation][borderMode](static_cast< DevMem2D_<T> >(src), xmap, ymap, static_cast< DevMem2D_<T> >(dst), borderValue, stream, cc);
|
||||
callers[interpolation][borderMode](static_cast< DevMem2D_<T> >(src), static_cast< DevMem2D_<T> >(srcWhole), xoff, yoff, xmap, ymap,
|
||||
static_cast< DevMem2D_<T> >(dst), borderValue, stream, cc);
|
||||
}
|
||||
|
||||
template void remap_gpu<uchar >(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
//template void remap_gpu<uchar2>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
template void remap_gpu<uchar3>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
template void remap_gpu<uchar4>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
template void remap_gpu<uchar >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
//template void remap_gpu<uchar2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
template void remap_gpu<uchar3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
template void remap_gpu<uchar4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
|
||||
//template void remap_gpu<schar>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
//template void remap_gpu<char2>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
//template void remap_gpu<char3>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
//template void remap_gpu<char4>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
//template void remap_gpu<schar>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
//template void remap_gpu<char2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
//template void remap_gpu<char3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
//template void remap_gpu<char4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
|
||||
template void remap_gpu<ushort >(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
//template void remap_gpu<ushort2>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
template void remap_gpu<ushort3>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
template void remap_gpu<ushort4>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
template void remap_gpu<ushort >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
//template void remap_gpu<ushort2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
template void remap_gpu<ushort3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
template void remap_gpu<ushort4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
|
||||
template void remap_gpu<short >(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
//template void remap_gpu<short2>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
template void remap_gpu<short3>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
template void remap_gpu<short4>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
template void remap_gpu<short >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
//template void remap_gpu<short2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
template void remap_gpu<short3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
template void remap_gpu<short4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
|
||||
//template void remap_gpu<int >(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
//template void remap_gpu<int2>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
//template void remap_gpu<int3>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
//template void remap_gpu<int4>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
//template void remap_gpu<int >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
//template void remap_gpu<int2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
//template void remap_gpu<int3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
//template void remap_gpu<int4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
|
||||
template void remap_gpu<float >(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
//template void remap_gpu<float2>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
template void remap_gpu<float3>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
template void remap_gpu<float4>(const DevMem2Db& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2Db& dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
template void remap_gpu<float >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
//template void remap_gpu<float2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
template void remap_gpu<float3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
template void remap_gpu<float4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, DevMem2Df xmap, DevMem2Df ymap, DevMem2Db dst, int interpolation, int borderMode, const float* borderValue, cudaStream_t stream, int cc);
|
||||
} // namespace imgproc
|
||||
}}} // namespace cv { namespace gpu { namespace device
|
||||
|
@@ -80,7 +80,7 @@ namespace cv { namespace gpu { namespace device
|
||||
|
||||
template <template <typename> class Filter, typename T> struct ResizeDispatcherStream
|
||||
{
|
||||
static void call(const DevMem2D_<T>& src, float fx, float fy, const DevMem2D_<T>& dst, cudaStream_t stream)
|
||||
static void call(DevMem2D_<T> src, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)
|
||||
{
|
||||
dim3 block(32, 8);
|
||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
|
||||
@@ -95,7 +95,7 @@ namespace cv { namespace gpu { namespace device
|
||||
};
|
||||
template <typename T> struct ResizeDispatcherStream<PointFilter, T>
|
||||
{
|
||||
static void call(const DevMem2D_<T>& src, float fx, float fy, const DevMem2D_<T>& dst, cudaStream_t stream)
|
||||
static void call(DevMem2D_<T> src, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)
|
||||
{
|
||||
dim3 block(32, 8);
|
||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
|
||||
@@ -110,7 +110,7 @@ namespace cv { namespace gpu { namespace device
|
||||
|
||||
template <template <typename> class Filter, typename T> struct ResizeDispatcherNonStream
|
||||
{
|
||||
static void call(const DevMem2D_<T>& src, float fx, float fy, const DevMem2D_<T>& dst)
|
||||
static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_<T> dst)
|
||||
{
|
||||
dim3 block(32, 8);
|
||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
|
||||
@@ -127,7 +127,7 @@ namespace cv { namespace gpu { namespace device
|
||||
};
|
||||
template <typename T> struct ResizeDispatcherNonStream<PointFilter, T>
|
||||
{
|
||||
static void call(const DevMem2D_<T>& src, float fx, float fy, const DevMem2D_<T>& dst)
|
||||
static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_<T> dst)
|
||||
{
|
||||
dim3 block(32, 8);
|
||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
|
||||
@@ -148,19 +148,21 @@ namespace cv { namespace gpu { namespace device
|
||||
{ \
|
||||
typedef type elem_type; \
|
||||
typedef int index_type; \
|
||||
int xoff, yoff; \
|
||||
tex_resize_ ## type ## _reader (int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \
|
||||
__device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
|
||||
{ \
|
||||
return tex2D(tex_resize_ ## type , x, y); \
|
||||
return tex2D(tex_resize_ ## type , x + xoff, y + yoff); \
|
||||
} \
|
||||
}; \
|
||||
template <template <typename> class Filter> struct ResizeDispatcherNonStream<Filter, type> \
|
||||
{ \
|
||||
static void call(const DevMem2D_< type >& src, float fx, float fy, const DevMem2D_< type >& dst) \
|
||||
static void call(DevMem2D_< type > src, DevMem2D_< type > srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_< type > dst) \
|
||||
{ \
|
||||
dim3 block(32, 8); \
|
||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
|
||||
bindTexture(&tex_resize_ ## type , src); \
|
||||
tex_resize_ ## type ##_reader texSrc; \
|
||||
bindTexture(&tex_resize_ ## type , srcWhole); \
|
||||
tex_resize_ ## type ##_reader texSrc(xoff, yoff); \
|
||||
Filter< tex_resize_ ## type ##_reader > filter_src(texSrc); \
|
||||
resize<<<grid, block>>>(filter_src, fx, fy, dst); \
|
||||
cudaSafeCall( cudaGetLastError() ); \
|
||||
@@ -169,12 +171,12 @@ namespace cv { namespace gpu { namespace device
|
||||
}; \
|
||||
template <> struct ResizeDispatcherNonStream<PointFilter, type> \
|
||||
{ \
|
||||
static void call(const DevMem2D_< type >& src, float fx, float fy, const DevMem2D_< type >& dst) \
|
||||
static void call(DevMem2D_< type > src, DevMem2D_< type > srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_< type > dst) \
|
||||
{ \
|
||||
dim3 block(32, 8); \
|
||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
|
||||
bindTexture(&tex_resize_ ## type , src); \
|
||||
tex_resize_ ## type ##_reader texSrc; \
|
||||
bindTexture(&tex_resize_ ## type , srcWhole); \
|
||||
tex_resize_ ## type ##_reader texSrc(xoff, yoff); \
|
||||
resizeNN<<<grid, block>>>(texSrc, fx, fy, dst); \
|
||||
cudaSafeCall( cudaGetLastError() ); \
|
||||
cudaSafeCall( cudaDeviceSynchronize() ); \
|
||||
@@ -209,55 +211,57 @@ namespace cv { namespace gpu { namespace device
|
||||
|
||||
template <template <typename> class Filter, typename T> struct ResizeDispatcher
|
||||
{
|
||||
static void call(const DevMem2D_<T>& src, float fx, float fy, const DevMem2D_<T>& dst, cudaStream_t stream)
|
||||
static void call(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream)
|
||||
{
|
||||
if (stream == 0)
|
||||
ResizeDispatcherNonStream<Filter, T>::call(src, fx, fy, dst);
|
||||
ResizeDispatcherNonStream<Filter, T>::call(src, srcWhole, xoff, yoff, fx, fy, dst);
|
||||
else
|
||||
ResizeDispatcherStream<Filter, T>::call(src, fx, fy, dst, stream);
|
||||
}
|
||||
};
|
||||
|
||||
template <typename T> void resize_gpu(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream)
|
||||
template <typename T> void resize_gpu(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy,
|
||||
DevMem2Db dst, int interpolation, cudaStream_t stream)
|
||||
{
|
||||
typedef void (*caller_t)(const DevMem2D_<T>& src, float fx, float fy, const DevMem2D_<T>& dst, cudaStream_t stream);
|
||||
typedef void (*caller_t)(DevMem2D_<T> src, DevMem2D_<T> srcWhole, int xoff, int yoff, float fx, float fy, DevMem2D_<T> dst, cudaStream_t stream);
|
||||
|
||||
static const caller_t callers[3] =
|
||||
{
|
||||
ResizeDispatcher<PointFilter, T>::call, ResizeDispatcher<LinearFilter, T>::call, ResizeDispatcher<CubicFilter, T>::call
|
||||
};
|
||||
|
||||
callers[interpolation](static_cast< DevMem2D_<T> >(src), fx, fy, static_cast< DevMem2D_<T> >(dst), stream);
|
||||
callers[interpolation](static_cast< DevMem2D_<T> >(src), static_cast< DevMem2D_<T> >(srcWhole), xoff, yoff, fx, fy,
|
||||
static_cast< DevMem2D_<T> >(dst), stream);
|
||||
}
|
||||
|
||||
template void resize_gpu<uchar >(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
|
||||
//template void resize_gpu<uchar2>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
|
||||
template void resize_gpu<uchar3>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
|
||||
template void resize_gpu<uchar4>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
|
||||
template void resize_gpu<uchar >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
||||
//template void resize_gpu<uchar2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
||||
template void resize_gpu<uchar3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
||||
template void resize_gpu<uchar4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
||||
|
||||
//template void resize_gpu<schar>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
|
||||
//template void resize_gpu<char2>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
|
||||
//template void resize_gpu<char3>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
|
||||
//template void resize_gpu<char4>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
|
||||
//template void resize_gpu<schar>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
||||
//template void resize_gpu<char2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
||||
//template void resize_gpu<char3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
||||
//template void resize_gpu<char4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
||||
|
||||
template void resize_gpu<ushort >(const DevMem2Db& src,float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
|
||||
//template void resize_gpu<ushort2>(const DevMem2Db& src,float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
|
||||
template void resize_gpu<ushort3>(const DevMem2Db& src,float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
|
||||
template void resize_gpu<ushort4>(const DevMem2Db& src,float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
|
||||
template void resize_gpu<ushort >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
||||
//template void resize_gpu<ushort2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
||||
template void resize_gpu<ushort3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
||||
template void resize_gpu<ushort4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
||||
|
||||
template void resize_gpu<short >(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
|
||||
//template void resize_gpu<short2>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
|
||||
template void resize_gpu<short3>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
|
||||
template void resize_gpu<short4>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
|
||||
template void resize_gpu<short >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
||||
//template void resize_gpu<short2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
||||
template void resize_gpu<short3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
||||
template void resize_gpu<short4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
||||
|
||||
//template void resize_gpu<int >(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
|
||||
//template void resize_gpu<int2>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
|
||||
//template void resize_gpu<int3>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
|
||||
//template void resize_gpu<int4>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
|
||||
//template void resize_gpu<int >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
||||
//template void resize_gpu<int2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
||||
//template void resize_gpu<int3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
||||
//template void resize_gpu<int4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
||||
|
||||
template void resize_gpu<float >(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
|
||||
//template void resize_gpu<float2>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
|
||||
template void resize_gpu<float3>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
|
||||
template void resize_gpu<float4>(const DevMem2Db& src, float fx, float fy, const DevMem2Db& dst, int interpolation, cudaStream_t stream);
|
||||
template void resize_gpu<float >(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
||||
//template void resize_gpu<float2>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
||||
template void resize_gpu<float3>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
||||
template void resize_gpu<float4>(DevMem2Db src, DevMem2Db srcWhole, int xoff, int yoff, float fx, float fy, DevMem2Db dst, int interpolation, cudaStream_t stream);
|
||||
} // namespace imgproc
|
||||
}}} // namespace cv { namespace gpu { namespace device
|
||||
|
Reference in New Issue
Block a user