Fixed bug with submatrix in device::transform
This commit is contained in:
parent
dab3586792
commit
d13a6b74b2
@ -448,7 +448,7 @@ namespace cv { namespace gpu
|
|||||||
{
|
{
|
||||||
int area = rows * cols;
|
int area = rows * cols;
|
||||||
if (!m.isContinuous() || m.type() != type || m.size().area() != area)
|
if (!m.isContinuous() || m.type() != type || m.size().area() != area)
|
||||||
m.create(1, area, type);
|
ensureSizeIsEnough(1, area, type, m);
|
||||||
m = m.reshape(0, rows);
|
m = m.reshape(0, rows);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1058,12 +1058,12 @@ namespace cv { namespace gpu { namespace device
|
|||||||
::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<T>)dst, Absdiff<T>(), stream);
|
::cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<T>)dst, Absdiff<T>(), stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
//template void absdiff_gpu<uchar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
template void absdiff_gpu<uchar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||||
template void absdiff_gpu<schar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
template void absdiff_gpu<schar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||||
template void absdiff_gpu<ushort>(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
template void absdiff_gpu<ushort>(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||||
template void absdiff_gpu<short >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
template void absdiff_gpu<short >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||||
//template void absdiff_gpu<int >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
template void absdiff_gpu<int >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||||
//template void absdiff_gpu<float >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
template void absdiff_gpu<float >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||||
template void absdiff_gpu<double>(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
template void absdiff_gpu<double>(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
|
||||||
|
|
||||||
template <typename T> struct AbsdiffScalar : unary_function<T, T>
|
template <typename T> struct AbsdiffScalar : unary_function<T, T>
|
||||||
|
@ -159,7 +159,13 @@ void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const Gpu
|
|||||||
|
|
||||||
cudaStream_t stream = StreamAccessor::getStream(s);
|
cudaStream_t stream = StreamAccessor::getStream(s);
|
||||||
|
|
||||||
if (mask.empty() && dst.type() == src1.type() && (src1.depth() == CV_8U || src1.depth() == CV_32S || src1.depth() == CV_32F))
|
bool useNpp =
|
||||||
|
mask.empty() &&
|
||||||
|
dst.type() == src1.type() &&
|
||||||
|
(src1.depth() == CV_8U || src1.depth() == CV_32S || src1.depth() == CV_32F) &&
|
||||||
|
(isAligned(src1.data, 16) && isAligned(src2.data, 16) && isAligned(dst.data, 16));
|
||||||
|
|
||||||
|
if (useNpp)
|
||||||
{
|
{
|
||||||
nppArithmCaller(src1, src2, dst, nppiAdd_8u_C1RSfs, nppiAdd_8u_C4RSfs, nppiAdd_32s_C1R, nppiAdd_32f_C1R, stream);
|
nppArithmCaller(src1, src2, dst, nppiAdd_8u_C1RSfs, nppiAdd_8u_C4RSfs, nppiAdd_32s_C1R, nppiAdd_32f_C1R, stream);
|
||||||
return;
|
return;
|
||||||
@ -271,7 +277,13 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cons
|
|||||||
|
|
||||||
cudaStream_t stream = StreamAccessor::getStream(s);
|
cudaStream_t stream = StreamAccessor::getStream(s);
|
||||||
|
|
||||||
if (mask.empty() && dst.type() == src1.type() && (src1.depth() == CV_8U || src1.depth() == CV_32S || src1.depth() == CV_32F))
|
bool useNpp =
|
||||||
|
mask.empty() &&
|
||||||
|
dst.type() == src1.type() &&
|
||||||
|
(src1.depth() == CV_8U || src1.depth() == CV_32S || src1.depth() == CV_32F) &&
|
||||||
|
(isAligned(src1.data, 16) && isAligned(src2.data, 16) && isAligned(dst.data, 16));
|
||||||
|
|
||||||
|
if (useNpp)
|
||||||
{
|
{
|
||||||
nppArithmCaller(src2, src1, dst, nppiSub_8u_C1RSfs, nppiSub_8u_C4RSfs, nppiSub_32s_C1R, nppiSub_32f_C1R, stream);
|
nppArithmCaller(src2, src1, dst, nppiSub_8u_C1RSfs, nppiSub_8u_C4RSfs, nppiSub_32s_C1R, nppiSub_32f_C1R, stream);
|
||||||
return;
|
return;
|
||||||
@ -403,8 +415,13 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, doub
|
|||||||
|
|
||||||
dst.create(src1.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src1.channels()));
|
dst.create(src1.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src1.channels()));
|
||||||
|
|
||||||
|
bool useNpp =
|
||||||
|
scale == 1 &&
|
||||||
|
dst.type() == src1.type() &&
|
||||||
|
(src1.depth() == CV_8U || src1.depth() == CV_32S || src1.depth() == CV_32F) &&
|
||||||
|
(isAligned(src1.data, 16) && isAligned(src2.data, 16) && isAligned(dst.data, 16));
|
||||||
|
|
||||||
if (scale == 1 && dst.type() == src1.type() && (src1.depth() == CV_8U || src1.depth() == CV_32S || src1.depth() == CV_32F))
|
if (useNpp)
|
||||||
{
|
{
|
||||||
nppArithmCaller(src2, src1, dst, nppiMul_8u_C1RSfs, nppiMul_8u_C4RSfs, nppiMul_32s_C1R, nppiMul_32f_C1R, stream);
|
nppArithmCaller(src2, src1, dst, nppiMul_8u_C1RSfs, nppiMul_8u_C4RSfs, nppiMul_32s_C1R, nppiMul_32f_C1R, stream);
|
||||||
return;
|
return;
|
||||||
@ -528,8 +545,13 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double
|
|||||||
|
|
||||||
dst.create(src1.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src1.channels()));
|
dst.create(src1.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src1.channels()));
|
||||||
|
|
||||||
|
bool useNpp =
|
||||||
|
scale == 1 &&
|
||||||
|
dst.type() == src1.type() &&
|
||||||
|
(src1.depth() == CV_8U || src1.depth() == CV_32S || src1.depth() == CV_32F) &&
|
||||||
|
(isAligned(src1.data, 16) && isAligned(src2.data, 16) && isAligned(dst.data, 16));
|
||||||
|
|
||||||
if (scale == 1 && dst.type() == src1.type() && (src1.depth() == CV_8U || src1.depth() == CV_32S || src1.depth() == CV_32F))
|
if (useNpp)
|
||||||
{
|
{
|
||||||
nppArithmCaller(src2, src1, dst, nppiDiv_8u_C1RSfs, nppiDiv_8u_C4RSfs, nppiDiv_32s_C1R, nppiDiv_32f_C1R, stream);
|
nppArithmCaller(src2, src1, dst, nppiDiv_8u_C1RSfs, nppiDiv_8u_C4RSfs, nppiDiv_32s_C1R, nppiDiv_32f_C1R, stream);
|
||||||
return;
|
return;
|
||||||
@ -643,7 +665,7 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea
|
|||||||
|
|
||||||
static const func_t funcs[] =
|
static const func_t funcs[] =
|
||||||
{
|
{
|
||||||
0/*absdiff_gpu<unsigned char>*/, absdiff_gpu<signed char>, absdiff_gpu<unsigned short>, absdiff_gpu<short>, 0/*absdiff_gpu<int>*/, 0/*absdiff_gpu<float>*/, absdiff_gpu<double>
|
absdiff_gpu<unsigned char>, absdiff_gpu<signed char>, absdiff_gpu<unsigned short>, absdiff_gpu<short>, absdiff_gpu<int>, absdiff_gpu<float>, absdiff_gpu<double>
|
||||||
};
|
};
|
||||||
|
|
||||||
CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
|
CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
|
||||||
@ -656,7 +678,9 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea
|
|||||||
sz.width = src1.cols * src1.channels();
|
sz.width = src1.cols * src1.channels();
|
||||||
sz.height = src1.rows;
|
sz.height = src1.rows;
|
||||||
|
|
||||||
if (src1.depth() == CV_8U && (src1.cols * src1.channels()) % 4 == 0)
|
bool aligned = isAligned(src1.data, 16) && isAligned(src2.data, 16) && isAligned(dst.data, 16);
|
||||||
|
|
||||||
|
if (aligned && src1.depth() == CV_8U && (src1.cols * src1.channels()) % 4 == 0)
|
||||||
{
|
{
|
||||||
NppStreamHandler h(stream);
|
NppStreamHandler h(stream);
|
||||||
|
|
||||||
@ -668,7 +692,7 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea
|
|||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
else if (src1.depth() == CV_8U)
|
else if (aligned && src1.depth() == CV_8U)
|
||||||
{
|
{
|
||||||
NppStreamHandler h(stream);
|
NppStreamHandler h(stream);
|
||||||
|
|
||||||
@ -678,7 +702,7 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea
|
|||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
else if (src1.depth() == CV_32S)
|
else if (aligned && src1.depth() == CV_32S)
|
||||||
{
|
{
|
||||||
NppStreamHandler h(stream);
|
NppStreamHandler h(stream);
|
||||||
|
|
||||||
@ -688,7 +712,7 @@ void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Strea
|
|||||||
if (stream == 0)
|
if (stream == 0)
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
else if (src1.depth() == CV_32F)
|
else if (aligned && src1.depth() == CV_32F)
|
||||||
{
|
{
|
||||||
NppStreamHandler h(stream);
|
NppStreamHandler h(stream);
|
||||||
|
|
||||||
|
@ -67,6 +67,11 @@
|
|||||||
namespace cv { namespace gpu
|
namespace cv { namespace gpu
|
||||||
{
|
{
|
||||||
void error(const char *error_string, const char *file, const int line, const char *func);
|
void error(const char *error_string, const char *file, const int line, const char *func);
|
||||||
|
|
||||||
|
template <typename T> static inline bool isAligned(const T* ptr, size_t size)
|
||||||
|
{
|
||||||
|
return reinterpret_cast<size_t>(ptr) % size == 0;
|
||||||
|
}
|
||||||
}}
|
}}
|
||||||
|
|
||||||
static inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")
|
static inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")
|
||||||
|
@ -309,7 +309,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
template<> struct TransformDispatcher<false>
|
template<> struct TransformDispatcher<false>
|
||||||
{
|
{
|
||||||
template <typename T, typename D, typename UnOp, typename Mask>
|
template <typename T, typename D, typename UnOp, typename Mask>
|
||||||
static void call(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, const UnOp& op, const Mask& mask, cudaStream_t stream)
|
static void call(DevMem2D_<T> src, DevMem2D_<D> dst, UnOp op, Mask mask, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef TransformFunctorTraits<UnOp> ft;
|
typedef TransformFunctorTraits<UnOp> ft;
|
||||||
|
|
||||||
@ -324,7 +324,7 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
||||||
static void call(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst, const BinOp& op, const Mask& mask, cudaStream_t stream)
|
static void call(DevMem2D_<T1> src1, DevMem2D_<T2> src2, DevMem2D_<D> dst, BinOp op, Mask mask, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef TransformFunctorTraits<BinOp> ft;
|
typedef TransformFunctorTraits<BinOp> ft;
|
||||||
|
|
||||||
@ -341,12 +341,18 @@ namespace cv { namespace gpu { namespace device
|
|||||||
template<> struct TransformDispatcher<true>
|
template<> struct TransformDispatcher<true>
|
||||||
{
|
{
|
||||||
template <typename T, typename D, typename UnOp, typename Mask>
|
template <typename T, typename D, typename UnOp, typename Mask>
|
||||||
static void call(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, const UnOp& op, const Mask& mask, cudaStream_t stream)
|
static void call(DevMem2D_<T> src, DevMem2D_<D> dst, UnOp op, Mask mask, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef TransformFunctorTraits<UnOp> ft;
|
typedef TransformFunctorTraits<UnOp> ft;
|
||||||
|
|
||||||
StaticAssert<ft::smart_shift != 1>::check();
|
StaticAssert<ft::smart_shift != 1>::check();
|
||||||
|
|
||||||
|
if (!isAligned(src.data, ft::smart_shift * sizeof(T)) || !isAligned(dst.data, ft::smart_shift * sizeof(D)))
|
||||||
|
{
|
||||||
|
TransformDispatcher<false>::call(src, dst, op, mask, stream);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
const dim3 threads(ft::smart_block_dim_x, ft::smart_block_dim_y, 1);
|
const dim3 threads(ft::smart_block_dim_x, ft::smart_block_dim_y, 1);
|
||||||
const dim3 grid(divUp(src.cols, threads.x * ft::smart_shift), divUp(src.rows, threads.y), 1);
|
const dim3 grid(divUp(src.cols, threads.x * ft::smart_shift), divUp(src.rows, threads.y), 1);
|
||||||
|
|
||||||
@ -358,12 +364,18 @@ namespace cv { namespace gpu { namespace device
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
||||||
static void call(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst, const BinOp& op, const Mask& mask, cudaStream_t stream)
|
static void call(DevMem2D_<T1> src1, DevMem2D_<T2> src2, DevMem2D_<D> dst, BinOp op, Mask mask, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef TransformFunctorTraits<BinOp> ft;
|
typedef TransformFunctorTraits<BinOp> ft;
|
||||||
|
|
||||||
StaticAssert<ft::smart_shift != 1>::check();
|
StaticAssert<ft::smart_shift != 1>::check();
|
||||||
|
|
||||||
|
if (!isAligned(src1.data, ft::smart_shift * sizeof(T1)) || !isAligned(src2.data, ft::smart_shift * sizeof(T2)) || !isAligned(dst.data, ft::smart_shift * sizeof(D)))
|
||||||
|
{
|
||||||
|
TransformDispatcher<false>::call(src1, src2, dst, op, mask, stream);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
const dim3 threads(ft::smart_block_dim_x, ft::smart_block_dim_y, 1);
|
const dim3 threads(ft::smart_block_dim_x, ft::smart_block_dim_y, 1);
|
||||||
const dim3 grid(divUp(src1.cols, threads.x * ft::smart_shift), divUp(src1.rows, threads.y), 1);
|
const dim3 grid(divUp(src1.cols, threads.x * ft::smart_shift), divUp(src1.rows, threads.y), 1);
|
||||||
|
|
||||||
@ -376,14 +388,14 @@ namespace cv { namespace gpu { namespace device
|
|||||||
};
|
};
|
||||||
|
|
||||||
template <typename T, typename D, typename UnOp, typename Mask>
|
template <typename T, typename D, typename UnOp, typename Mask>
|
||||||
static void transform_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, const UnOp& op, const Mask& mask, cudaStream_t stream)
|
static inline void transform_caller(DevMem2D_<T> src, DevMem2D_<D> dst, UnOp op, Mask mask, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef TransformFunctorTraits<UnOp> ft;
|
typedef TransformFunctorTraits<UnOp> ft;
|
||||||
TransformDispatcher<VecTraits<T>::cn == 1 && VecTraits<D>::cn == 1 && ft::smart_shift != 1>::call(src, dst, op, mask, stream);
|
TransformDispatcher<VecTraits<T>::cn == 1 && VecTraits<D>::cn == 1 && ft::smart_shift != 1>::call(src, dst, op, mask, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
template <typename T1, typename T2, typename D, typename BinOp, typename Mask>
|
||||||
static void transform_caller(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst, const BinOp& op, const Mask& mask, cudaStream_t stream)
|
static inline void transform_caller(DevMem2D_<T1> src1, DevMem2D_<T2> src2, DevMem2D_<D> dst, BinOp op, Mask mask, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
typedef TransformFunctorTraits<BinOp> ft;
|
typedef TransformFunctorTraits<BinOp> ft;
|
||||||
TransformDispatcher<VecTraits<T1>::cn == 1 && VecTraits<T2>::cn == 1 && VecTraits<D>::cn == 1 && ft::smart_shift != 1>::call(src1, src2, dst, op, mask, stream);
|
TransformDispatcher<VecTraits<T1>::cn == 1 && VecTraits<T2>::cn == 1 && VecTraits<D>::cn == 1 && ft::smart_shift != 1>::call(src1, src2, dst, op, mask, stream);
|
||||||
|
@ -50,25 +50,25 @@
|
|||||||
namespace cv { namespace gpu { namespace device
|
namespace cv { namespace gpu { namespace device
|
||||||
{
|
{
|
||||||
template <typename T, typename D, typename UnOp>
|
template <typename T, typename D, typename UnOp>
|
||||||
void transform(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, const UnOp& op, cudaStream_t stream = 0)
|
static inline void transform(DevMem2D_<T> src, DevMem2D_<D> dst, UnOp op, cudaStream_t stream = 0)
|
||||||
{
|
{
|
||||||
transform_detail::transform_caller(src, dst, op, WithOutMask(), stream);
|
transform_detail::transform_caller(src, dst, op, WithOutMask(), stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, typename D, typename UnOp>
|
template <typename T, typename D, typename UnOp>
|
||||||
void transform(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, const PtrStepb& mask, const UnOp& op, cudaStream_t stream = 0)
|
static inline void transform(DevMem2D_<T> src, DevMem2D_<D> dst, PtrStepb mask, UnOp op, cudaStream_t stream = 0)
|
||||||
{
|
{
|
||||||
transform_detail::transform_caller(src, dst, op, SingleMask(mask), stream);
|
transform_detail::transform_caller(src, dst, op, SingleMask(mask), stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T1, typename T2, typename D, typename BinOp>
|
template <typename T1, typename T2, typename D, typename BinOp>
|
||||||
void transform(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst, const BinOp& op, cudaStream_t stream = 0)
|
static inline void transform(DevMem2D_<T1> src1, DevMem2D_<T2> src2, DevMem2D_<D> dst, BinOp op, cudaStream_t stream = 0)
|
||||||
{
|
{
|
||||||
transform_detail::transform_caller(src1, src2, dst, op, WithOutMask(), stream);
|
transform_detail::transform_caller(src1, src2, dst, op, WithOutMask(), stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T1, typename T2, typename D, typename BinOp>
|
template <typename T1, typename T2, typename D, typename BinOp>
|
||||||
void transform(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst, const PtrStepb& mask, const BinOp& op, cudaStream_t stream = 0)
|
static inline void transform(DevMem2D_<T1> src1, DevMem2D_<T2> src2, DevMem2D_<D> dst, PtrStepb mask, BinOp op, cudaStream_t stream = 0)
|
||||||
{
|
{
|
||||||
transform_detail::transform_caller(src1, src2, dst, op, SingleMask(mask), stream);
|
transform_detail::transform_caller(src1, src2, dst, op, SingleMask(mask), stream);
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user