fixed bug with submatrix in some gpu functions
update gpu tests
This commit is contained in:
@@ -59,56 +59,27 @@ namespace cv { namespace gpu { namespace device
|
||||
////////////////////////////////// CopyTo /////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
template<typename T>
|
||||
__global__ void copy_to_with_mask(const T* mat_src, T* mat_dst, const uchar* mask, int cols, int rows, size_t step_mat, size_t step_mask, int channels)
|
||||
{
|
||||
size_t x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||
size_t y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||
|
||||
if ((x < cols * channels ) && (y < rows))
|
||||
if (mask[y * step_mask + x / channels] != 0)
|
||||
{
|
||||
size_t idx = y * ( step_mat >> shift_and_sizeof<T>::shift ) + x;
|
||||
mat_dst[idx] = mat_src[idx];
|
||||
}
|
||||
template <typename T> void copyToWithMask(DevMem2Db src, DevMem2Db dst, DevMem2Db mask, int channels, cudaStream_t stream)
|
||||
{
|
||||
cv::gpu::device::transform((DevMem2D_<T>)src, (DevMem2D_<T>)dst, identity<T>(), SingleMaskChannels(mask, channels), stream);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
void copy_to_with_mask_run(DevMem2Db mat_src, DevMem2Db mat_dst, DevMem2Db mask, int channels, cudaStream_t stream)
|
||||
void copyToWithMask_gpu(DevMem2Db src, DevMem2Db dst, int depth, int channels, DevMem2Db mask, cudaStream_t stream)
|
||||
{
|
||||
dim3 threadsPerBlock(16,16, 1);
|
||||
dim3 numBlocks ( divUp(mat_src.cols * channels , threadsPerBlock.x) , divUp(mat_src.rows , threadsPerBlock.y), 1);
|
||||
typedef void (*func_t)(DevMem2Db src, DevMem2Db dst, DevMem2Db mask, int channels, cudaStream_t stream);
|
||||
|
||||
copy_to_with_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>
|
||||
((T*)mat_src.data, (T*)mat_dst.data, (unsigned char*)mask.data, mat_src.cols, mat_src.rows, mat_src.step, mask.step, channels);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == 0)
|
||||
cudaSafeCall ( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
void copy_to_with_mask(DevMem2Db mat_src, DevMem2Db mat_dst, int depth, DevMem2Db mask, int channels, cudaStream_t stream)
|
||||
{
|
||||
typedef void (*CopyToFunc)(DevMem2Db mat_src, DevMem2Db mat_dst, DevMem2Db mask, int channels, cudaStream_t stream);
|
||||
|
||||
static CopyToFunc tab[8] =
|
||||
static func_t tab[] =
|
||||
{
|
||||
copy_to_with_mask_run<unsigned char>,
|
||||
copy_to_with_mask_run<signed char>,
|
||||
copy_to_with_mask_run<unsigned short>,
|
||||
copy_to_with_mask_run<short>,
|
||||
copy_to_with_mask_run<int>,
|
||||
copy_to_with_mask_run<float>,
|
||||
copy_to_with_mask_run<double>,
|
||||
0
|
||||
copyToWithMask<unsigned char>,
|
||||
copyToWithMask<signed char>,
|
||||
copyToWithMask<unsigned short>,
|
||||
copyToWithMask<short>,
|
||||
copyToWithMask<int>,
|
||||
copyToWithMask<float>,
|
||||
copyToWithMask<double>
|
||||
};
|
||||
|
||||
CopyToFunc func = tab[depth];
|
||||
|
||||
if (func == 0)
|
||||
cv::gpu::error("Unsupported copyTo operation", __FILE__, __LINE__, "copy_to_with_mask");
|
||||
|
||||
func(mat_src, mat_dst, mask, channels, stream);
|
||||
tab[depth](src, dst, mask, channels, stream);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
@@ -303,7 +274,7 @@ namespace cv { namespace gpu { namespace device
|
||||
cudaSafeCall( cudaSetDoubleForDevice(&alpha) );
|
||||
cudaSafeCall( cudaSetDoubleForDevice(&beta) );
|
||||
Convertor<T, D> op(alpha, beta);
|
||||
::cv::gpu::device::transform((DevMem2D_<T>)src, (DevMem2D_<D>)dst, op, stream);
|
||||
cv::gpu::device::transform((DevMem2D_<T>)src, (DevMem2D_<D>)dst, op, WithOutMask(), stream);
|
||||
}
|
||||
|
||||
void convert_gpu(DevMem2Db src, int sdepth, DevMem2Db dst, int ddepth, double alpha, double beta, cudaStream_t stream)
|
||||
|
@@ -348,7 +348,7 @@ namespace
|
||||
|
||||
namespace cv { namespace gpu { namespace device
|
||||
{
|
||||
void copy_to_with_mask(DevMem2Db src, DevMem2Db dst, int depth, DevMem2Db mask, int channels, cudaStream_t stream);
|
||||
void copyToWithMask_gpu(DevMem2Db src, DevMem2Db dst, int depth, int channels, DevMem2Db mask, cudaStream_t stream);
|
||||
|
||||
template <typename T>
|
||||
void set_to_gpu(DevMem2Db mat, const T* scalar, int channels, cudaStream_t stream);
|
||||
@@ -391,13 +391,13 @@ namespace
|
||||
template <typename T> void kernelSetCaller(GpuMat& src, Scalar s, cudaStream_t stream)
|
||||
{
|
||||
Scalar_<T> sf = s;
|
||||
::cv::gpu::device::set_to_gpu(src, sf.val, src.channels(), stream);
|
||||
cv::gpu::device::set_to_gpu(src, sf.val, src.channels(), stream);
|
||||
}
|
||||
|
||||
template <typename T> void kernelSetCaller(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream)
|
||||
{
|
||||
Scalar_<T> sf = s;
|
||||
::cv::gpu::device::set_to_gpu(src, sf.val, mask, src.channels(), stream);
|
||||
cv::gpu::device::set_to_gpu(src, sf.val, mask, src.channels(), stream);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -405,17 +405,17 @@ namespace cv { namespace gpu
|
||||
{
|
||||
CV_EXPORTS void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream = 0)
|
||||
{
|
||||
::cv::gpu::device::copy_to_with_mask(src, dst, src.depth(), mask, src.channels(), stream);
|
||||
cv::gpu::device::copyToWithMask_gpu(src.reshape(1), dst.reshape(1), src.depth(), src.channels(), mask, stream);
|
||||
}
|
||||
|
||||
CV_EXPORTS void convertTo(const GpuMat& src, GpuMat& dst)
|
||||
{
|
||||
::cv::gpu::device::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), 1.0, 0.0, 0);
|
||||
cv::gpu::device::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), 1.0, 0.0, 0);
|
||||
}
|
||||
|
||||
CV_EXPORTS void convertTo(const GpuMat& src, GpuMat& dst, double alpha, double beta, cudaStream_t stream = 0)
|
||||
{
|
||||
::cv::gpu::device::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), alpha, beta, stream);
|
||||
cv::gpu::device::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), alpha, beta, stream);
|
||||
}
|
||||
|
||||
CV_EXPORTS void setTo(GpuMat& src, Scalar s, cudaStream_t stream)
|
||||
|
Reference in New Issue
Block a user