fixed bug with submatrix in some gpu functions

update gpu tests
This commit is contained in:
Vladislav Vinogradov
2012-01-10 11:11:58 +00:00
parent 2ce6dd6870
commit af59a75ffc
25 changed files with 1777 additions and 2486 deletions

View File

@@ -59,56 +59,27 @@ namespace cv { namespace gpu { namespace device
////////////////////////////////// CopyTo /////////////////////////////////
///////////////////////////////////////////////////////////////////////////
template<typename T>
__global__ void copy_to_with_mask(const T* mat_src, T* mat_dst, const uchar* mask, int cols, int rows, size_t step_mat, size_t step_mask, int channels)
{
size_t x = blockIdx.x * blockDim.x + threadIdx.x;
size_t y = blockIdx.y * blockDim.y + threadIdx.y;
if ((x < cols * channels ) && (y < rows))
if (mask[y * step_mask + x / channels] != 0)
{
size_t idx = y * ( step_mat >> shift_and_sizeof<T>::shift ) + x;
mat_dst[idx] = mat_src[idx];
}
template <typename T> void copyToWithMask(DevMem2Db src, DevMem2Db dst, DevMem2Db mask, int channels, cudaStream_t stream)
{
cv::gpu::device::transform((DevMem2D_<T>)src, (DevMem2D_<T>)dst, identity<T>(), SingleMaskChannels(mask, channels), stream);
}
template<typename T>
void copy_to_with_mask_run(DevMem2Db mat_src, DevMem2Db mat_dst, DevMem2Db mask, int channels, cudaStream_t stream)
void copyToWithMask_gpu(DevMem2Db src, DevMem2Db dst, int depth, int channels, DevMem2Db mask, cudaStream_t stream)
{
dim3 threadsPerBlock(16,16, 1);
dim3 numBlocks ( divUp(mat_src.cols * channels , threadsPerBlock.x) , divUp(mat_src.rows , threadsPerBlock.y), 1);
typedef void (*func_t)(DevMem2Db src, DevMem2Db dst, DevMem2Db mask, int channels, cudaStream_t stream);
copy_to_with_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>
((T*)mat_src.data, (T*)mat_dst.data, (unsigned char*)mask.data, mat_src.cols, mat_src.rows, mat_src.step, mask.step, channels);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall ( cudaDeviceSynchronize() );
}
void copy_to_with_mask(DevMem2Db mat_src, DevMem2Db mat_dst, int depth, DevMem2Db mask, int channels, cudaStream_t stream)
{
typedef void (*CopyToFunc)(DevMem2Db mat_src, DevMem2Db mat_dst, DevMem2Db mask, int channels, cudaStream_t stream);
static CopyToFunc tab[8] =
static func_t tab[] =
{
copy_to_with_mask_run<unsigned char>,
copy_to_with_mask_run<signed char>,
copy_to_with_mask_run<unsigned short>,
copy_to_with_mask_run<short>,
copy_to_with_mask_run<int>,
copy_to_with_mask_run<float>,
copy_to_with_mask_run<double>,
0
copyToWithMask<unsigned char>,
copyToWithMask<signed char>,
copyToWithMask<unsigned short>,
copyToWithMask<short>,
copyToWithMask<int>,
copyToWithMask<float>,
copyToWithMask<double>
};
CopyToFunc func = tab[depth];
if (func == 0)
cv::gpu::error("Unsupported copyTo operation", __FILE__, __LINE__, "copy_to_with_mask");
func(mat_src, mat_dst, mask, channels, stream);
tab[depth](src, dst, mask, channels, stream);
}
///////////////////////////////////////////////////////////////////////////
@@ -303,7 +274,7 @@ namespace cv { namespace gpu { namespace device
cudaSafeCall( cudaSetDoubleForDevice(&alpha) );
cudaSafeCall( cudaSetDoubleForDevice(&beta) );
Convertor<T, D> op(alpha, beta);
::cv::gpu::device::transform((DevMem2D_<T>)src, (DevMem2D_<D>)dst, op, stream);
cv::gpu::device::transform((DevMem2D_<T>)src, (DevMem2D_<D>)dst, op, WithOutMask(), stream);
}
void convert_gpu(DevMem2Db src, int sdepth, DevMem2Db dst, int ddepth, double alpha, double beta, cudaStream_t stream)

View File

@@ -348,7 +348,7 @@ namespace
namespace cv { namespace gpu { namespace device
{
void copy_to_with_mask(DevMem2Db src, DevMem2Db dst, int depth, DevMem2Db mask, int channels, cudaStream_t stream);
void copyToWithMask_gpu(DevMem2Db src, DevMem2Db dst, int depth, int channels, DevMem2Db mask, cudaStream_t stream);
template <typename T>
void set_to_gpu(DevMem2Db mat, const T* scalar, int channels, cudaStream_t stream);
@@ -391,13 +391,13 @@ namespace
template <typename T> void kernelSetCaller(GpuMat& src, Scalar s, cudaStream_t stream)
{
Scalar_<T> sf = s;
::cv::gpu::device::set_to_gpu(src, sf.val, src.channels(), stream);
cv::gpu::device::set_to_gpu(src, sf.val, src.channels(), stream);
}
template <typename T> void kernelSetCaller(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream)
{
Scalar_<T> sf = s;
::cv::gpu::device::set_to_gpu(src, sf.val, mask, src.channels(), stream);
cv::gpu::device::set_to_gpu(src, sf.val, mask, src.channels(), stream);
}
}
@@ -405,17 +405,17 @@ namespace cv { namespace gpu
{
CV_EXPORTS void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream = 0)
{
::cv::gpu::device::copy_to_with_mask(src, dst, src.depth(), mask, src.channels(), stream);
cv::gpu::device::copyToWithMask_gpu(src.reshape(1), dst.reshape(1), src.depth(), src.channels(), mask, stream);
}
CV_EXPORTS void convertTo(const GpuMat& src, GpuMat& dst)
{
::cv::gpu::device::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), 1.0, 0.0, 0);
cv::gpu::device::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), 1.0, 0.0, 0);
}
CV_EXPORTS void convertTo(const GpuMat& src, GpuMat& dst, double alpha, double beta, cudaStream_t stream = 0)
{
::cv::gpu::device::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), alpha, beta, stream);
cv::gpu::device::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), alpha, beta, stream);
}
CV_EXPORTS void setTo(GpuMat& src, Scalar s, cudaStream_t stream)