implemented asynchronous call for gpumat::setTo(), gpumat::copyTo(), gpumat::converTo()
This commit is contained in:
@@ -61,12 +61,12 @@ namespace cv
|
||||
{
|
||||
static inline int divUp(int a, int b) { return (a % b == 0) ? a/b : a/b + 1; }
|
||||
|
||||
extern "C" void copy_to_with_mask(const DevMem2D& mat_src, const DevMem2D& mat_dst, int depth, const DevMem2D& mask, int channels);
|
||||
extern "C" void copy_to_with_mask(const DevMem2D& mat_src, const DevMem2D& mat_dst, int depth, const DevMem2D& mask, int channels, const cudaStream_t & stream = 0);
|
||||
|
||||
extern "C" void set_to_without_mask (const DevMem2D& mat, int depth, const double * scalar, int channels);
|
||||
extern "C" void set_to_with_mask (const DevMem2D& mat, int depth, const double * scalar, const DevMem2D& mask, int channels);
|
||||
extern "C" void set_to_without_mask (const DevMem2D& mat, int depth, const double * scalar, int channels, const cudaStream_t & stream = 0);
|
||||
extern "C" void set_to_with_mask (const DevMem2D& mat, int depth, const double * scalar, const DevMem2D& mask, int channels, const cudaStream_t & stream = 0);
|
||||
|
||||
extern "C" void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, size_t width, size_t height, double alpha, double beta);
|
||||
extern "C" void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream = 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -42,7 +42,6 @@
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdio.h>
|
||||
//#include <iostream>
|
||||
#include "cuda_shared.hpp"
|
||||
#include "cuda_runtime.h"
|
||||
|
||||
@@ -239,19 +238,27 @@ namespace cv
|
||||
////////////////////////////////// CopyTo /////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
typedef void (*CopyToFunc)(const DevMem2D& mat_src, const DevMem2D& mat_dst, const DevMem2D& mask, int channels);
|
||||
typedef void (*CopyToFunc)(const DevMem2D& mat_src, const DevMem2D& mat_dst, const DevMem2D& mask, int channels, const cudaStream_t & stream);
|
||||
|
||||
template<typename T>
|
||||
void copy_to_with_mask_run(const DevMem2D& mat_src, const DevMem2D& mat_dst, const DevMem2D& mask, int channels)
|
||||
void copy_to_with_mask_run(const DevMem2D& mat_src, const DevMem2D& mat_dst, const DevMem2D& mask, int channels, const cudaStream_t & stream)
|
||||
{
|
||||
dim3 threadsPerBlock(16,16, 1);
|
||||
dim3 numBlocks ( divUp(mat_src.cols * channels , threadsPerBlock.x) , divUp(mat_src.rows , threadsPerBlock.y), 1);
|
||||
::mat_operators::kernel_copy_to_with_mask<T><<<numBlocks,threadsPerBlock>>>
|
||||
((T*)mat_src.ptr, (T*)mat_dst.ptr, (unsigned char*)mask.ptr, mat_src.cols, mat_src.rows, mat_src.step, mask.step, channels);
|
||||
cudaSafeCall ( cudaThreadSynchronize() );
|
||||
if (stream == 0)
|
||||
{
|
||||
::mat_operators::kernel_copy_to_with_mask<T><<<numBlocks,threadsPerBlock>>>
|
||||
((T*)mat_src.ptr, (T*)mat_dst.ptr, (unsigned char*)mask.ptr, mat_src.cols, mat_src.rows, mat_src.step, mask.step, channels);
|
||||
cudaSafeCall ( cudaThreadSynchronize() );
|
||||
}
|
||||
else
|
||||
{
|
||||
::mat_operators::kernel_copy_to_with_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>
|
||||
((T*)mat_src.ptr, (T*)mat_dst.ptr, (unsigned char*)mask.ptr, mat_src.cols, mat_src.rows, mat_src.step, mask.step, channels);
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void copy_to_with_mask(const DevMem2D& mat_src, const DevMem2D& mat_dst, int depth, const DevMem2D& mask, int channels)
|
||||
extern "C" void copy_to_with_mask(const DevMem2D& mat_src, const DevMem2D& mat_dst, int depth, const DevMem2D& mask, int channels, const cudaStream_t & stream)
|
||||
{
|
||||
static CopyToFunc tab[8] =
|
||||
{
|
||||
@@ -269,7 +276,7 @@ namespace cv
|
||||
|
||||
if (func == 0) cv::gpu::error("Unsupported convert operation", __FILE__, __LINE__);
|
||||
|
||||
func(mat_src, mat_dst, mask, channels);
|
||||
func(mat_src, mat_dst, mask, channels, stream);
|
||||
}
|
||||
|
||||
|
||||
@@ -277,28 +284,43 @@ namespace cv
|
||||
////////////////////////////////// SetTo //////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
typedef void (*SetToFunc_with_mask)(const DevMem2D& mat, const DevMem2D& mask, int channels);
|
||||
typedef void (*SetToFunc_without_mask)(const DevMem2D& mat, int channels);
|
||||
typedef void (*SetToFunc_with_mask)(const DevMem2D& mat, const DevMem2D& mask, int channels, const cudaStream_t & stream);
|
||||
typedef void (*SetToFunc_without_mask)(const DevMem2D& mat, int channels, const cudaStream_t & stream);
|
||||
|
||||
template <typename T>
|
||||
void set_to_with_mask_run(const DevMem2D& mat, const DevMem2D& mask, int channels)
|
||||
void set_to_with_mask_run(const DevMem2D& mat, const DevMem2D& mask, int channels, const cudaStream_t & stream)
|
||||
{
|
||||
dim3 threadsPerBlock(32, 8, 1);
|
||||
dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);
|
||||
::mat_operators::kernel_set_to_with_mask<T><<<numBlocks,threadsPerBlock>>>((T*)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, channels, mask.step);
|
||||
cudaSafeCall ( cudaThreadSynchronize() );
|
||||
if (stream == 0)
|
||||
{
|
||||
::mat_operators::kernel_set_to_with_mask<T><<<numBlocks,threadsPerBlock>>>((T*)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, channels, mask.step);
|
||||
cudaSafeCall ( cudaThreadSynchronize() );
|
||||
}
|
||||
else
|
||||
{
|
||||
::mat_operators::kernel_set_to_with_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>((T*)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, channels, mask.step);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void set_to_without_mask_run(const DevMem2D& mat, int channels)
|
||||
void set_to_without_mask_run(const DevMem2D& mat, int channels, const cudaStream_t & stream)
|
||||
{
|
||||
dim3 threadsPerBlock(32, 8, 1);
|
||||
dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);
|
||||
::mat_operators::kernel_set_to_without_mask<T><<<numBlocks,threadsPerBlock>>>((T*)mat.ptr, mat.cols, mat.rows, mat.step, channels);
|
||||
cudaSafeCall ( cudaThreadSynchronize() );
|
||||
if (stream == 0)
|
||||
{
|
||||
::mat_operators::kernel_set_to_without_mask<T><<<numBlocks,threadsPerBlock>>>((T*)mat.ptr, mat.cols, mat.rows, mat.step, channels);
|
||||
cudaSafeCall ( cudaThreadSynchronize() );
|
||||
}
|
||||
else
|
||||
{
|
||||
::mat_operators::kernel_set_to_without_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>((T*)mat.ptr, mat.cols, mat.rows, mat.step, channels);
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" void set_to_without_mask(const DevMem2D& mat, int depth, const double * scalar, int channels)
|
||||
extern "C" void set_to_without_mask(const DevMem2D& mat, int depth, const double * scalar, int channels, const cudaStream_t & stream)
|
||||
{
|
||||
double data[4];
|
||||
data[0] = scalar[0];
|
||||
@@ -323,11 +345,11 @@ namespace cv
|
||||
|
||||
if (func == 0) cv::gpu::error("Unsupported convert operation", __FILE__, __LINE__);
|
||||
|
||||
func(mat, channels);
|
||||
func(mat, channels, stream);
|
||||
}
|
||||
|
||||
|
||||
extern "C" void set_to_with_mask(const DevMem2D& mat, int depth, const double * scalar, const DevMem2D& mask, int channels)
|
||||
extern "C" void set_to_with_mask(const DevMem2D& mat, int depth, const double * scalar, const DevMem2D& mask, int channels, const cudaStream_t & stream)
|
||||
{
|
||||
double data[4];
|
||||
data[0] = scalar[0];
|
||||
@@ -352,7 +374,7 @@ namespace cv
|
||||
|
||||
if (func == 0) cv::gpu::error("Unsupported convert operation", __FILE__, __LINE__);
|
||||
|
||||
func(mat, mask, channels);
|
||||
func(mat, mask, channels, stream);
|
||||
}
|
||||
|
||||
|
||||
@@ -360,22 +382,27 @@ namespace cv
|
||||
//////////////////////////////// ConvertTo ////////////////////////////////
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
typedef void (*CvtFunc)(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta);
|
||||
typedef void (*CvtFunc)(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream);
|
||||
|
||||
template<typename T, typename DT>
|
||||
void cvt_(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta)
|
||||
void cvt_(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream)
|
||||
{
|
||||
const int shift = ::mat_operators::ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::shift;
|
||||
|
||||
dim3 block(32, 8);
|
||||
dim3 grid(divUp(width, block.x * shift), divUp(height, block.y));
|
||||
if (stream == 0)
|
||||
{
|
||||
::mat_operators::kernel_convert_to<T, DT><<<grid, block>>>(src.ptr, src.step, dst.ptr, dst.step, width, height, alpha, beta);
|
||||
cudaSafeCall( cudaThreadSynchronize() );
|
||||
}
|
||||
else
|
||||
{
|
||||
::mat_operators::kernel_convert_to<T, DT><<<grid, block, 0, stream>>>(src.ptr, src.step, dst.ptr, dst.step, width, height, alpha, beta);
|
||||
}
|
||||
}
|
||||
|
||||
::mat_operators::kernel_convert_to<T, DT><<<grid, block>>>(src.ptr, src.step, dst.ptr, dst.step, width, height, alpha, beta);
|
||||
|
||||
cudaSafeCall( cudaThreadSynchronize() );
|
||||
}
|
||||
|
||||
extern "C" void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, size_t width, size_t height, double alpha, double beta)
|
||||
extern "C" void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream)
|
||||
{
|
||||
static CvtFunc tab[8][8] =
|
||||
{
|
||||
@@ -406,7 +433,7 @@ namespace cv
|
||||
CvtFunc func = tab[sdepth][ddepth];
|
||||
if (func == 0)
|
||||
cv::gpu::error("Unsupported convert operation", __FILE__, __LINE__);
|
||||
func(src, dst, width, height, alpha, beta);
|
||||
func(src, dst, width, height, alpha, beta, stream);
|
||||
}
|
||||
} // namespace impl
|
||||
} // namespace gpu
|
||||
|
||||
Reference in New Issue
Block a user