cv::gpu::CudaStream -> cv::gpu::Stream
some refactoring added gpu module to compilation
This commit is contained in:
@@ -52,7 +52,7 @@ cv::gpu::StereoBeliefPropagation_GPU::StereoBeliefPropagation_GPU(int, int, int,
|
||||
cv::gpu::StereoBeliefPropagation_GPU::StereoBeliefPropagation_GPU(int, int, int, float, float, float, float, int, float) { throw_nogpu(); }
|
||||
|
||||
void cv::gpu::StereoBeliefPropagation_GPU::operator()(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
|
||||
void cv::gpu::StereoBeliefPropagation_GPU::operator()(const GpuMat&, const GpuMat&, GpuMat&, const CudaStream&) { throw_nogpu(); }
|
||||
void cv::gpu::StereoBeliefPropagation_GPU::operator()(const GpuMat&, const GpuMat&, GpuMat&, const Stream&) { throw_nogpu(); }
|
||||
|
||||
bool cv::gpu::StereoBeliefPropagation_GPU::checkIfGpuCallReasonable() { throw_nogpu(); return false; }
|
||||
|
||||
@@ -282,7 +282,7 @@ void cv::gpu::StereoBeliefPropagation_GPU::operator()(const GpuMat& left, const
|
||||
::stereo_bp_gpu_operator(ndisp, iters, levels, max_data_term, data_weight, max_disc_term, disc_single_jump, msg_type, msg_scale, u, d, l, r, u2, d2, l2, r2, datas, out, left, right, disp, 0);
|
||||
}
|
||||
|
||||
void cv::gpu::StereoBeliefPropagation_GPU::operator()(const GpuMat& left, const GpuMat& right, GpuMat& disp, const CudaStream& stream)
|
||||
void cv::gpu::StereoBeliefPropagation_GPU::operator()(const GpuMat& left, const GpuMat& right, GpuMat& disp, const Stream& stream)
|
||||
{
|
||||
::stereo_bp_gpu_operator(ndisp, iters, levels, max_data_term, data_weight, max_disc_term, disc_single_jump, msg_type, msg_scale, u, d, l, r, u2, d2, l2, r2, datas, out, left, right, disp, StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
@@ -44,9 +44,11 @@
|
||||
|
||||
using namespace cv::gpu;
|
||||
|
||||
|
||||
/////////////////////////////////// Remap ///////////////////////////////////////////////
|
||||
namespace imgproc
|
||||
{
|
||||
texture<unsigned char, 2, cudaReadModeNormalizedFloat> tex1;
|
||||
texture<unsigned char, 2, cudaReadModeNormalizedFloat> tex_remap;
|
||||
|
||||
__global__ void kernel_remap(const float *mapx, const float *mapy, size_t map_step, unsigned char* out, size_t out_step, int width, int height)
|
||||
{
|
||||
@@ -58,12 +60,40 @@ namespace imgproc
|
||||
|
||||
float xcoo = mapx[idx];
|
||||
float ycoo = mapy[idx];
|
||||
|
||||
out[y * out_step + x] = (unsigned char)(255.f * tex2D(tex1, xcoo, ycoo));
|
||||
|
||||
out[y * out_step + x] = (unsigned char)(255.f * tex2D(tex_remap, xcoo, ycoo));
|
||||
}
|
||||
}
|
||||
|
||||
texture< uchar4, 2, cudaReadModeElementType > tex_meanshift;
|
||||
}
|
||||
|
||||
namespace cv { namespace gpu { namespace impl
|
||||
{
|
||||
extern "C" void remap_gpu(const DevMem2D& src, const DevMem2D_<float>& xmap, const DevMem2D_<float>& ymap, DevMem2D dst)
|
||||
{
|
||||
dim3 block(16, 16, 1);
|
||||
dim3 grid(1, 1, 1);
|
||||
grid.x = divUp(dst.cols, block.x);
|
||||
grid.y = divUp(dst.rows, block.y);
|
||||
|
||||
imgproc::tex_remap.filterMode = cudaFilterModeLinear;
|
||||
imgproc::tex_remap.addressMode[0] = imgproc::tex_remap.addressMode[1] = cudaAddressModeWrap;
|
||||
cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
|
||||
cudaSafeCall( cudaBindTexture2D(0, imgproc::tex_remap, src.ptr, desc, dst.cols, dst.rows, src.step) );
|
||||
|
||||
imgproc::kernel_remap<<<grid, block>>>(xmap.ptr, ymap.ptr, xmap.step, dst.ptr, dst.step, dst.cols, dst.rows);
|
||||
|
||||
cudaSafeCall( cudaThreadSynchronize() );
|
||||
cudaSafeCall( cudaUnbindTexture(imgproc::tex_remap) );
|
||||
}
|
||||
}}}
|
||||
|
||||
|
||||
/////////////////////////////////// MeanShiftfiltering ///////////////////////////////////////////////
|
||||
|
||||
namespace imgproc
|
||||
{
|
||||
texture<uchar4, 2> tex_meanshift;
|
||||
|
||||
extern "C" __global__ void meanshift_kernel( unsigned char* out, int out_step, int cols, int rows, int sp, int sr, int maxIter, float eps )
|
||||
{
|
||||
@@ -72,9 +102,8 @@ namespace imgproc
|
||||
|
||||
if( x0 < cols && y0 < rows )
|
||||
{
|
||||
|
||||
int isr2 = sr*sr;
|
||||
uchar4 c = tex2D( tex_meanshift, x0, y0 );
|
||||
uchar4 c = tex2D(tex_meanshift, x0, y0 );
|
||||
// iterate meanshift procedure
|
||||
for( int iter = 0; iter < maxIter; iter++ )
|
||||
{
|
||||
@@ -137,26 +166,6 @@ namespace imgproc
|
||||
|
||||
namespace cv { namespace gpu { namespace impl
|
||||
{
|
||||
using namespace imgproc;
|
||||
|
||||
extern "C" void remap_gpu(const DevMem2D& src, const DevMem2D_<float>& xmap, const DevMem2D_<float>& ymap, DevMem2D dst)
|
||||
{
|
||||
dim3 block(16, 16, 1);
|
||||
dim3 grid(1, 1, 1);
|
||||
grid.x = divUp(dst.cols, block.x);
|
||||
grid.y = divUp(dst.rows, block.y);
|
||||
|
||||
tex1.filterMode = cudaFilterModeLinear;
|
||||
tex1.addressMode[0] = tex1.addressMode[1] = cudaAddressModeWrap;
|
||||
cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
|
||||
cudaSafeCall( cudaBindTexture2D(0, tex1, src.ptr, desc, dst.cols, dst.rows, src.step) );
|
||||
|
||||
kernel_remap<<<grid, block>>>(xmap.ptr, ymap.ptr, xmap.step, dst.ptr, dst.step, dst.cols, dst.rows);
|
||||
|
||||
cudaSafeCall( cudaThreadSynchronize() );
|
||||
cudaSafeCall( cudaUnbindTexture(tex1) );
|
||||
}
|
||||
|
||||
extern "C" void meanShiftFiltering_gpu(const DevMem2D& src, DevMem2D dst, float sp, float sr, int maxIter, float eps)
|
||||
{
|
||||
dim3 grid(1, 1, 1);
|
||||
@@ -165,11 +174,11 @@ namespace cv { namespace gpu { namespace impl
|
||||
grid.y = divUp(src.rows, threads.y);
|
||||
|
||||
cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar4>();
|
||||
cudaSafeCall( cudaBindTexture2D( 0, tex_meanshift, src.ptr, desc, src.cols, src.rows, src.step ) );
|
||||
cudaSafeCall( cudaBindTexture2D( 0, imgproc::tex_meanshift, src.ptr, desc, src.cols, src.rows, src.step ) );
|
||||
|
||||
meanshift_kernel<<< grid, threads >>>( dst.ptr, dst.step, dst.cols, dst.rows, sp, sr, maxIter, eps );
|
||||
imgproc::meanshift_kernel<<< grid, threads >>>( dst.ptr, dst.step, dst.cols, dst.rows, sp, sr, maxIter, eps );
|
||||
cudaSafeCall( cudaThreadSynchronize() );
|
||||
cudaSafeCall( cudaUnbindTexture( tex_meanshift ) );
|
||||
cudaSafeCall( cudaUnbindTexture( imgproc::tex_meanshift ) );
|
||||
}
|
||||
}}}
|
||||
|
||||
|
@@ -48,28 +48,28 @@ using namespace cv::gpu;
|
||||
|
||||
#if !defined (HAVE_CUDA)
|
||||
|
||||
void cv::gpu::CudaStream::create() { throw_nogpu(); }
|
||||
void cv::gpu::CudaStream::release() { throw_nogpu(); }
|
||||
cv::gpu::CudaStream::CudaStream() : impl(0) { throw_nogpu(); }
|
||||
cv::gpu::CudaStream::~CudaStream() { throw_nogpu(); }
|
||||
cv::gpu::CudaStream::CudaStream(const CudaStream& /*stream*/) { throw_nogpu(); }
|
||||
CudaStream& cv::gpu::CudaStream::operator=(const CudaStream& /*stream*/) { throw_nogpu(); return *this; }
|
||||
bool cv::gpu::CudaStream::queryIfComplete() { throw_nogpu(); return true; }
|
||||
void cv::gpu::CudaStream::waitForCompletion() { throw_nogpu(); }
|
||||
void cv::gpu::CudaStream::enqueueDownload(const GpuMat& /*src*/, Mat& /*dst*/) { throw_nogpu(); }
|
||||
void cv::gpu::CudaStream::enqueueDownload(const GpuMat& /*src*/, MatPL& /*dst*/) { throw_nogpu(); }
|
||||
void cv::gpu::CudaStream::enqueueUpload(const MatPL& /*src*/, GpuMat& /*dst*/) { throw_nogpu(); }
|
||||
void cv::gpu::CudaStream::enqueueUpload(const Mat& /*src*/, GpuMat& /*dst*/) { throw_nogpu(); }
|
||||
void cv::gpu::CudaStream::enqueueCopy(const GpuMat& /*src*/, GpuMat& /*dst*/) { throw_nogpu(); }
|
||||
void cv::gpu::CudaStream::enqueueMemSet(const GpuMat& /*src*/, Scalar /*val*/) { throw_nogpu(); }
|
||||
void cv::gpu::CudaStream::enqueueMemSet(const GpuMat& /*src*/, Scalar /*val*/, const GpuMat& /*mask*/) { throw_nogpu(); }
|
||||
void cv::gpu::CudaStream::enqueueConvert(const GpuMat& /*src*/, GpuMat& /*dst*/, int /*type*/, double /*a*/, double /*b*/) { throw_nogpu(); }
|
||||
void cv::gpu::Stream::create() { throw_nogpu(); }
|
||||
void cv::gpu::Stream::release() { throw_nogpu(); }
|
||||
cv::gpu::Stream::Stream() : impl(0) { throw_nogpu(); }
|
||||
cv::gpu::Stream::~Stream() { throw_nogpu(); }
|
||||
cv::gpu::Stream::Stream(const Stream& /*stream*/) { throw_nogpu(); }
|
||||
Stream& cv::gpu::Stream::operator=(const Stream& /*stream*/) { throw_nogpu(); return *this; }
|
||||
bool cv::gpu::Stream::queryIfComplete() { throw_nogpu(); return true; }
|
||||
void cv::gpu::Stream::waitForCompletion() { throw_nogpu(); }
|
||||
void cv::gpu::Stream::enqueueDownload(const GpuMat& /*src*/, Mat& /*dst*/) { throw_nogpu(); }
|
||||
void cv::gpu::Stream::enqueueDownload(const GpuMat& /*src*/, MatPL& /*dst*/) { throw_nogpu(); }
|
||||
void cv::gpu::Stream::enqueueUpload(const MatPL& /*src*/, GpuMat& /*dst*/) { throw_nogpu(); }
|
||||
void cv::gpu::Stream::enqueueUpload(const Mat& /*src*/, GpuMat& /*dst*/) { throw_nogpu(); }
|
||||
void cv::gpu::Stream::enqueueCopy(const GpuMat& /*src*/, GpuMat& /*dst*/) { throw_nogpu(); }
|
||||
void cv::gpu::Stream::enqueueMemSet(const GpuMat& /*src*/, Scalar /*val*/) { throw_nogpu(); }
|
||||
void cv::gpu::Stream::enqueueMemSet(const GpuMat& /*src*/, Scalar /*val*/, const GpuMat& /*mask*/) { throw_nogpu(); }
|
||||
void cv::gpu::Stream::enqueueConvert(const GpuMat& /*src*/, GpuMat& /*dst*/, int /*type*/, double /*a*/, double /*b*/) { throw_nogpu(); }
|
||||
|
||||
#else /* !defined (HAVE_CUDA) */
|
||||
|
||||
#include "opencv2/gpu/stream_accessor.hpp"
|
||||
|
||||
struct CudaStream::Impl
|
||||
struct Stream::Impl
|
||||
{
|
||||
cudaStream_t stream;
|
||||
int ref_counter;
|
||||
@@ -85,9 +85,9 @@ namespace
|
||||
};
|
||||
}
|
||||
|
||||
CV_EXPORTS cudaStream_t cv::gpu::StreamAccessor::getStream(const CudaStream& stream) { return stream.impl->stream; };
|
||||
CV_EXPORTS cudaStream_t cv::gpu::StreamAccessor::getStream(const Stream& stream) { return stream.impl->stream; };
|
||||
|
||||
void cv::gpu::CudaStream::create()
|
||||
void cv::gpu::Stream::create()
|
||||
{
|
||||
if (impl)
|
||||
release();
|
||||
@@ -95,13 +95,13 @@ void cv::gpu::CudaStream::create()
|
||||
cudaStream_t stream;
|
||||
cudaSafeCall( cudaStreamCreate( &stream ) );
|
||||
|
||||
impl = (CudaStream::Impl*)fastMalloc(sizeof(CudaStream::Impl));
|
||||
impl = (Stream::Impl*)fastMalloc(sizeof(Stream::Impl));
|
||||
|
||||
impl->stream = stream;
|
||||
impl->ref_counter = 1;
|
||||
}
|
||||
|
||||
void cv::gpu::CudaStream::release()
|
||||
void cv::gpu::Stream::release()
|
||||
{
|
||||
if( impl && CV_XADD(&impl->ref_counter, -1) == 1 )
|
||||
{
|
||||
@@ -110,15 +110,15 @@ void cv::gpu::CudaStream::release()
|
||||
}
|
||||
}
|
||||
|
||||
cv::gpu::CudaStream::CudaStream() : impl(0) { create(); }
|
||||
cv::gpu::CudaStream::~CudaStream() { release(); }
|
||||
cv::gpu::Stream::Stream() : impl(0) { create(); }
|
||||
cv::gpu::Stream::~Stream() { release(); }
|
||||
|
||||
cv::gpu::CudaStream::CudaStream(const CudaStream& stream) : impl(stream.impl)
|
||||
cv::gpu::Stream::Stream(const Stream& stream) : impl(stream.impl)
|
||||
{
|
||||
if( impl )
|
||||
CV_XADD(&impl->ref_counter, 1);
|
||||
}
|
||||
CudaStream& cv::gpu::CudaStream::operator=(const CudaStream& stream)
|
||||
Stream& cv::gpu::Stream::operator=(const Stream& stream)
|
||||
{
|
||||
if( this != &stream )
|
||||
{
|
||||
@@ -131,7 +131,7 @@ CudaStream& cv::gpu::CudaStream::operator=(const CudaStream& stream)
|
||||
return *this;
|
||||
}
|
||||
|
||||
bool cv::gpu::CudaStream::queryIfComplete()
|
||||
bool cv::gpu::Stream::queryIfComplete()
|
||||
{
|
||||
cudaError_t err = cudaStreamQuery( impl->stream );
|
||||
|
||||
@@ -142,31 +142,31 @@ bool cv::gpu::CudaStream::queryIfComplete()
|
||||
return false;
|
||||
}
|
||||
|
||||
void cv::gpu::CudaStream::waitForCompletion() { cudaSafeCall( cudaStreamSynchronize( impl->stream ) ); }
|
||||
void cv::gpu::Stream::waitForCompletion() { cudaSafeCall( cudaStreamSynchronize( impl->stream ) ); }
|
||||
|
||||
void cv::gpu::CudaStream::enqueueDownload(const GpuMat& src, Mat& dst)
|
||||
void cv::gpu::Stream::enqueueDownload(const GpuMat& src, Mat& dst)
|
||||
{
|
||||
// if not -> allocation will be done, but after that dst will not point to page locked memory
|
||||
CV_Assert(src.cols == dst.cols && src.rows == dst.rows && src.type() == dst.type() )
|
||||
devcopy(src, dst, impl->stream, cudaMemcpyDeviceToHost);
|
||||
}
|
||||
void cv::gpu::CudaStream::enqueueDownload(const GpuMat& src, MatPL& dst) { devcopy(src, dst, impl->stream, cudaMemcpyDeviceToHost); }
|
||||
void cv::gpu::Stream::enqueueDownload(const GpuMat& src, MatPL& dst) { devcopy(src, dst, impl->stream, cudaMemcpyDeviceToHost); }
|
||||
|
||||
void cv::gpu::CudaStream::enqueueUpload(const MatPL& src, GpuMat& dst){ devcopy(src, dst, impl->stream, cudaMemcpyHostToDevice); }
|
||||
void cv::gpu::CudaStream::enqueueUpload(const Mat& src, GpuMat& dst) { devcopy(src, dst, impl->stream, cudaMemcpyHostToDevice); }
|
||||
void cv::gpu::CudaStream::enqueueCopy(const GpuMat& src, GpuMat& dst) { devcopy(src, dst, impl->stream, cudaMemcpyDeviceToDevice); }
|
||||
void cv::gpu::Stream::enqueueUpload(const MatPL& src, GpuMat& dst){ devcopy(src, dst, impl->stream, cudaMemcpyHostToDevice); }
|
||||
void cv::gpu::Stream::enqueueUpload(const Mat& src, GpuMat& dst) { devcopy(src, dst, impl->stream, cudaMemcpyHostToDevice); }
|
||||
void cv::gpu::Stream::enqueueCopy(const GpuMat& src, GpuMat& dst) { devcopy(src, dst, impl->stream, cudaMemcpyDeviceToDevice); }
|
||||
|
||||
void cv::gpu::CudaStream::enqueueMemSet(const GpuMat& src, Scalar val)
|
||||
void cv::gpu::Stream::enqueueMemSet(const GpuMat& src, Scalar val)
|
||||
{
|
||||
impl::set_to_without_mask(src, src.depth(), val.val, src.channels(), impl->stream);
|
||||
}
|
||||
|
||||
void cv::gpu::CudaStream::enqueueMemSet(const GpuMat& src, Scalar val, const GpuMat& mask)
|
||||
void cv::gpu::Stream::enqueueMemSet(const GpuMat& src, Scalar val, const GpuMat& mask)
|
||||
{
|
||||
impl::set_to_with_mask(src, src.depth(), val.val, mask, src.channels(), impl->stream);
|
||||
}
|
||||
|
||||
void cv::gpu::CudaStream::enqueueConvert(const GpuMat& src, GpuMat& dst, int rtype, double alpha, double beta)
|
||||
void cv::gpu::Stream::enqueueConvert(const GpuMat& src, GpuMat& dst, int rtype, double alpha, double beta)
|
||||
{
|
||||
bool noScale = fabs(alpha-1) < std::numeric_limits<double>::epsilon() && fabs(beta) < std::numeric_limits<double>::epsilon();
|
||||
|
||||
|
@@ -82,7 +82,7 @@ void cv::gpu::GpuMat::upload(const Mat& m)
|
||||
cudaSafeCall( cudaMemcpy2D(data, step, m.data, m.step, cols * elemSize(), rows, cudaMemcpyHostToDevice) );
|
||||
}
|
||||
|
||||
void cv::gpu::GpuMat::upload(const MatPL& m, CudaStream& stream)
|
||||
void cv::gpu::GpuMat::upload(const MatPL& m, Stream& stream)
|
||||
{
|
||||
CV_DbgAssert(!m.empty());
|
||||
stream.enqueueUpload(m, *this);
|
||||
@@ -95,7 +95,7 @@ void cv::gpu::GpuMat::download(cv::Mat& m) const
|
||||
cudaSafeCall( cudaMemcpy2D(m.data, m.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToHost) );
|
||||
}
|
||||
|
||||
void cv::gpu::GpuMat::download(MatPL& m, CudaStream& stream) const
|
||||
void cv::gpu::GpuMat::download(MatPL& m, Stream& stream) const
|
||||
{
|
||||
CV_DbgAssert(!m.empty());
|
||||
stream.enqueueDownload(*this, m);
|
||||
|
@@ -52,7 +52,7 @@ cv::gpu::StereoBM_GPU::StereoBM_GPU(int, int, int) { throw_nogpu(); }
|
||||
|
||||
bool cv::gpu::StereoBM_GPU::checkIfGpuCallReasonable() { throw_nogpu(); return false; }
|
||||
void cv::gpu::StereoBM_GPU::operator() ( const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
|
||||
void cv::gpu::StereoBM_GPU::operator() ( const GpuMat&, const GpuMat&, GpuMat&, const CudaStream&) { throw_nogpu(); }
|
||||
void cv::gpu::StereoBM_GPU::operator() ( const GpuMat&, const GpuMat&, GpuMat&, const Stream&) { throw_nogpu(); }
|
||||
|
||||
#else /* !defined (HAVE_CUDA) */
|
||||
|
||||
@@ -134,7 +134,7 @@ void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right
|
||||
::stereo_bm_gpu_operator(minSSD, leBuf, riBuf, preset, ndisp, winSize, avergeTexThreshold, left, right, disparity, 0);
|
||||
}
|
||||
|
||||
void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, const CudaStream& stream)
|
||||
void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, const Stream& stream)
|
||||
{
|
||||
::stereo_bm_gpu_operator(minSSD, leBuf, riBuf, preset, ndisp, winSize, avergeTexThreshold, left, right, disparity, StreamAccessor::getStream(stream));
|
||||
}
|
||||
|
Reference in New Issue
Block a user