added Event class (wrapper for cudaEvent)
This commit is contained in:
parent
2dab93c2e8
commit
f17b836d24
@ -495,6 +495,7 @@ namespace gpu
|
||||
class CV_EXPORTS GpuMat;
|
||||
class CV_EXPORTS CudaMem;
|
||||
class CV_EXPORTS Stream;
|
||||
class CV_EXPORTS Event;
|
||||
}
|
||||
|
||||
} // cv
|
||||
|
@ -359,6 +359,9 @@ public:
|
||||
//! waits for stream tasks to complete
|
||||
void waitForCompletion();
|
||||
|
||||
//! makes a compute stream wait on an event
|
||||
void waitEvent(const Event& event);
|
||||
|
||||
//! adds a callback to be called on the host after all currently enqueued items in the stream have completed
|
||||
void enqueueHostCallback(StreamCallback callback, void* userData);
|
||||
|
||||
@ -390,6 +393,39 @@ private:
|
||||
friend struct StreamAccessor;
|
||||
};
|
||||
|
||||
class CV_EXPORTS Event
|
||||
{
|
||||
public:
|
||||
enum CreateFlags
|
||||
{
|
||||
DEFAULT = 0x00, /**< Default event flag */
|
||||
BLOCKING_SYNC = 0x01, /**< Event uses blocking synchronization */
|
||||
DISABLE_TIMING = 0x02, /**< Event will not record timing data */
|
||||
INTERPROCESS = 0x04 /**< Event is suitable for interprocess use. DisableTiming must be set */
|
||||
};
|
||||
|
||||
explicit Event(CreateFlags flags = DEFAULT);
|
||||
|
||||
//! records an event
|
||||
void record(Stream& stream = Stream::Null());
|
||||
|
||||
//! queries an event's status
|
||||
bool queryIfComplete() const;
|
||||
|
||||
//! waits for an event to complete
|
||||
void waitForCompletion();
|
||||
|
||||
//! computes the elapsed time between events
|
||||
static float elapsedTime(const Event& start, const Event& end);
|
||||
|
||||
class Impl;
|
||||
|
||||
private:
|
||||
Ptr<Impl> impl_;
|
||||
|
||||
friend struct EventAccessor;
|
||||
};
|
||||
|
||||
//////////////////////////////// Initialization & Info ////////////////////////
|
||||
|
||||
//! this is the only function that does not throw exceptions if the library is compiled without CUDA
|
||||
@ -642,6 +678,7 @@ CV_EXPORTS void printShortCudaDeviceInfo(int device);
|
||||
namespace cv {
|
||||
|
||||
template <> CV_EXPORTS void Ptr<cv::gpu::Stream::Impl>::delete_obj();
|
||||
template <> CV_EXPORTS void Ptr<cv::gpu::Event::Impl>::delete_obj();
|
||||
|
||||
}
|
||||
|
||||
|
@ -60,11 +60,17 @@ namespace cv
|
||||
namespace gpu
|
||||
{
|
||||
class Stream;
|
||||
class Event;
|
||||
|
||||
struct StreamAccessor
|
||||
{
|
||||
CV_EXPORTS static cudaStream_t getStream(const Stream& stream);
|
||||
};
|
||||
|
||||
struct EventAccessor
|
||||
{
|
||||
CV_EXPORTS static cudaEvent_t getEvent(const Event& event);
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -45,6 +45,9 @@
|
||||
using namespace cv;
|
||||
using namespace cv::gpu;
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Stream
|
||||
|
||||
#ifndef HAVE_CUDA
|
||||
|
||||
class cv::gpu::Stream::Impl
|
||||
@ -126,6 +129,16 @@ void cv::gpu::Stream::waitForCompletion()
|
||||
#endif
|
||||
}
|
||||
|
||||
void cv::gpu::Stream::waitEvent(const Event& event)
|
||||
{
|
||||
#ifndef HAVE_CUDA
|
||||
(void) event;
|
||||
throw_no_cuda();
|
||||
#else
|
||||
cudaSafeCall( cudaStreamWaitEvent(impl_->stream, EventAccessor::getEvent(event), 0) );
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(HAVE_CUDA) && (CUDART_VERSION >= 5000)
|
||||
|
||||
namespace
|
||||
@ -186,3 +199,110 @@ template <> void cv::Ptr<Stream::Impl>::delete_obj()
|
||||
{
|
||||
if (obj) delete obj;
|
||||
}
|
||||
|
||||
////////////////////////////////////////////////////////////////
|
||||
// Event
|
||||
|
||||
#ifndef HAVE_CUDA
|
||||
|
||||
class cv::gpu::Event::Impl
|
||||
{
|
||||
public:
|
||||
Impl(unsigned int)
|
||||
{
|
||||
throw_no_cuda();
|
||||
}
|
||||
};
|
||||
|
||||
#else
|
||||
|
||||
class cv::gpu::Event::Impl
|
||||
{
|
||||
public:
|
||||
cudaEvent_t event;
|
||||
|
||||
Impl(unsigned int flags);
|
||||
~Impl();
|
||||
};
|
||||
|
||||
// Creates the native event. The handle starts out as 0 so that the destructor's
// check sees "no event" if creation is reported as failed by cudaSafeCall.
cv::gpu::Event::Impl::Impl(unsigned int flags)
    : event(0)
{
    cudaSafeCall( cudaEventCreateWithFlags(&event, flags) );
}
|
||||
|
||||
// Releases the native event, if any.
cv::gpu::Event::Impl::~Impl()
{
    // Nothing to release when no handle was stored.
    if (!event)
        return;

    // The result is deliberately not passed through cudaSafeCall here.
    cudaEventDestroy(event);
}
|
||||
|
||||
// Returns the native cudaEvent_t stored in the implementation object of the given Event.
cudaEvent_t cv::gpu::EventAccessor::getEvent(const Event& event)
{
    const cudaEvent_t nativeEvent = event.impl_->event;
    return nativeEvent;
}
|
||||
|
||||
#endif
|
||||
|
||||
// Constructs an event with the requested creation flags.
// Without CUDA support the constructor only calls throw_no_cuda().
cv::gpu::Event::Event(CreateFlags flags)
{
#ifdef HAVE_CUDA
    impl_ = new Impl(flags);
#else
    (void) flags;
    throw_no_cuda();
#endif
}
|
||||
|
||||
// Records this event in the given stream.
// Without CUDA support the call only reports the missing backend through throw_no_cuda().
void cv::gpu::Event::record(Stream& stream)
{
#ifdef HAVE_CUDA
    const cudaStream_t nativeStream = StreamAccessor::getStream(stream);
    cudaSafeCall( cudaEventRecord(impl_->event, nativeStream) );
#else
    (void) stream;
    throw_no_cuda();
#endif
}
|
||||
|
||||
bool cv::gpu::Event::queryIfComplete() const
|
||||
{
|
||||
#ifndef HAVE_CUDA
|
||||
throw_no_cuda();
|
||||
return false;
|
||||
#else
|
||||
cudaError_t err = cudaEventQuery(impl_->event);
|
||||
|
||||
if (err == cudaErrorNotReady || err == cudaSuccess)
|
||||
return err == cudaSuccess;
|
||||
|
||||
cudaSafeCall(err);
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
void cv::gpu::Event::waitForCompletion()
|
||||
{
|
||||
#ifndef HAVE_CUDA
|
||||
throw_no_cuda();
|
||||
#else
|
||||
cudaSafeCall( cudaEventSynchronize(impl_->event) );
|
||||
#endif
|
||||
}
|
||||
|
||||
float cv::gpu::Event::elapsedTime(const Event& start, const Event& end)
|
||||
{
|
||||
#ifndef HAVE_CUDA
|
||||
(void) start;
|
||||
(void) end;
|
||||
throw_no_cuda();
|
||||
return 0.0f;
|
||||
#else
|
||||
float ms;
|
||||
cudaSafeCall( cudaEventElapsedTime(&ms, start.impl_->event, end.impl_->event) );
|
||||
return ms;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Specialization of Ptr's deleter for Event::Impl: destroys the held implementation object.
template <> void cv::Ptr<Event::Impl>::delete_obj()
{
    // delete on a null pointer is a no-op, so no explicit check is required
    delete obj;
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user