Merge pull request #3566 from jet47:cuda-imgproc-refactoring
This commit is contained in:
commit
2ecca8f59c
@ -130,6 +130,12 @@ namespace cv { namespace cuda
|
||||
class NppStreamHandler
|
||||
{
|
||||
public:
|
||||
inline explicit NppStreamHandler(Stream& newStream)
|
||||
{
|
||||
oldStream = nppGetStream();
|
||||
nppSetStream(StreamAccessor::getStream(newStream));
|
||||
}
|
||||
|
||||
inline explicit NppStreamHandler(cudaStream_t newStream)
|
||||
{
|
||||
oldStream = nppGetStream();
|
||||
|
@ -205,19 +205,11 @@ CV_EXPORTS void calcHist(InputArray src, OutputArray hist, Stream& stream = Stre
|
||||
|
||||
@param src Source image with CV_8UC1 type.
|
||||
@param dst Destination image.
|
||||
@param buf Optional buffer to avoid extra memory allocations (for many calls with the same sizes).
|
||||
@param stream Stream for the asynchronous version.
|
||||
|
||||
@sa equalizeHist
|
||||
*/
|
||||
CV_EXPORTS void equalizeHist(InputArray src, OutputArray dst, InputOutputArray buf, Stream& stream = Stream::Null());
|
||||
|
||||
/** @overload */
|
||||
static inline void equalizeHist(InputArray src, OutputArray dst, Stream& stream = Stream::Null())
|
||||
{
|
||||
GpuMat buf;
|
||||
cuda::equalizeHist(src, dst, buf, stream);
|
||||
}
|
||||
CV_EXPORTS void equalizeHist(InputArray src, OutputArray dst, Stream& stream = Stream::Null());
|
||||
|
||||
/** @brief Base class for Contrast Limited Adaptive Histogram Equalization.
|
||||
*/
|
||||
@ -248,8 +240,9 @@ CV_EXPORTS Ptr<cuda::CLAHE> createCLAHE(double clipLimit = 40.0, Size tileGridSi
|
||||
@param nLevels Number of computed levels. nLevels must be at least 2.
|
||||
@param lowerLevel Lower boundary value of the lowest level.
|
||||
@param upperLevel Upper boundary value of the greatest level.
|
||||
@param stream Stream for the asynchronous version.
|
||||
*/
|
||||
CV_EXPORTS void evenLevels(OutputArray levels, int nLevels, int lowerLevel, int upperLevel);
|
||||
CV_EXPORTS void evenLevels(OutputArray levels, int nLevels, int lowerLevel, int upperLevel, Stream& stream = Stream::Null());
|
||||
|
||||
/** @brief Calculates a histogram with evenly distributed bins.
|
||||
|
||||
@ -259,27 +252,11 @@ a four-channel image, all channels are processed separately.
|
||||
@param histSize Size of the histogram.
|
||||
@param lowerLevel Lower boundary of lowest-level bin.
|
||||
@param upperLevel Upper boundary of highest-level bin.
|
||||
@param buf Optional buffer to avoid extra memory allocations (for many calls with the same sizes).
|
||||
@param stream Stream for the asynchronous version.
|
||||
*/
|
||||
CV_EXPORTS void histEven(InputArray src, OutputArray hist, InputOutputArray buf, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null());
|
||||
|
||||
CV_EXPORTS void histEven(InputArray src, OutputArray hist, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null());
|
||||
/** @overload */
|
||||
static inline void histEven(InputArray src, OutputArray hist, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null())
|
||||
{
|
||||
GpuMat buf;
|
||||
cuda::histEven(src, hist, buf, histSize, lowerLevel, upperLevel, stream);
|
||||
}
|
||||
|
||||
/** @overload */
|
||||
CV_EXPORTS void histEven(InputArray src, GpuMat hist[4], InputOutputArray buf, int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream = Stream::Null());
|
||||
|
||||
/** @overload */
|
||||
static inline void histEven(InputArray src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream = Stream::Null())
|
||||
{
|
||||
GpuMat buf;
|
||||
cuda::histEven(src, hist, buf, histSize, lowerLevel, upperLevel, stream);
|
||||
}
|
||||
CV_EXPORTS void histEven(InputArray src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream = Stream::Null());
|
||||
|
||||
/** @brief Calculates a histogram with bins determined by the levels array.
|
||||
|
||||
@ -287,27 +264,11 @@ static inline void histEven(InputArray src, GpuMat hist[4], int histSize[4], int
|
||||
For a four-channel image, all channels are processed separately.
|
||||
@param hist Destination histogram with one row, (levels.cols-1) columns, and the CV_32SC1 type.
|
||||
@param levels Array of levels that determine the histogram bins.
|
||||
@param buf Optional buffer to avoid extra memory allocations (for many calls with the same sizes).
|
||||
@param stream Stream for the asynchronous version.
|
||||
*/
|
||||
CV_EXPORTS void histRange(InputArray src, OutputArray hist, InputArray levels, InputOutputArray buf, Stream& stream = Stream::Null());
|
||||
|
||||
CV_EXPORTS void histRange(InputArray src, OutputArray hist, InputArray levels, Stream& stream = Stream::Null());
|
||||
/** @overload */
|
||||
static inline void histRange(InputArray src, OutputArray hist, InputArray levels, Stream& stream = Stream::Null())
|
||||
{
|
||||
GpuMat buf;
|
||||
cuda::histRange(src, hist, levels, buf, stream);
|
||||
}
|
||||
|
||||
/** @overload */
|
||||
CV_EXPORTS void histRange(InputArray src, GpuMat hist[4], const GpuMat levels[4], InputOutputArray buf, Stream& stream = Stream::Null());
|
||||
|
||||
/** @overload */
|
||||
static inline void histRange(InputArray src, GpuMat hist[4], const GpuMat levels[4], Stream& stream = Stream::Null())
|
||||
{
|
||||
GpuMat buf;
|
||||
cuda::histRange(src, hist, levels, buf, stream);
|
||||
}
|
||||
CV_EXPORTS void histRange(InputArray src, GpuMat hist[4], const GpuMat levels[4], Stream& stream = Stream::Null());
|
||||
|
||||
//! @} cudaimgproc_hist
|
||||
|
||||
@ -322,14 +283,16 @@ public:
|
||||
|
||||
@param image Single-channel 8-bit input image.
|
||||
@param edges Output edge map. It has the same size and type as image.
|
||||
@param stream Stream for the asynchronous version.
|
||||
*/
|
||||
virtual void detect(InputArray image, OutputArray edges) = 0;
|
||||
virtual void detect(InputArray image, OutputArray edges, Stream& stream = Stream::Null()) = 0;
|
||||
/** @overload
|
||||
@param dx First derivative of image in the horizontal (x) direction. Only the CV_32S type is supported.
|
||||
@param dy First derivative of image in the vertical (y) direction. Only the CV_32S type is supported.
|
||||
@param edges Output edge map. It has the same size as dx and the CV_8UC1 type.
|
||||
@param stream Stream for the asynchronous version.
|
||||
*/
|
||||
virtual void detect(InputArray dx, InputArray dy, OutputArray edges) = 0;
|
||||
virtual void detect(InputArray dx, InputArray dy, OutputArray edges, Stream& stream = Stream::Null()) = 0;
|
||||
|
||||
virtual void setLowThreshold(double low_thresh) = 0;
|
||||
virtual double getLowThreshold() const = 0;
|
||||
@ -376,18 +339,20 @@ public:
|
||||
\f$(\rho, \theta)\f$ . \f$\rho\f$ is the distance from the coordinate origin \f$(0,0)\f$ (top-left corner of
|
||||
the image). \f$\theta\f$ is the line rotation angle in radians (
|
||||
\f$0 \sim \textrm{vertical line}, \pi/2 \sim \textrm{horizontal line}\f$ ).
|
||||
@param stream Stream for the asynchronous version.
|
||||
|
||||
@sa HoughLines
|
||||
*/
|
||||
virtual void detect(InputArray src, OutputArray lines) = 0;
|
||||
virtual void detect(InputArray src, OutputArray lines, Stream& stream = Stream::Null()) = 0;
|
||||
|
||||
/** @brief Downloads results from cuda::HoughLinesDetector::detect to host memory.
|
||||
|
||||
@param d_lines Result of cuda::HoughLinesDetector::detect .
|
||||
@param h_lines Output host array.
|
||||
@param h_votes Optional output array for the votes of the lines.
|
||||
@param stream Stream for the asynchronous version.
|
||||
*/
|
||||
virtual void downloadResults(InputArray d_lines, OutputArray h_lines, OutputArray h_votes = noArray()) = 0;
|
||||
virtual void downloadResults(InputArray d_lines, OutputArray h_lines, OutputArray h_votes = noArray(), Stream& stream = Stream::Null()) = 0;
|
||||
|
||||
virtual void setRho(float rho) = 0;
|
||||
virtual float getRho() const = 0;
|
||||
@ -431,10 +396,11 @@ public:
|
||||
@param lines Output vector of lines. Each line is represented by a 4-element vector
|
||||
\f$(x_1, y_1, x_2, y_2)\f$ , where \f$(x_1,y_1)\f$ and \f$(x_2, y_2)\f$ are the ending points of each detected
|
||||
line segment.
|
||||
@param stream Stream for the asynchronous version.
|
||||
|
||||
@sa HoughLinesP
|
||||
*/
|
||||
virtual void detect(InputArray src, OutputArray lines) = 0;
|
||||
virtual void detect(InputArray src, OutputArray lines, Stream& stream = Stream::Null()) = 0;
|
||||
|
||||
virtual void setRho(float rho) = 0;
|
||||
virtual float getRho() const = 0;
|
||||
@ -475,10 +441,11 @@ public:
|
||||
@param src 8-bit, single-channel grayscale input image.
|
||||
@param circles Output vector of found circles. Each vector is encoded as a 3-element
|
||||
floating-point vector \f$(x, y, radius)\f$ .
|
||||
@param stream Stream for the asynchronous version.
|
||||
|
||||
@sa HoughCircles
|
||||
*/
|
||||
virtual void detect(InputArray src, OutputArray circles) = 0;
|
||||
virtual void detect(InputArray src, OutputArray circles, Stream& stream = Stream::Null()) = 0;
|
||||
|
||||
virtual void setDp(float dp) = 0;
|
||||
virtual float getDp() const = 0;
|
||||
@ -593,8 +560,9 @@ public:
|
||||
positions).
|
||||
@param mask Optional region of interest. If the image is not empty (it needs to have the type
|
||||
CV_8UC1 and the same size as image ), it specifies the region in which the corners are detected.
|
||||
@param stream Stream for the asynchronous version.
|
||||
*/
|
||||
virtual void detect(InputArray image, OutputArray corners, InputArray mask = noArray()) = 0;
|
||||
virtual void detect(InputArray image, OutputArray corners, InputArray mask = noArray(), Stream& stream = Stream::Null()) = 0;
|
||||
};
|
||||
|
||||
/** @brief Creates implementation for cuda::CornersDetector .
|
||||
@ -630,7 +598,7 @@ as src .
|
||||
@param sp Spatial window radius.
|
||||
@param sr Color window radius.
|
||||
@param criteria Termination criteria. See TermCriteria.
|
||||
@param stream
|
||||
@param stream Stream for the asynchronous version.
|
||||
|
||||
It maps each point of the source image into another point. As a result, you have a new color and new
|
||||
position of each point.
|
||||
@ -650,7 +618,7 @@ src size. The type is CV_16SC2 .
|
||||
@param sp Spatial window radius.
|
||||
@param sr Color window radius.
|
||||
@param criteria Termination criteria. See TermCriteria.
|
||||
@param stream
|
||||
@param stream Stream for the asynchronous version.
|
||||
|
||||
@sa cuda::meanShiftFiltering
|
||||
*/
|
||||
@ -666,9 +634,11 @@ CV_EXPORTS void meanShiftProc(InputArray src, OutputArray dstr, OutputArray dsts
|
||||
@param sr Color window radius.
|
||||
@param minsize Minimum segment size. Smaller segments are merged.
|
||||
@param criteria Termination criteria. See TermCriteria.
|
||||
@param stream Stream for the asynchronous version.
|
||||
*/
|
||||
CV_EXPORTS void meanShiftSegmentation(InputArray src, OutputArray dst, int sp, int sr, int minsize,
|
||||
TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
|
||||
TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1),
|
||||
Stream& stream = Stream::Null());
|
||||
|
||||
/////////////////////////// Match Template ////////////////////////////
|
||||
|
||||
|
@ -63,9 +63,8 @@ PERF_TEST_P(Sz_Depth, HistEvenC1,
|
||||
{
|
||||
const cv::cuda::GpuMat d_src(src);
|
||||
cv::cuda::GpuMat dst;
|
||||
cv::cuda::GpuMat d_buf;
|
||||
|
||||
TEST_CYCLE() cv::cuda::histEven(d_src, dst, d_buf, 30, 0, 180);
|
||||
TEST_CYCLE() cv::cuda::histEven(d_src, dst, 30, 0, 180);
|
||||
|
||||
CUDA_SANITY_CHECK(dst);
|
||||
}
|
||||
@ -106,9 +105,8 @@ PERF_TEST_P(Sz_Depth, HistEvenC4,
|
||||
{
|
||||
const cv::cuda::GpuMat d_src(src);
|
||||
cv::cuda::GpuMat d_hist[4];
|
||||
cv::cuda::GpuMat d_buf;
|
||||
|
||||
TEST_CYCLE() cv::cuda::histEven(d_src, d_hist, d_buf, histSize, lowerLevel, upperLevel);
|
||||
TEST_CYCLE() cv::cuda::histEven(d_src, d_hist, histSize, lowerLevel, upperLevel);
|
||||
|
||||
cv::Mat cpu_hist0, cpu_hist1, cpu_hist2, cpu_hist3;
|
||||
d_hist[0].download(cpu_hist0);
|
||||
@ -167,9 +165,8 @@ PERF_TEST_P(Sz, EqualizeHist,
|
||||
{
|
||||
const cv::cuda::GpuMat d_src(src);
|
||||
cv::cuda::GpuMat dst;
|
||||
cv::cuda::GpuMat d_buf;
|
||||
|
||||
TEST_CYCLE() cv::cuda::equalizeHist(d_src, dst, d_buf);
|
||||
TEST_CYCLE() cv::cuda::equalizeHist(d_src, dst);
|
||||
|
||||
CUDA_SANITY_CHECK(dst);
|
||||
}
|
||||
|
@ -53,16 +53,16 @@ Ptr<CannyEdgeDetector> cv::cuda::createCannyEdgeDetector(double, double, int, bo
|
||||
|
||||
namespace canny
|
||||
{
|
||||
void calcMagnitude(PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad);
|
||||
void calcMagnitude(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad);
|
||||
void calcMagnitude(PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad, cudaStream_t stream);
|
||||
void calcMagnitude(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad, cudaStream_t stream);
|
||||
|
||||
void calcMap(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, PtrStepSzi map, float low_thresh, float high_thresh);
|
||||
void calcMap(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, PtrStepSzi map, float low_thresh, float high_thresh, cudaStream_t stream);
|
||||
|
||||
void edgesHysteresisLocal(PtrStepSzi map, short2* st1);
|
||||
void edgesHysteresisLocal(PtrStepSzi map, short2* st1, cudaStream_t stream);
|
||||
|
||||
void edgesHysteresisGlobal(PtrStepSzi map, short2* st1, short2* st2);
|
||||
void edgesHysteresisGlobal(PtrStepSzi map, short2* st1, short2* st2, cudaStream_t stream);
|
||||
|
||||
void getEdges(PtrStepSzi map, PtrStepSzb dst);
|
||||
void getEdges(PtrStepSzi map, PtrStepSzb dst, cudaStream_t stream);
|
||||
}
|
||||
|
||||
namespace
|
||||
@ -76,8 +76,8 @@ namespace
|
||||
old_apperture_size_ = -1;
|
||||
}
|
||||
|
||||
void detect(InputArray image, OutputArray edges);
|
||||
void detect(InputArray dx, InputArray dy, OutputArray edges);
|
||||
void detect(InputArray image, OutputArray edges, Stream& stream);
|
||||
void detect(InputArray dx, InputArray dy, OutputArray edges, Stream& stream);
|
||||
|
||||
void setLowThreshold(double low_thresh) { low_thresh_ = low_thresh; }
|
||||
double getLowThreshold() const { return low_thresh_; }
|
||||
@ -111,7 +111,7 @@ namespace
|
||||
|
||||
private:
|
||||
void createBuf(Size image_size);
|
||||
void CannyCaller(GpuMat& edges);
|
||||
void CannyCaller(GpuMat& edges, Stream& stream);
|
||||
|
||||
double low_thresh_;
|
||||
double high_thresh_;
|
||||
@ -128,7 +128,7 @@ namespace
|
||||
int old_apperture_size_;
|
||||
};
|
||||
|
||||
void CannyImpl::detect(InputArray _image, OutputArray _edges)
|
||||
void CannyImpl::detect(InputArray _image, OutputArray _edges, Stream& stream)
|
||||
{
|
||||
GpuMat image = _image.getGpuMat();
|
||||
|
||||
@ -150,24 +150,24 @@ namespace
|
||||
image.locateROI(wholeSize, ofs);
|
||||
GpuMat srcWhole(wholeSize, image.type(), image.datastart, image.step);
|
||||
|
||||
canny::calcMagnitude(srcWhole, ofs.x, ofs.y, dx_, dy_, mag_, L2gradient_);
|
||||
canny::calcMagnitude(srcWhole, ofs.x, ofs.y, dx_, dy_, mag_, L2gradient_, StreamAccessor::getStream(stream));
|
||||
}
|
||||
else
|
||||
{
|
||||
#ifndef HAVE_OPENCV_CUDAFILTERS
|
||||
throw_no_cuda();
|
||||
#else
|
||||
filterDX_->apply(image, dx_);
|
||||
filterDY_->apply(image, dy_);
|
||||
filterDX_->apply(image, dx_, stream);
|
||||
filterDY_->apply(image, dy_, stream);
|
||||
|
||||
canny::calcMagnitude(dx_, dy_, mag_, L2gradient_);
|
||||
canny::calcMagnitude(dx_, dy_, mag_, L2gradient_, StreamAccessor::getStream(stream));
|
||||
#endif
|
||||
}
|
||||
|
||||
CannyCaller(edges);
|
||||
CannyCaller(edges, stream);
|
||||
}
|
||||
|
||||
void CannyImpl::detect(InputArray _dx, InputArray _dy, OutputArray _edges)
|
||||
void CannyImpl::detect(InputArray _dx, InputArray _dy, OutputArray _edges, Stream& stream)
|
||||
{
|
||||
GpuMat dx = _dx.getGpuMat();
|
||||
GpuMat dy = _dy.getGpuMat();
|
||||
@ -176,8 +176,8 @@ namespace
|
||||
CV_Assert( dy.type() == dx.type() && dy.size() == dx.size() );
|
||||
CV_Assert( deviceSupports(SHARED_ATOMICS) );
|
||||
|
||||
dx.copyTo(dx_);
|
||||
dy.copyTo(dy_);
|
||||
dx.copyTo(dx_, stream);
|
||||
dy.copyTo(dy_, stream);
|
||||
|
||||
if (low_thresh_ > high_thresh_)
|
||||
std::swap(low_thresh_, high_thresh_);
|
||||
@ -187,9 +187,9 @@ namespace
|
||||
_edges.create(dx.size(), CV_8UC1);
|
||||
GpuMat edges = _edges.getGpuMat();
|
||||
|
||||
canny::calcMagnitude(dx_, dy_, mag_, L2gradient_);
|
||||
canny::calcMagnitude(dx_, dy_, mag_, L2gradient_, StreamAccessor::getStream(stream));
|
||||
|
||||
CannyCaller(edges);
|
||||
CannyCaller(edges, stream);
|
||||
}
|
||||
|
||||
void CannyImpl::createBuf(Size image_size)
|
||||
@ -215,16 +215,16 @@ namespace
|
||||
ensureSizeIsEnough(1, image_size.area(), CV_16SC2, st2_);
|
||||
}
|
||||
|
||||
void CannyImpl::CannyCaller(GpuMat& edges)
|
||||
void CannyImpl::CannyCaller(GpuMat& edges, Stream& stream)
|
||||
{
|
||||
map_.setTo(Scalar::all(0));
|
||||
canny::calcMap(dx_, dy_, mag_, map_, static_cast<float>(low_thresh_), static_cast<float>(high_thresh_));
|
||||
canny::calcMap(dx_, dy_, mag_, map_, static_cast<float>(low_thresh_), static_cast<float>(high_thresh_), StreamAccessor::getStream(stream));
|
||||
|
||||
canny::edgesHysteresisLocal(map_, st1_.ptr<short2>());
|
||||
canny::edgesHysteresisLocal(map_, st1_.ptr<short2>(), StreamAccessor::getStream(stream));
|
||||
|
||||
canny::edgesHysteresisGlobal(map_, st1_.ptr<short2>(), st2_.ptr<short2>());
|
||||
canny::edgesHysteresisGlobal(map_, st1_.ptr<short2>(), st2_.ptr<short2>(), StreamAccessor::getStream(stream));
|
||||
|
||||
canny::getEdges(map_, edges);
|
||||
canny::getEdges(map_, edges, StreamAccessor::getStream(stream));
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -120,7 +120,7 @@ namespace canny
|
||||
mag(y, x) = norm(dxVal, dyVal);
|
||||
}
|
||||
|
||||
void calcMagnitude(PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad)
|
||||
void calcMagnitude(PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad, cudaStream_t stream)
|
||||
{
|
||||
const dim3 block(16, 16);
|
||||
const dim3 grid(divUp(mag.cols, block.x), divUp(mag.rows, block.y));
|
||||
@ -131,30 +131,31 @@ namespace canny
|
||||
if (L2Grad)
|
||||
{
|
||||
L2 norm;
|
||||
calcMagnitudeKernel<<<grid, block>>>(src, dx, dy, mag, norm);
|
||||
calcMagnitudeKernel<<<grid, block, 0, stream>>>(src, dx, dy, mag, norm);
|
||||
}
|
||||
else
|
||||
{
|
||||
L1 norm;
|
||||
calcMagnitudeKernel<<<grid, block>>>(src, dx, dy, mag, norm);
|
||||
calcMagnitudeKernel<<<grid, block, 0, stream>>>(src, dx, dy, mag, norm);
|
||||
}
|
||||
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
cudaSafeCall(cudaThreadSynchronize());
|
||||
if (stream == NULL)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
|
||||
void calcMagnitude(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad)
|
||||
void calcMagnitude(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad, cudaStream_t stream)
|
||||
{
|
||||
if (L2Grad)
|
||||
{
|
||||
L2 norm;
|
||||
transform(dx, dy, mag, norm, WithOutMask(), 0);
|
||||
transform(dx, dy, mag, norm, WithOutMask(), stream);
|
||||
}
|
||||
else
|
||||
{
|
||||
L1 norm;
|
||||
transform(dx, dy, mag, norm, WithOutMask(), 0);
|
||||
transform(dx, dy, mag, norm, WithOutMask(), stream);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -217,16 +218,17 @@ namespace canny
|
||||
map(y, x) = edge_type;
|
||||
}
|
||||
|
||||
void calcMap(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, PtrStepSzi map, float low_thresh, float high_thresh)
|
||||
void calcMap(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, PtrStepSzi map, float low_thresh, float high_thresh, cudaStream_t stream)
|
||||
{
|
||||
const dim3 block(16, 16);
|
||||
const dim3 grid(divUp(dx.cols, block.x), divUp(dx.rows, block.y));
|
||||
|
||||
bindTexture(&tex_mag, mag);
|
||||
|
||||
calcMapKernel<<<grid, block>>>(dx, dy, map, low_thresh, high_thresh);
|
||||
calcMapKernel<<<grid, block, 0, stream>>>(dx, dy, map, low_thresh, high_thresh);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == NULL)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
}
|
||||
@ -328,19 +330,20 @@ namespace canny
|
||||
}
|
||||
}
|
||||
|
||||
void edgesHysteresisLocal(PtrStepSzi map, short2* st1)
|
||||
void edgesHysteresisLocal(PtrStepSzi map, short2* st1, cudaStream_t stream)
|
||||
{
|
||||
void* counter_ptr;
|
||||
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, counter) );
|
||||
|
||||
cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(int)) );
|
||||
cudaSafeCall( cudaMemsetAsync(counter_ptr, 0, sizeof(int), stream) );
|
||||
|
||||
const dim3 block(16, 16);
|
||||
const dim3 grid(divUp(map.cols, block.x), divUp(map.rows, block.y));
|
||||
|
||||
edgesHysteresisLocalKernel<<<grid, block>>>(map, st1);
|
||||
edgesHysteresisLocalKernel<<<grid, block, 0, stream>>>(map, st1);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == NULL)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
}
|
||||
@ -441,27 +444,30 @@ namespace canny
|
||||
}
|
||||
}
|
||||
|
||||
void edgesHysteresisGlobal(PtrStepSzi map, short2* st1, short2* st2)
|
||||
void edgesHysteresisGlobal(PtrStepSzi map, short2* st1, short2* st2, cudaStream_t stream)
|
||||
{
|
||||
void* counter_ptr;
|
||||
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, canny::counter) );
|
||||
|
||||
int count;
|
||||
cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) );
|
||||
cudaSafeCall( cudaMemcpyAsync(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost, stream) );
|
||||
cudaSafeCall( cudaStreamSynchronize(stream) );
|
||||
|
||||
while (count > 0)
|
||||
{
|
||||
cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(int)) );
|
||||
cudaSafeCall( cudaMemsetAsync(counter_ptr, 0, sizeof(int), stream) );
|
||||
|
||||
const dim3 block(128);
|
||||
const dim3 grid(::min(count, 65535u), divUp(count, 65535), 1);
|
||||
|
||||
edgesHysteresisGlobalKernel<<<grid, block>>>(map, st1, st2, count);
|
||||
edgesHysteresisGlobalKernel<<<grid, block, 0, stream>>>(map, st1, st2, count);
|
||||
cudaSafeCall( cudaGetLastError() );
|
||||
|
||||
if (stream == NULL)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
|
||||
cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) );
|
||||
cudaSafeCall( cudaMemcpyAsync(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost, stream) );
|
||||
cudaSafeCall( cudaStreamSynchronize(stream) );
|
||||
|
||||
count = min(count, map.cols * map.rows);
|
||||
|
||||
@ -499,9 +505,9 @@ namespace cv { namespace cuda { namespace device
|
||||
|
||||
namespace canny
|
||||
{
|
||||
void getEdges(PtrStepSzi map, PtrStepSzb dst)
|
||||
void getEdges(PtrStepSzi map, PtrStepSzb dst, cudaStream_t stream)
|
||||
{
|
||||
transform(map, dst, GetEdges(), WithOutMask(), 0);
|
||||
transform(map, dst, GetEdges(), WithOutMask(), stream);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -68,7 +68,7 @@ namespace
|
||||
GoodFeaturesToTrackDetector(int srcType, int maxCorners, double qualityLevel, double minDistance,
|
||||
int blockSize, bool useHarrisDetector, double harrisK);
|
||||
|
||||
void detect(InputArray image, OutputArray corners, InputArray mask = noArray());
|
||||
void detect(InputArray image, OutputArray corners, InputArray mask, Stream& stream);
|
||||
|
||||
private:
|
||||
int maxCorners_;
|
||||
@ -96,8 +96,11 @@ namespace
|
||||
cuda::createMinEigenValCorner(srcType, blockSize, 3);
|
||||
}
|
||||
|
||||
void GoodFeaturesToTrackDetector::detect(InputArray _image, OutputArray _corners, InputArray _mask)
|
||||
void GoodFeaturesToTrackDetector::detect(InputArray _image, OutputArray _corners, InputArray _mask, Stream& stream)
|
||||
{
|
||||
// TODO: implement async version (the stream argument is currently unused)
|
||||
(void) stream;
|
||||
|
||||
using namespace cv::cuda::device::gfft;
|
||||
|
||||
GpuMat image = _image.getGpuMat();
|
||||
|
@ -49,11 +49,11 @@ using namespace cv::cuda;
|
||||
|
||||
void cv::cuda::calcHist(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
|
||||
|
||||
void cv::cuda::equalizeHist(InputArray, OutputArray, InputOutputArray, Stream&) { throw_no_cuda(); }
|
||||
void cv::cuda::equalizeHist(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
|
||||
|
||||
cv::Ptr<cv::cuda::CLAHE> cv::cuda::createCLAHE(double, cv::Size) { throw_no_cuda(); return cv::Ptr<cv::cuda::CLAHE>(); }
|
||||
|
||||
void cv::cuda::evenLevels(OutputArray, int, int, int) { throw_no_cuda(); }
|
||||
void cv::cuda::evenLevels(OutputArray, int, int, int, Stream&) { throw_no_cuda(); }
|
||||
|
||||
void cv::cuda::histEven(InputArray, OutputArray, InputOutputArray, int, int, int, Stream&) { throw_no_cuda(); }
|
||||
void cv::cuda::histEven(InputArray, GpuMat*, InputOutputArray, int*, int*, int*, Stream&) { throw_no_cuda(); }
|
||||
@ -93,7 +93,7 @@ namespace hist
|
||||
void equalizeHist(PtrStepSzb src, PtrStepSzb dst, const int* lut, cudaStream_t stream);
|
||||
}
|
||||
|
||||
void cv::cuda::equalizeHist(InputArray _src, OutputArray _dst, InputOutputArray _buf, Stream& _stream)
|
||||
void cv::cuda::equalizeHist(InputArray _src, OutputArray _dst, Stream& _stream)
|
||||
{
|
||||
GpuMat src = _src.getGpuMat();
|
||||
|
||||
@ -107,8 +107,8 @@ void cv::cuda::equalizeHist(InputArray _src, OutputArray _dst, InputOutputArray
|
||||
|
||||
size_t bufSize = intBufSize + 2 * 256 * sizeof(int);
|
||||
|
||||
ensureSizeIsEnough(1, static_cast<int>(bufSize), CV_8UC1, _buf);
|
||||
GpuMat buf = _buf.getGpuMat();
|
||||
BufferPool pool(_stream);
|
||||
GpuMat buf = pool.getBuffer(1, static_cast<int>(bufSize), CV_8UC1);
|
||||
|
||||
GpuMat hist(1, 256, CV_32SC1, buf.data);
|
||||
GpuMat lut(1, 256, CV_32SC1, buf.data + 256 * sizeof(int));
|
||||
@ -288,7 +288,7 @@ namespace
|
||||
{
|
||||
typedef typename NppHistogramEvenFuncC1<SDEPTH>::src_t src_t;
|
||||
|
||||
static void hist(const GpuMat& src, OutputArray _hist, InputOutputArray _buf, int histSize, int lowerLevel, int upperLevel, cudaStream_t stream)
|
||||
static void hist(const GpuMat& src, OutputArray _hist, int histSize, int lowerLevel, int upperLevel, Stream& stream)
|
||||
{
|
||||
const int levels = histSize + 1;
|
||||
|
||||
@ -302,15 +302,15 @@ namespace
|
||||
int buf_size;
|
||||
get_buf_size(sz, levels, &buf_size);
|
||||
|
||||
ensureSizeIsEnough(1, buf_size, CV_8UC1, _buf);
|
||||
GpuMat buf = _buf.getGpuMat();
|
||||
BufferPool pool(stream);
|
||||
GpuMat buf = pool.getBuffer(1, buf_size, CV_8UC1);
|
||||
|
||||
NppStreamHandler h(stream);
|
||||
|
||||
nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, hist.ptr<Npp32s>(), levels,
|
||||
lowerLevel, upperLevel, buf.ptr<Npp8u>()) );
|
||||
|
||||
if (stream == 0)
|
||||
if (!stream)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
};
|
||||
@ -319,7 +319,7 @@ namespace
|
||||
{
|
||||
typedef typename NppHistogramEvenFuncC4<SDEPTH>::src_t src_t;
|
||||
|
||||
static void hist(const GpuMat& src, GpuMat hist[4],InputOutputArray _buf, int histSize[4], int lowerLevel[4], int upperLevel[4], cudaStream_t stream)
|
||||
static void hist(const GpuMat& src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream)
|
||||
{
|
||||
int levels[] = {histSize[0] + 1, histSize[1] + 1, histSize[2] + 1, histSize[3] + 1};
|
||||
hist[0].create(1, histSize[0], CV_32S);
|
||||
@ -336,14 +336,14 @@ namespace
|
||||
int buf_size;
|
||||
get_buf_size(sz, levels, &buf_size);
|
||||
|
||||
ensureSizeIsEnough(1, buf_size, CV_8U, _buf);
|
||||
GpuMat buf = _buf.getGpuMat();
|
||||
BufferPool pool(stream);
|
||||
GpuMat buf = pool.getBuffer(1, buf_size, CV_8UC1);
|
||||
|
||||
NppStreamHandler h(stream);
|
||||
|
||||
nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, pHist, levels, lowerLevel, upperLevel, buf.ptr<Npp8u>()) );
|
||||
|
||||
if (stream == 0)
|
||||
if (!stream)
|
||||
cudaSafeCall( cudaDeviceSynchronize() );
|
||||
}
|
||||
};
|
||||
@ -392,7 +392,7 @@ namespace
|
||||
typedef typename NppHistogramRangeFuncC1<SDEPTH>::level_t level_t;
|
||||
enum {LEVEL_TYPE_CODE=NppHistogramRangeFuncC1<SDEPTH>::LEVEL_TYPE_CODE};
|
||||
|
||||
static void hist(const GpuMat& src, OutputArray _hist, const GpuMat& levels, InputOutputArray _buf, cudaStream_t stream)
|
||||
static void hist(const GpuMat& src, OutputArray _hist, const GpuMat& levels, Stream& stream)
|
||||
{
|
||||
CV_Assert( levels.type() == LEVEL_TYPE_CODE && levels.rows == 1 );
|
||||
|
||||
@ -406,8 +406,8 @@ namespace
|
||||
int buf_size;
|
||||
get_buf_size(sz, levels.cols, &buf_size);
|
||||
|
||||
ensureSizeIsEnough(1, buf_size, CV_8U, _buf);
|
||||
GpuMat buf = _buf.getGpuMat();
|
||||
BufferPool pool(stream);
|
||||
GpuMat buf = pool.getBuffer(1, buf_size, CV_8UC1);
|
||||
|
||||
NppStreamHandler h(stream);
|
||||
|
||||
@ -424,7 +424,7 @@ namespace
|
||||
typedef typename NppHistogramRangeFuncC1<SDEPTH>::level_t level_t;
|
||||
enum {LEVEL_TYPE_CODE=NppHistogramRangeFuncC1<SDEPTH>::LEVEL_TYPE_CODE};
|
||||
|
||||
static void hist(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4],InputOutputArray _buf, cudaStream_t stream)
|
||||
static void hist(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], Stream& stream)
|
||||
{
|
||||
CV_Assert( levels[0].type() == LEVEL_TYPE_CODE && levels[0].rows == 1 );
|
||||
CV_Assert( levels[1].type() == LEVEL_TYPE_CODE && levels[1].rows == 1 );
|
||||
@ -447,8 +447,8 @@ namespace
|
||||
int buf_size;
|
||||
get_buf_size(sz, nLevels, &buf_size);
|
||||
|
||||
ensureSizeIsEnough(1, buf_size, CV_8U, _buf);
|
||||
GpuMat buf = _buf.getGpuMat();
|
||||
BufferPool pool(stream);
|
||||
GpuMat buf = pool.getBuffer(1, buf_size, CV_8UC1);
|
||||
|
||||
NppStreamHandler h(stream);
|
||||
|
||||
@ -460,7 +460,7 @@ namespace
|
||||
};
|
||||
}
|
||||
|
||||
void cv::cuda::evenLevels(OutputArray _levels, int nLevels, int lowerLevel, int upperLevel)
|
||||
void cv::cuda::evenLevels(OutputArray _levels, int nLevels, int lowerLevel, int upperLevel, Stream& stream)
|
||||
{
|
||||
const int kind = _levels.kind();
|
||||
|
||||
@ -475,7 +475,7 @@ void cv::cuda::evenLevels(OutputArray _levels, int nLevels, int lowerLevel, int
|
||||
nppSafeCall( nppiEvenLevelsHost_32s(host_levels.ptr<Npp32s>(), nLevels, lowerLevel, upperLevel) );
|
||||
|
||||
if (kind == _InputArray::CUDA_GPU_MAT)
|
||||
_levels.getGpuMatRef().upload(host_levels);
|
||||
_levels.getGpuMatRef().upload(host_levels, stream);
|
||||
}
|
||||
|
||||
namespace hist
|
||||
@ -493,9 +493,9 @@ namespace
|
||||
}
|
||||
}
|
||||
|
||||
void cv::cuda::histEven(InputArray _src, OutputArray hist, InputOutputArray buf, int histSize, int lowerLevel, int upperLevel, Stream& stream)
|
||||
void cv::cuda::histEven(InputArray _src, OutputArray hist, int histSize, int lowerLevel, int upperLevel, Stream& stream)
|
||||
{
|
||||
typedef void (*hist_t)(const GpuMat& src, OutputArray hist, InputOutputArray buf, int levels, int lowerLevel, int upperLevel, cudaStream_t stream);
|
||||
typedef void (*hist_t)(const GpuMat& src, OutputArray hist, int levels, int lowerLevel, int upperLevel, Stream& stream);
|
||||
static const hist_t hist_callers[] =
|
||||
{
|
||||
NppHistogramEvenC1<CV_8U , nppiHistogramEven_8u_C1R , nppiHistogramEvenGetBufferSize_8u_C1R >::hist,
|
||||
@ -514,12 +514,12 @@ void cv::cuda::histEven(InputArray _src, OutputArray hist, InputOutputArray buf,
|
||||
|
||||
CV_Assert( src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1 );
|
||||
|
||||
hist_callers[src.depth()](src, hist, buf, histSize, lowerLevel, upperLevel, StreamAccessor::getStream(stream));
|
||||
hist_callers[src.depth()](src, hist, histSize, lowerLevel, upperLevel, stream);
|
||||
}
|
||||
|
||||
void cv::cuda::histEven(InputArray _src, GpuMat hist[4], InputOutputArray buf, int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream)
|
||||
void cv::cuda::histEven(InputArray _src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream)
|
||||
{
|
||||
typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], InputOutputArray buf, int levels[4], int lowerLevel[4], int upperLevel[4], cudaStream_t stream);
|
||||
typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], int levels[4], int lowerLevel[4], int upperLevel[4], Stream& stream);
|
||||
static const hist_t hist_callers[] =
|
||||
{
|
||||
NppHistogramEvenC4<CV_8U , nppiHistogramEven_8u_C4R , nppiHistogramEvenGetBufferSize_8u_C4R >::hist,
|
||||
@ -532,12 +532,12 @@ void cv::cuda::histEven(InputArray _src, GpuMat hist[4], InputOutputArray buf, i
|
||||
|
||||
CV_Assert( src.type() == CV_8UC4 || src.type() == CV_16UC4 || src.type() == CV_16SC4 );
|
||||
|
||||
hist_callers[src.depth()](src, hist, buf, histSize, lowerLevel, upperLevel, StreamAccessor::getStream(stream));
|
||||
hist_callers[src.depth()](src, hist, histSize, lowerLevel, upperLevel, stream);
|
||||
}
|
||||
|
||||
void cv::cuda::histRange(InputArray _src, OutputArray hist, InputArray _levels, InputOutputArray buf, Stream& stream)
|
||||
void cv::cuda::histRange(InputArray _src, OutputArray hist, InputArray _levels, Stream& stream)
|
||||
{
|
||||
typedef void (*hist_t)(const GpuMat& src, OutputArray hist, const GpuMat& levels, InputOutputArray buf, cudaStream_t stream);
|
||||
typedef void (*hist_t)(const GpuMat& src, OutputArray hist, const GpuMat& levels, Stream& stream);
|
||||
static const hist_t hist_callers[] =
|
||||
{
|
||||
NppHistogramRangeC1<CV_8U , nppiHistogramRange_8u_C1R , nppiHistogramRangeGetBufferSize_8u_C1R >::hist,
|
||||
@ -553,12 +553,12 @@ void cv::cuda::histRange(InputArray _src, OutputArray hist, InputArray _levels,
|
||||
|
||||
CV_Assert( src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1 || src.type() == CV_32FC1 );
|
||||
|
||||
hist_callers[src.depth()](src, hist, levels, buf, StreamAccessor::getStream(stream));
|
||||
hist_callers[src.depth()](src, hist, levels, stream);
|
||||
}
|
||||
|
||||
void cv::cuda::histRange(InputArray _src, GpuMat hist[4], const GpuMat levels[4], InputOutputArray buf, Stream& stream)
|
||||
void cv::cuda::histRange(InputArray _src, GpuMat hist[4], const GpuMat levels[4], Stream& stream)
|
||||
{
|
||||
typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], InputOutputArray buf, cudaStream_t stream);
|
||||
typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], Stream& stream);
|
||||
static const hist_t hist_callers[] =
|
||||
{
|
||||
NppHistogramRangeC4<CV_8U , nppiHistogramRange_8u_C4R , nppiHistogramRangeGetBufferSize_8u_C4R >::hist,
|
||||
@ -573,7 +573,7 @@ void cv::cuda::histRange(InputArray _src, GpuMat hist[4], const GpuMat levels[4]
|
||||
|
||||
CV_Assert( src.type() == CV_8UC4 || src.type() == CV_16UC4 || src.type() == CV_16SC4 || src.type() == CV_32FC4 );
|
||||
|
||||
hist_callers[src.depth()](src, hist, levels, buf, StreamAccessor::getStream(stream));
|
||||
hist_callers[src.depth()](src, hist, levels, stream);
|
||||
}
|
||||
|
||||
#endif /* !defined (HAVE_CUDA) */
|
||||
|
@ -74,7 +74,7 @@ namespace
|
||||
public:
|
||||
HoughCirclesDetectorImpl(float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles);
|
||||
|
||||
void detect(InputArray src, OutputArray circles);
|
||||
void detect(InputArray src, OutputArray circles, Stream& stream);
|
||||
|
||||
void setDp(float dp) { dp_ = dp; }
|
||||
float getDp() const { return dp_; }
|
||||
@ -154,8 +154,11 @@ namespace
|
||||
filterDy_ = cuda::createSobelFilter(CV_8UC1, CV_32S, 0, 1);
|
||||
}
|
||||
|
||||
void HoughCirclesDetectorImpl::detect(InputArray _src, OutputArray circles)
|
||||
void HoughCirclesDetectorImpl::detect(InputArray _src, OutputArray circles, Stream& stream)
|
||||
{
|
||||
// TODO : implement async version
|
||||
(void) stream;
|
||||
|
||||
using namespace cv::cuda::device::hough;
|
||||
using namespace cv::cuda::device::hough_circles;
|
||||
|
||||
|
@ -75,8 +75,8 @@ namespace
|
||||
{
|
||||
}
|
||||
|
||||
void detect(InputArray src, OutputArray lines);
|
||||
void downloadResults(InputArray d_lines, OutputArray h_lines, OutputArray h_votes = noArray());
|
||||
void detect(InputArray src, OutputArray lines, Stream& stream);
|
||||
void downloadResults(InputArray d_lines, OutputArray h_lines, OutputArray h_votes, Stream& stream);
|
||||
|
||||
void setRho(float rho) { rho_ = rho; }
|
||||
float getRho() const { return rho_; }
|
||||
@ -125,8 +125,11 @@ namespace
|
||||
GpuMat result_;
|
||||
};
|
||||
|
||||
void HoughLinesDetectorImpl::detect(InputArray _src, OutputArray lines)
|
||||
void HoughLinesDetectorImpl::detect(InputArray _src, OutputArray lines, Stream& stream)
|
||||
{
|
||||
// TODO : implement async version
|
||||
(void) stream;
|
||||
|
||||
using namespace cv::cuda::device::hough;
|
||||
using namespace cv::cuda::device::hough_lines;
|
||||
|
||||
@ -170,7 +173,7 @@ namespace
|
||||
result_.copyTo(lines);
|
||||
}
|
||||
|
||||
void HoughLinesDetectorImpl::downloadResults(InputArray _d_lines, OutputArray h_lines, OutputArray h_votes)
|
||||
void HoughLinesDetectorImpl::downloadResults(InputArray _d_lines, OutputArray h_lines, OutputArray h_votes, Stream& stream)
|
||||
{
|
||||
GpuMat d_lines = _d_lines.getGpuMat();
|
||||
|
||||
@ -184,11 +187,17 @@ namespace
|
||||
|
||||
CV_Assert( d_lines.rows == 2 && d_lines.type() == CV_32FC2 );
|
||||
|
||||
if (stream)
|
||||
d_lines.row(0).download(h_lines, stream);
|
||||
else
|
||||
d_lines.row(0).download(h_lines);
|
||||
|
||||
if (h_votes.needed())
|
||||
{
|
||||
GpuMat d_votes(1, d_lines.cols, CV_32SC1, d_lines.ptr<int>(1));
|
||||
if (stream)
|
||||
d_votes.download(h_votes, stream);
|
||||
else
|
||||
d_votes.download(h_votes);
|
||||
}
|
||||
}
|
||||
|
@ -79,7 +79,7 @@ namespace
|
||||
{
|
||||
}
|
||||
|
||||
void detect(InputArray src, OutputArray lines);
|
||||
void detect(InputArray src, OutputArray lines, Stream& stream);
|
||||
|
||||
void setRho(float rho) { rho_ = rho; }
|
||||
float getRho() const { return rho_; }
|
||||
@ -128,8 +128,11 @@ namespace
|
||||
GpuMat result_;
|
||||
};
|
||||
|
||||
void HoughSegmentDetectorImpl::detect(InputArray _src, OutputArray lines)
|
||||
void HoughSegmentDetectorImpl::detect(InputArray _src, OutputArray lines, Stream& stream)
|
||||
{
|
||||
// TODO : implement async version
|
||||
(void) stream;
|
||||
|
||||
using namespace cv::cuda::device::hough;
|
||||
using namespace cv::cuda::device::hough_lines;
|
||||
using namespace cv::cuda::device::hough_segments;
|
||||
|
@ -43,7 +43,7 @@
|
||||
|
||||
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
|
||||
|
||||
void cv::cuda::meanShiftSegmentation(InputArray, OutputArray, int, int, int, TermCriteria) { throw_no_cuda(); }
|
||||
void cv::cuda::meanShiftSegmentation(InputArray, OutputArray, int, int, int, TermCriteria, Stream&) { throw_no_cuda(); }
|
||||
|
||||
#else
|
||||
|
||||
@ -222,7 +222,7 @@ inline int dist2(const cv::Vec2s& lhs, const cv::Vec2s& rhs)
|
||||
} // anonymous namespace
|
||||
|
||||
|
||||
void cv::cuda::meanShiftSegmentation(InputArray _src, OutputArray _dst, int sp, int sr, int minsize, TermCriteria criteria)
|
||||
void cv::cuda::meanShiftSegmentation(InputArray _src, OutputArray _dst, int sp, int sr, int minsize, TermCriteria criteria, Stream& stream)
|
||||
{
|
||||
GpuMat src = _src.getGpuMat();
|
||||
|
||||
@ -235,7 +235,10 @@ void cv::cuda::meanShiftSegmentation(InputArray _src, OutputArray _dst, int sp,
|
||||
|
||||
// Perform mean shift procedure and obtain region and spatial maps
|
||||
GpuMat d_rmap, d_spmap;
|
||||
cuda::meanShiftProc(src, d_rmap, d_spmap, sp, sr, criteria);
|
||||
cuda::meanShiftProc(src, d_rmap, d_spmap, sp, sr, criteria, stream);
|
||||
|
||||
stream.waitForCompletion();
|
||||
|
||||
Mat rmap(d_rmap);
|
||||
Mat spmap(d_spmap);
|
||||
|
||||
|
@ -1053,12 +1053,11 @@ TEST(equalizeHist)
|
||||
|
||||
cuda::GpuMat d_src(src);
|
||||
cuda::GpuMat d_dst;
|
||||
cuda::GpuMat d_buf;
|
||||
|
||||
cuda::equalizeHist(d_src, d_dst, d_buf);
|
||||
cuda::equalizeHist(d_src, d_dst);
|
||||
|
||||
CUDA_ON;
|
||||
cuda::equalizeHist(d_src, d_dst, d_buf);
|
||||
cuda::equalizeHist(d_src, d_dst);
|
||||
CUDA_OFF;
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user