Merge pull request #3566 from jet47:cuda-imgproc-refactoring
This commit is contained in:
commit
2ecca8f59c
@ -130,6 +130,12 @@ namespace cv { namespace cuda
|
|||||||
class NppStreamHandler
|
class NppStreamHandler
|
||||||
{
|
{
|
||||||
public:
|
public:
|
||||||
|
inline explicit NppStreamHandler(Stream& newStream)
|
||||||
|
{
|
||||||
|
oldStream = nppGetStream();
|
||||||
|
nppSetStream(StreamAccessor::getStream(newStream));
|
||||||
|
}
|
||||||
|
|
||||||
inline explicit NppStreamHandler(cudaStream_t newStream)
|
inline explicit NppStreamHandler(cudaStream_t newStream)
|
||||||
{
|
{
|
||||||
oldStream = nppGetStream();
|
oldStream = nppGetStream();
|
||||||
|
@ -205,19 +205,11 @@ CV_EXPORTS void calcHist(InputArray src, OutputArray hist, Stream& stream = Stre
|
|||||||
|
|
||||||
@param src Source image with CV_8UC1 type.
|
@param src Source image with CV_8UC1 type.
|
||||||
@param dst Destination image.
|
@param dst Destination image.
|
||||||
@param buf Optional buffer to avoid extra memory allocations (for many calls with the same sizes).
|
|
||||||
@param stream Stream for the asynchronous version.
|
@param stream Stream for the asynchronous version.
|
||||||
|
|
||||||
@sa equalizeHist
|
@sa equalizeHist
|
||||||
*/
|
*/
|
||||||
CV_EXPORTS void equalizeHist(InputArray src, OutputArray dst, InputOutputArray buf, Stream& stream = Stream::Null());
|
CV_EXPORTS void equalizeHist(InputArray src, OutputArray dst, Stream& stream = Stream::Null());
|
||||||
|
|
||||||
/** @overload */
|
|
||||||
static inline void equalizeHist(InputArray src, OutputArray dst, Stream& stream = Stream::Null())
|
|
||||||
{
|
|
||||||
GpuMat buf;
|
|
||||||
cuda::equalizeHist(src, dst, buf, stream);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** @brief Base class for Contrast Limited Adaptive Histogram Equalization. :
|
/** @brief Base class for Contrast Limited Adaptive Histogram Equalization. :
|
||||||
*/
|
*/
|
||||||
@ -248,8 +240,9 @@ CV_EXPORTS Ptr<cuda::CLAHE> createCLAHE(double clipLimit = 40.0, Size tileGridSi
|
|||||||
@param nLevels Number of computed levels. nLevels must be at least 2.
|
@param nLevels Number of computed levels. nLevels must be at least 2.
|
||||||
@param lowerLevel Lower boundary value of the lowest level.
|
@param lowerLevel Lower boundary value of the lowest level.
|
||||||
@param upperLevel Upper boundary value of the greatest level.
|
@param upperLevel Upper boundary value of the greatest level.
|
||||||
|
@param stream Stream for the asynchronous version.
|
||||||
*/
|
*/
|
||||||
CV_EXPORTS void evenLevels(OutputArray levels, int nLevels, int lowerLevel, int upperLevel);
|
CV_EXPORTS void evenLevels(OutputArray levels, int nLevels, int lowerLevel, int upperLevel, Stream& stream = Stream::Null());
|
||||||
|
|
||||||
/** @brief Calculates a histogram with evenly distributed bins.
|
/** @brief Calculates a histogram with evenly distributed bins.
|
||||||
|
|
||||||
@ -259,27 +252,11 @@ a four-channel image, all channels are processed separately.
|
|||||||
@param histSize Size of the histogram.
|
@param histSize Size of the histogram.
|
||||||
@param lowerLevel Lower boundary of lowest-level bin.
|
@param lowerLevel Lower boundary of lowest-level bin.
|
||||||
@param upperLevel Upper boundary of highest-level bin.
|
@param upperLevel Upper boundary of highest-level bin.
|
||||||
@param buf Optional buffer to avoid extra memory allocations (for many calls with the same sizes).
|
|
||||||
@param stream Stream for the asynchronous version.
|
@param stream Stream for the asynchronous version.
|
||||||
*/
|
*/
|
||||||
CV_EXPORTS void histEven(InputArray src, OutputArray hist, InputOutputArray buf, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null());
|
CV_EXPORTS void histEven(InputArray src, OutputArray hist, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null());
|
||||||
|
|
||||||
/** @overload */
|
/** @overload */
|
||||||
static inline void histEven(InputArray src, OutputArray hist, int histSize, int lowerLevel, int upperLevel, Stream& stream = Stream::Null())
|
CV_EXPORTS void histEven(InputArray src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream = Stream::Null());
|
||||||
{
|
|
||||||
GpuMat buf;
|
|
||||||
cuda::histEven(src, hist, buf, histSize, lowerLevel, upperLevel, stream);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** @overload */
|
|
||||||
CV_EXPORTS void histEven(InputArray src, GpuMat hist[4], InputOutputArray buf, int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream = Stream::Null());
|
|
||||||
|
|
||||||
/** @overload */
|
|
||||||
static inline void histEven(InputArray src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream = Stream::Null())
|
|
||||||
{
|
|
||||||
GpuMat buf;
|
|
||||||
cuda::histEven(src, hist, buf, histSize, lowerLevel, upperLevel, stream);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** @brief Calculates a histogram with bins determined by the levels array.
|
/** @brief Calculates a histogram with bins determined by the levels array.
|
||||||
|
|
||||||
@ -287,27 +264,11 @@ static inline void histEven(InputArray src, GpuMat hist[4], int histSize[4], int
|
|||||||
For a four-channel image, all channels are processed separately.
|
For a four-channel image, all channels are processed separately.
|
||||||
@param hist Destination histogram with one row, (levels.cols-1) columns, and the CV_32SC1 type.
|
@param hist Destination histogram with one row, (levels.cols-1) columns, and the CV_32SC1 type.
|
||||||
@param levels Number of levels in the histogram.
|
@param levels Array of level boundaries for the histogram bins (levels.cols values define levels.cols-1 bins).
|
||||||
@param buf Optional buffer to avoid extra memory allocations (for many calls with the same sizes).
|
|
||||||
@param stream Stream for the asynchronous version.
|
@param stream Stream for the asynchronous version.
|
||||||
*/
|
*/
|
||||||
CV_EXPORTS void histRange(InputArray src, OutputArray hist, InputArray levels, InputOutputArray buf, Stream& stream = Stream::Null());
|
CV_EXPORTS void histRange(InputArray src, OutputArray hist, InputArray levels, Stream& stream = Stream::Null());
|
||||||
|
|
||||||
/** @overload */
|
/** @overload */
|
||||||
static inline void histRange(InputArray src, OutputArray hist, InputArray levels, Stream& stream = Stream::Null())
|
CV_EXPORTS void histRange(InputArray src, GpuMat hist[4], const GpuMat levels[4], Stream& stream = Stream::Null());
|
||||||
{
|
|
||||||
GpuMat buf;
|
|
||||||
cuda::histRange(src, hist, levels, buf, stream);
|
|
||||||
}
|
|
||||||
|
|
||||||
/** @overload */
|
|
||||||
CV_EXPORTS void histRange(InputArray src, GpuMat hist[4], const GpuMat levels[4], InputOutputArray buf, Stream& stream = Stream::Null());
|
|
||||||
|
|
||||||
/** @overload */
|
|
||||||
static inline void histRange(InputArray src, GpuMat hist[4], const GpuMat levels[4], Stream& stream = Stream::Null())
|
|
||||||
{
|
|
||||||
GpuMat buf;
|
|
||||||
cuda::histRange(src, hist, levels, buf, stream);
|
|
||||||
}
|
|
||||||
|
|
||||||
//! @} cudaimgproc_hist
|
//! @} cudaimgproc_hist
|
||||||
|
|
||||||
@ -321,15 +282,17 @@ public:
|
|||||||
/** @brief Finds edges in an image using the @cite Canny86 algorithm.
|
/** @brief Finds edges in an image using the @cite Canny86 algorithm.
|
||||||
|
|
||||||
@param image Single-channel 8-bit input image.
|
@param image Single-channel 8-bit input image.
|
||||||
@param edges Output edge map. It has the same size and type as image .
|
@param edges Output edge map. It has the same size and type as image.
|
||||||
|
@param stream Stream for the asynchronous version.
|
||||||
*/
|
*/
|
||||||
virtual void detect(InputArray image, OutputArray edges) = 0;
|
virtual void detect(InputArray image, OutputArray edges, Stream& stream = Stream::Null()) = 0;
|
||||||
/** @overload
|
/** @overload
|
||||||
@param dx First derivative of image in the vertical direction. Support only CV_32S type.
|
@param dx First derivative of image in the vertical direction. Supports only the CV_32S type.
|
||||||
@param dy First derivative of image in the horizontal direction. Support only CV_32S type.
|
@param dy First derivative of image in the horizontal direction. Supports only the CV_32S type.
|
||||||
@param edges Output edge map. It has the same size and type as image .
|
@param edges Output edge map. It has the same size as dx and dy and the CV_8UC1 type.
|
||||||
|
@param stream Stream for the asynchronous version.
|
||||||
*/
|
*/
|
||||||
virtual void detect(InputArray dx, InputArray dy, OutputArray edges) = 0;
|
virtual void detect(InputArray dx, InputArray dy, OutputArray edges, Stream& stream = Stream::Null()) = 0;
|
||||||
|
|
||||||
virtual void setLowThreshold(double low_thresh) = 0;
|
virtual void setLowThreshold(double low_thresh) = 0;
|
||||||
virtual double getLowThreshold() const = 0;
|
virtual double getLowThreshold() const = 0;
|
||||||
@ -376,18 +339,20 @@ public:
|
|||||||
\f$(\rho, \theta)\f$ . \f$\rho\f$ is the distance from the coordinate origin \f$(0,0)\f$ (top-left corner of
|
\f$(\rho, \theta)\f$ . \f$\rho\f$ is the distance from the coordinate origin \f$(0,0)\f$ (top-left corner of
|
||||||
the image). \f$\theta\f$ is the line rotation angle in radians (
|
the image). \f$\theta\f$ is the line rotation angle in radians (
|
||||||
\f$0 \sim \textrm{vertical line}, \pi/2 \sim \textrm{horizontal line}\f$ ).
|
\f$0 \sim \textrm{vertical line}, \pi/2 \sim \textrm{horizontal line}\f$ ).
|
||||||
|
@param stream Stream for the asynchronous version.
|
||||||
|
|
||||||
@sa HoughLines
|
@sa HoughLines
|
||||||
*/
|
*/
|
||||||
virtual void detect(InputArray src, OutputArray lines) = 0;
|
virtual void detect(InputArray src, OutputArray lines, Stream& stream = Stream::Null()) = 0;
|
||||||
|
|
||||||
/** @brief Downloads results from cuda::HoughLinesDetector::detect to host memory.
|
/** @brief Downloads results from cuda::HoughLinesDetector::detect to host memory.
|
||||||
|
|
||||||
@param d_lines Result of cuda::HoughLinesDetector::detect .
|
@param d_lines Result of cuda::HoughLinesDetector::detect .
|
||||||
@param h_lines Output host array.
|
@param h_lines Output host array.
|
||||||
@param h_votes Optional output array for line's votes.
|
@param h_votes Optional output array for line's votes.
|
||||||
|
@param stream Stream for the asynchronous version.
|
||||||
*/
|
*/
|
||||||
virtual void downloadResults(InputArray d_lines, OutputArray h_lines, OutputArray h_votes = noArray()) = 0;
|
virtual void downloadResults(InputArray d_lines, OutputArray h_lines, OutputArray h_votes = noArray(), Stream& stream = Stream::Null()) = 0;
|
||||||
|
|
||||||
virtual void setRho(float rho) = 0;
|
virtual void setRho(float rho) = 0;
|
||||||
virtual float getRho() const = 0;
|
virtual float getRho() const = 0;
|
||||||
@ -431,10 +396,11 @@ public:
|
|||||||
@param lines Output vector of lines. Each line is represented by a 4-element vector
|
@param lines Output vector of lines. Each line is represented by a 4-element vector
|
||||||
\f$(x_1, y_1, x_2, y_2)\f$ , where \f$(x_1,y_1)\f$ and \f$(x_2, y_2)\f$ are the ending points of each detected
|
\f$(x_1, y_1, x_2, y_2)\f$ , where \f$(x_1,y_1)\f$ and \f$(x_2, y_2)\f$ are the ending points of each detected
|
||||||
line segment.
|
line segment.
|
||||||
|
@param stream Stream for the asynchronous version.
|
||||||
|
|
||||||
@sa HoughLinesP
|
@sa HoughLinesP
|
||||||
*/
|
*/
|
||||||
virtual void detect(InputArray src, OutputArray lines) = 0;
|
virtual void detect(InputArray src, OutputArray lines, Stream& stream = Stream::Null()) = 0;
|
||||||
|
|
||||||
virtual void setRho(float rho) = 0;
|
virtual void setRho(float rho) = 0;
|
||||||
virtual float getRho() const = 0;
|
virtual float getRho() const = 0;
|
||||||
@ -475,10 +441,11 @@ public:
|
|||||||
@param src 8-bit, single-channel grayscale input image.
|
@param src 8-bit, single-channel grayscale input image.
|
||||||
@param circles Output vector of found circles. Each vector is encoded as a 3-element
|
@param circles Output vector of found circles. Each vector is encoded as a 3-element
|
||||||
floating-point vector \f$(x, y, radius)\f$ .
|
floating-point vector \f$(x, y, radius)\f$ .
|
||||||
|
@param stream Stream for the asynchronous version.
|
||||||
|
|
||||||
@sa HoughCircles
|
@sa HoughCircles
|
||||||
*/
|
*/
|
||||||
virtual void detect(InputArray src, OutputArray circles) = 0;
|
virtual void detect(InputArray src, OutputArray circles, Stream& stream = Stream::Null()) = 0;
|
||||||
|
|
||||||
virtual void setDp(float dp) = 0;
|
virtual void setDp(float dp) = 0;
|
||||||
virtual float getDp() const = 0;
|
virtual float getDp() const = 0;
|
||||||
@ -593,8 +560,9 @@ public:
|
|||||||
positions).
|
positions).
|
||||||
@param mask Optional region of interest. If the image is not empty (it needs to have the type
|
@param mask Optional region of interest. If the image is not empty (it needs to have the type
|
||||||
CV_8UC1 and the same size as image ), it specifies the region in which the corners are detected.
|
CV_8UC1 and the same size as image ), it specifies the region in which the corners are detected.
|
||||||
|
@param stream Stream for the asynchronous version.
|
||||||
*/
|
*/
|
||||||
virtual void detect(InputArray image, OutputArray corners, InputArray mask = noArray()) = 0;
|
virtual void detect(InputArray image, OutputArray corners, InputArray mask = noArray(), Stream& stream = Stream::Null()) = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
/** @brief Creates implementation for cuda::CornersDetector .
|
/** @brief Creates implementation for cuda::CornersDetector .
|
||||||
@ -630,7 +598,7 @@ as src .
|
|||||||
@param sp Spatial window radius.
|
@param sp Spatial window radius.
|
||||||
@param sr Color window radius.
|
@param sr Color window radius.
|
||||||
@param criteria Termination criteria. See TermCriteria.
|
@param criteria Termination criteria. See TermCriteria.
|
||||||
@param stream
|
@param stream Stream for the asynchronous version.
|
||||||
|
|
||||||
It maps each point of the source image into another point. As a result, you have a new color and new
|
It maps each point of the source image into another point. As a result, you have a new color and new
|
||||||
position of each point.
|
position of each point.
|
||||||
@ -650,7 +618,7 @@ src size. The type is CV_16SC2 .
|
|||||||
@param sp Spatial window radius.
|
@param sp Spatial window radius.
|
||||||
@param sr Color window radius.
|
@param sr Color window radius.
|
||||||
@param criteria Termination criteria. See TermCriteria.
|
@param criteria Termination criteria. See TermCriteria.
|
||||||
@param stream
|
@param stream Stream for the asynchronous version.
|
||||||
|
|
||||||
@sa cuda::meanShiftFiltering
|
@sa cuda::meanShiftFiltering
|
||||||
*/
|
*/
|
||||||
@ -666,9 +634,11 @@ CV_EXPORTS void meanShiftProc(InputArray src, OutputArray dstr, OutputArray dsts
|
|||||||
@param sr Color window radius.
|
@param sr Color window radius.
|
||||||
@param minsize Minimum segment size. Smaller segments are merged.
|
@param minsize Minimum segment size. Smaller segments are merged.
|
||||||
@param criteria Termination criteria. See TermCriteria.
|
@param criteria Termination criteria. See TermCriteria.
|
||||||
|
@param stream Stream for the asynchronous version.
|
||||||
*/
|
*/
|
||||||
CV_EXPORTS void meanShiftSegmentation(InputArray src, OutputArray dst, int sp, int sr, int minsize,
|
CV_EXPORTS void meanShiftSegmentation(InputArray src, OutputArray dst, int sp, int sr, int minsize,
|
||||||
TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1));
|
TermCriteria criteria = TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 5, 1),
|
||||||
|
Stream& stream = Stream::Null());
|
||||||
|
|
||||||
/////////////////////////// Match Template ////////////////////////////
|
/////////////////////////// Match Template ////////////////////////////
|
||||||
|
|
||||||
|
@ -63,9 +63,8 @@ PERF_TEST_P(Sz_Depth, HistEvenC1,
|
|||||||
{
|
{
|
||||||
const cv::cuda::GpuMat d_src(src);
|
const cv::cuda::GpuMat d_src(src);
|
||||||
cv::cuda::GpuMat dst;
|
cv::cuda::GpuMat dst;
|
||||||
cv::cuda::GpuMat d_buf;
|
|
||||||
|
|
||||||
TEST_CYCLE() cv::cuda::histEven(d_src, dst, d_buf, 30, 0, 180);
|
TEST_CYCLE() cv::cuda::histEven(d_src, dst, 30, 0, 180);
|
||||||
|
|
||||||
CUDA_SANITY_CHECK(dst);
|
CUDA_SANITY_CHECK(dst);
|
||||||
}
|
}
|
||||||
@ -106,9 +105,8 @@ PERF_TEST_P(Sz_Depth, HistEvenC4,
|
|||||||
{
|
{
|
||||||
const cv::cuda::GpuMat d_src(src);
|
const cv::cuda::GpuMat d_src(src);
|
||||||
cv::cuda::GpuMat d_hist[4];
|
cv::cuda::GpuMat d_hist[4];
|
||||||
cv::cuda::GpuMat d_buf;
|
|
||||||
|
|
||||||
TEST_CYCLE() cv::cuda::histEven(d_src, d_hist, d_buf, histSize, lowerLevel, upperLevel);
|
TEST_CYCLE() cv::cuda::histEven(d_src, d_hist, histSize, lowerLevel, upperLevel);
|
||||||
|
|
||||||
cv::Mat cpu_hist0, cpu_hist1, cpu_hist2, cpu_hist3;
|
cv::Mat cpu_hist0, cpu_hist1, cpu_hist2, cpu_hist3;
|
||||||
d_hist[0].download(cpu_hist0);
|
d_hist[0].download(cpu_hist0);
|
||||||
@ -167,9 +165,8 @@ PERF_TEST_P(Sz, EqualizeHist,
|
|||||||
{
|
{
|
||||||
const cv::cuda::GpuMat d_src(src);
|
const cv::cuda::GpuMat d_src(src);
|
||||||
cv::cuda::GpuMat dst;
|
cv::cuda::GpuMat dst;
|
||||||
cv::cuda::GpuMat d_buf;
|
|
||||||
|
|
||||||
TEST_CYCLE() cv::cuda::equalizeHist(d_src, dst, d_buf);
|
TEST_CYCLE() cv::cuda::equalizeHist(d_src, dst);
|
||||||
|
|
||||||
CUDA_SANITY_CHECK(dst);
|
CUDA_SANITY_CHECK(dst);
|
||||||
}
|
}
|
||||||
|
@ -53,16 +53,16 @@ Ptr<CannyEdgeDetector> cv::cuda::createCannyEdgeDetector(double, double, int, bo
|
|||||||
|
|
||||||
namespace canny
|
namespace canny
|
||||||
{
|
{
|
||||||
void calcMagnitude(PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad);
|
void calcMagnitude(PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad, cudaStream_t stream);
|
||||||
void calcMagnitude(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad);
|
void calcMagnitude(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad, cudaStream_t stream);
|
||||||
|
|
||||||
void calcMap(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, PtrStepSzi map, float low_thresh, float high_thresh);
|
void calcMap(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, PtrStepSzi map, float low_thresh, float high_thresh, cudaStream_t stream);
|
||||||
|
|
||||||
void edgesHysteresisLocal(PtrStepSzi map, short2* st1);
|
void edgesHysteresisLocal(PtrStepSzi map, short2* st1, cudaStream_t stream);
|
||||||
|
|
||||||
void edgesHysteresisGlobal(PtrStepSzi map, short2* st1, short2* st2);
|
void edgesHysteresisGlobal(PtrStepSzi map, short2* st1, short2* st2, cudaStream_t stream);
|
||||||
|
|
||||||
void getEdges(PtrStepSzi map, PtrStepSzb dst);
|
void getEdges(PtrStepSzi map, PtrStepSzb dst, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
@ -76,8 +76,8 @@ namespace
|
|||||||
old_apperture_size_ = -1;
|
old_apperture_size_ = -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
void detect(InputArray image, OutputArray edges);
|
void detect(InputArray image, OutputArray edges, Stream& stream);
|
||||||
void detect(InputArray dx, InputArray dy, OutputArray edges);
|
void detect(InputArray dx, InputArray dy, OutputArray edges, Stream& stream);
|
||||||
|
|
||||||
void setLowThreshold(double low_thresh) { low_thresh_ = low_thresh; }
|
void setLowThreshold(double low_thresh) { low_thresh_ = low_thresh; }
|
||||||
double getLowThreshold() const { return low_thresh_; }
|
double getLowThreshold() const { return low_thresh_; }
|
||||||
@ -111,7 +111,7 @@ namespace
|
|||||||
|
|
||||||
private:
|
private:
|
||||||
void createBuf(Size image_size);
|
void createBuf(Size image_size);
|
||||||
void CannyCaller(GpuMat& edges);
|
void CannyCaller(GpuMat& edges, Stream& stream);
|
||||||
|
|
||||||
double low_thresh_;
|
double low_thresh_;
|
||||||
double high_thresh_;
|
double high_thresh_;
|
||||||
@ -128,7 +128,7 @@ namespace
|
|||||||
int old_apperture_size_;
|
int old_apperture_size_;
|
||||||
};
|
};
|
||||||
|
|
||||||
void CannyImpl::detect(InputArray _image, OutputArray _edges)
|
void CannyImpl::detect(InputArray _image, OutputArray _edges, Stream& stream)
|
||||||
{
|
{
|
||||||
GpuMat image = _image.getGpuMat();
|
GpuMat image = _image.getGpuMat();
|
||||||
|
|
||||||
@ -150,24 +150,24 @@ namespace
|
|||||||
image.locateROI(wholeSize, ofs);
|
image.locateROI(wholeSize, ofs);
|
||||||
GpuMat srcWhole(wholeSize, image.type(), image.datastart, image.step);
|
GpuMat srcWhole(wholeSize, image.type(), image.datastart, image.step);
|
||||||
|
|
||||||
canny::calcMagnitude(srcWhole, ofs.x, ofs.y, dx_, dy_, mag_, L2gradient_);
|
canny::calcMagnitude(srcWhole, ofs.x, ofs.y, dx_, dy_, mag_, L2gradient_, StreamAccessor::getStream(stream));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
#ifndef HAVE_OPENCV_CUDAFILTERS
|
#ifndef HAVE_OPENCV_CUDAFILTERS
|
||||||
throw_no_cuda();
|
throw_no_cuda();
|
||||||
#else
|
#else
|
||||||
filterDX_->apply(image, dx_);
|
filterDX_->apply(image, dx_, stream);
|
||||||
filterDY_->apply(image, dy_);
|
filterDY_->apply(image, dy_, stream);
|
||||||
|
|
||||||
canny::calcMagnitude(dx_, dy_, mag_, L2gradient_);
|
canny::calcMagnitude(dx_, dy_, mag_, L2gradient_, StreamAccessor::getStream(stream));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
CannyCaller(edges);
|
CannyCaller(edges, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
void CannyImpl::detect(InputArray _dx, InputArray _dy, OutputArray _edges)
|
void CannyImpl::detect(InputArray _dx, InputArray _dy, OutputArray _edges, Stream& stream)
|
||||||
{
|
{
|
||||||
GpuMat dx = _dx.getGpuMat();
|
GpuMat dx = _dx.getGpuMat();
|
||||||
GpuMat dy = _dy.getGpuMat();
|
GpuMat dy = _dy.getGpuMat();
|
||||||
@ -176,8 +176,8 @@ namespace
|
|||||||
CV_Assert( dy.type() == dx.type() && dy.size() == dx.size() );
|
CV_Assert( dy.type() == dx.type() && dy.size() == dx.size() );
|
||||||
CV_Assert( deviceSupports(SHARED_ATOMICS) );
|
CV_Assert( deviceSupports(SHARED_ATOMICS) );
|
||||||
|
|
||||||
dx.copyTo(dx_);
|
dx.copyTo(dx_, stream);
|
||||||
dy.copyTo(dy_);
|
dy.copyTo(dy_, stream);
|
||||||
|
|
||||||
if (low_thresh_ > high_thresh_)
|
if (low_thresh_ > high_thresh_)
|
||||||
std::swap(low_thresh_, high_thresh_);
|
std::swap(low_thresh_, high_thresh_);
|
||||||
@ -187,9 +187,9 @@ namespace
|
|||||||
_edges.create(dx.size(), CV_8UC1);
|
_edges.create(dx.size(), CV_8UC1);
|
||||||
GpuMat edges = _edges.getGpuMat();
|
GpuMat edges = _edges.getGpuMat();
|
||||||
|
|
||||||
canny::calcMagnitude(dx_, dy_, mag_, L2gradient_);
|
canny::calcMagnitude(dx_, dy_, mag_, L2gradient_, StreamAccessor::getStream(stream));
|
||||||
|
|
||||||
CannyCaller(edges);
|
CannyCaller(edges, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
void CannyImpl::createBuf(Size image_size)
|
void CannyImpl::createBuf(Size image_size)
|
||||||
@ -215,16 +215,16 @@ namespace
|
|||||||
ensureSizeIsEnough(1, image_size.area(), CV_16SC2, st2_);
|
ensureSizeIsEnough(1, image_size.area(), CV_16SC2, st2_);
|
||||||
}
|
}
|
||||||
|
|
||||||
void CannyImpl::CannyCaller(GpuMat& edges)
|
void CannyImpl::CannyCaller(GpuMat& edges, Stream& stream)
|
||||||
{
|
{
|
||||||
map_.setTo(Scalar::all(0));
|
// Clear the edge map on the caller's stream so the fill stays ordered with the
// kernels launched below instead of falling back to the default stream.
map_.setTo(Scalar::all(0), stream);
|
||||||
canny::calcMap(dx_, dy_, mag_, map_, static_cast<float>(low_thresh_), static_cast<float>(high_thresh_));
|
canny::calcMap(dx_, dy_, mag_, map_, static_cast<float>(low_thresh_), static_cast<float>(high_thresh_), StreamAccessor::getStream(stream));
|
||||||
|
|
||||||
canny::edgesHysteresisLocal(map_, st1_.ptr<short2>());
|
canny::edgesHysteresisLocal(map_, st1_.ptr<short2>(), StreamAccessor::getStream(stream));
|
||||||
|
|
||||||
canny::edgesHysteresisGlobal(map_, st1_.ptr<short2>(), st2_.ptr<short2>());
|
canny::edgesHysteresisGlobal(map_, st1_.ptr<short2>(), st2_.ptr<short2>(), StreamAccessor::getStream(stream));
|
||||||
|
|
||||||
canny::getEdges(map_, edges);
|
canny::getEdges(map_, edges, StreamAccessor::getStream(stream));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -120,7 +120,7 @@ namespace canny
|
|||||||
mag(y, x) = norm(dxVal, dyVal);
|
mag(y, x) = norm(dxVal, dyVal);
|
||||||
}
|
}
|
||||||
|
|
||||||
void calcMagnitude(PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad)
|
void calcMagnitude(PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 block(16, 16);
|
const dim3 block(16, 16);
|
||||||
const dim3 grid(divUp(mag.cols, block.x), divUp(mag.rows, block.y));
|
const dim3 grid(divUp(mag.cols, block.x), divUp(mag.rows, block.y));
|
||||||
@ -131,30 +131,31 @@ namespace canny
|
|||||||
if (L2Grad)
|
if (L2Grad)
|
||||||
{
|
{
|
||||||
L2 norm;
|
L2 norm;
|
||||||
calcMagnitudeKernel<<<grid, block>>>(src, dx, dy, mag, norm);
|
calcMagnitudeKernel<<<grid, block, 0, stream>>>(src, dx, dy, mag, norm);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
L1 norm;
|
L1 norm;
|
||||||
calcMagnitudeKernel<<<grid, block>>>(src, dx, dy, mag, norm);
|
calcMagnitudeKernel<<<grid, block, 0, stream>>>(src, dx, dy, mag, norm);
|
||||||
}
|
}
|
||||||
|
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
|
|
||||||
cudaSafeCall(cudaThreadSynchronize());
|
if (stream == NULL)
|
||||||
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
|
|
||||||
void calcMagnitude(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad)
|
void calcMagnitude(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
if (L2Grad)
|
if (L2Grad)
|
||||||
{
|
{
|
||||||
L2 norm;
|
L2 norm;
|
||||||
transform(dx, dy, mag, norm, WithOutMask(), 0);
|
transform(dx, dy, mag, norm, WithOutMask(), stream);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
L1 norm;
|
L1 norm;
|
||||||
transform(dx, dy, mag, norm, WithOutMask(), 0);
|
transform(dx, dy, mag, norm, WithOutMask(), stream);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -217,17 +218,18 @@ namespace canny
|
|||||||
map(y, x) = edge_type;
|
map(y, x) = edge_type;
|
||||||
}
|
}
|
||||||
|
|
||||||
void calcMap(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, PtrStepSzi map, float low_thresh, float high_thresh)
|
void calcMap(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, PtrStepSzi map, float low_thresh, float high_thresh, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
const dim3 block(16, 16);
|
const dim3 block(16, 16);
|
||||||
const dim3 grid(divUp(dx.cols, block.x), divUp(dx.rows, block.y));
|
const dim3 grid(divUp(dx.cols, block.x), divUp(dx.rows, block.y));
|
||||||
|
|
||||||
bindTexture(&tex_mag, mag);
|
bindTexture(&tex_mag, mag);
|
||||||
|
|
||||||
calcMapKernel<<<grid, block>>>(dx, dy, map, low_thresh, high_thresh);
|
calcMapKernel<<<grid, block, 0, stream>>>(dx, dy, map, low_thresh, high_thresh);
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
|
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
if (stream == NULL)
|
||||||
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -328,20 +330,21 @@ namespace canny
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void edgesHysteresisLocal(PtrStepSzi map, short2* st1)
|
void edgesHysteresisLocal(PtrStepSzi map, short2* st1, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
void* counter_ptr;
|
void* counter_ptr;
|
||||||
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, counter) );
|
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, counter) );
|
||||||
|
|
||||||
cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(int)) );
|
cudaSafeCall( cudaMemsetAsync(counter_ptr, 0, sizeof(int), stream) );
|
||||||
|
|
||||||
const dim3 block(16, 16);
|
const dim3 block(16, 16);
|
||||||
const dim3 grid(divUp(map.cols, block.x), divUp(map.rows, block.y));
|
const dim3 grid(divUp(map.cols, block.x), divUp(map.rows, block.y));
|
||||||
|
|
||||||
edgesHysteresisLocalKernel<<<grid, block>>>(map, st1);
|
edgesHysteresisLocalKernel<<<grid, block, 0, stream>>>(map, st1);
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
|
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
if (stream == NULL)
|
||||||
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -441,27 +444,30 @@ namespace canny
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void edgesHysteresisGlobal(PtrStepSzi map, short2* st1, short2* st2)
|
void edgesHysteresisGlobal(PtrStepSzi map, short2* st1, short2* st2, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
void* counter_ptr;
|
void* counter_ptr;
|
||||||
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, canny::counter) );
|
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, canny::counter) );
|
||||||
|
|
||||||
int count;
|
int count;
|
||||||
cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) );
|
cudaSafeCall( cudaMemcpyAsync(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost, stream) );
|
||||||
|
cudaSafeCall( cudaStreamSynchronize(stream) );
|
||||||
|
|
||||||
while (count > 0)
|
while (count > 0)
|
||||||
{
|
{
|
||||||
cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(int)) );
|
cudaSafeCall( cudaMemsetAsync(counter_ptr, 0, sizeof(int), stream) );
|
||||||
|
|
||||||
const dim3 block(128);
|
const dim3 block(128);
|
||||||
const dim3 grid(::min(count, 65535u), divUp(count, 65535), 1);
|
const dim3 grid(::min(count, 65535u), divUp(count, 65535), 1);
|
||||||
|
|
||||||
edgesHysteresisGlobalKernel<<<grid, block>>>(map, st1, st2, count);
|
edgesHysteresisGlobalKernel<<<grid, block, 0, stream>>>(map, st1, st2, count);
|
||||||
cudaSafeCall( cudaGetLastError() );
|
cudaSafeCall( cudaGetLastError() );
|
||||||
|
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
if (stream == NULL)
|
||||||
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
|
|
||||||
cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) );
|
cudaSafeCall( cudaMemcpyAsync(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost, stream) );
|
||||||
|
cudaSafeCall( cudaStreamSynchronize(stream) );
|
||||||
|
|
||||||
count = min(count, map.cols * map.rows);
|
count = min(count, map.cols * map.rows);
|
||||||
|
|
||||||
@ -499,9 +505,9 @@ namespace cv { namespace cuda { namespace device
|
|||||||
|
|
||||||
namespace canny
|
namespace canny
|
||||||
{
|
{
|
||||||
void getEdges(PtrStepSzi map, PtrStepSzb dst)
|
void getEdges(PtrStepSzi map, PtrStepSzb dst, cudaStream_t stream)
|
||||||
{
|
{
|
||||||
transform(map, dst, GetEdges(), WithOutMask(), 0);
|
transform(map, dst, GetEdges(), WithOutMask(), stream);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -68,7 +68,7 @@ namespace
|
|||||||
GoodFeaturesToTrackDetector(int srcType, int maxCorners, double qualityLevel, double minDistance,
|
GoodFeaturesToTrackDetector(int srcType, int maxCorners, double qualityLevel, double minDistance,
|
||||||
int blockSize, bool useHarrisDetector, double harrisK);
|
int blockSize, bool useHarrisDetector, double harrisK);
|
||||||
|
|
||||||
void detect(InputArray image, OutputArray corners, InputArray mask = noArray());
|
void detect(InputArray image, OutputArray corners, InputArray mask, Stream& stream);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
int maxCorners_;
|
int maxCorners_;
|
||||||
@ -96,8 +96,11 @@ namespace
|
|||||||
cuda::createMinEigenValCorner(srcType, blockSize, 3);
|
cuda::createMinEigenValCorner(srcType, blockSize, 3);
|
||||||
}
|
}
|
||||||
|
|
||||||
void GoodFeaturesToTrackDetector::detect(InputArray _image, OutputArray _corners, InputArray _mask)
|
void GoodFeaturesToTrackDetector::detect(InputArray _image, OutputArray _corners, InputArray _mask, Stream& stream)
|
||||||
{
|
{
|
||||||
|
// TODO : implement async version
|
||||||
|
(void) stream;
|
||||||
|
|
||||||
using namespace cv::cuda::device::gfft;
|
using namespace cv::cuda::device::gfft;
|
||||||
|
|
||||||
GpuMat image = _image.getGpuMat();
|
GpuMat image = _image.getGpuMat();
|
||||||
|
@ -49,11 +49,11 @@ using namespace cv::cuda;
|
|||||||
|
|
||||||
void cv::cuda::calcHist(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
|
void cv::cuda::calcHist(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
|
||||||
|
|
||||||
void cv::cuda::equalizeHist(InputArray, OutputArray, InputOutputArray, Stream&) { throw_no_cuda(); }
|
void cv::cuda::equalizeHist(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
|
||||||
|
|
||||||
cv::Ptr<cv::cuda::CLAHE> cv::cuda::createCLAHE(double, cv::Size) { throw_no_cuda(); return cv::Ptr<cv::cuda::CLAHE>(); }
|
cv::Ptr<cv::cuda::CLAHE> cv::cuda::createCLAHE(double, cv::Size) { throw_no_cuda(); return cv::Ptr<cv::cuda::CLAHE>(); }
|
||||||
|
|
||||||
void cv::cuda::evenLevels(OutputArray, int, int, int) { throw_no_cuda(); }
|
void cv::cuda::evenLevels(OutputArray, int, int, int, Stream&) { throw_no_cuda(); }
|
||||||
|
|
||||||
void cv::cuda::histEven(InputArray, OutputArray, InputOutputArray, int, int, int, Stream&) { throw_no_cuda(); }
|
void cv::cuda::histEven(InputArray, OutputArray, InputOutputArray, int, int, int, Stream&) { throw_no_cuda(); }
|
||||||
void cv::cuda::histEven(InputArray, GpuMat*, InputOutputArray, int*, int*, int*, Stream&) { throw_no_cuda(); }
|
void cv::cuda::histEven(InputArray, GpuMat*, InputOutputArray, int*, int*, int*, Stream&) { throw_no_cuda(); }
|
||||||
@ -93,7 +93,7 @@ namespace hist
|
|||||||
void equalizeHist(PtrStepSzb src, PtrStepSzb dst, const int* lut, cudaStream_t stream);
|
void equalizeHist(PtrStepSzb src, PtrStepSzb dst, const int* lut, cudaStream_t stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::cuda::equalizeHist(InputArray _src, OutputArray _dst, InputOutputArray _buf, Stream& _stream)
|
void cv::cuda::equalizeHist(InputArray _src, OutputArray _dst, Stream& _stream)
|
||||||
{
|
{
|
||||||
GpuMat src = _src.getGpuMat();
|
GpuMat src = _src.getGpuMat();
|
||||||
|
|
||||||
@ -107,8 +107,8 @@ void cv::cuda::equalizeHist(InputArray _src, OutputArray _dst, InputOutputArray
|
|||||||
|
|
||||||
size_t bufSize = intBufSize + 2 * 256 * sizeof(int);
|
size_t bufSize = intBufSize + 2 * 256 * sizeof(int);
|
||||||
|
|
||||||
ensureSizeIsEnough(1, static_cast<int>(bufSize), CV_8UC1, _buf);
|
BufferPool pool(_stream);
|
||||||
GpuMat buf = _buf.getGpuMat();
|
GpuMat buf = pool.getBuffer(1, static_cast<int>(bufSize), CV_8UC1);
|
||||||
|
|
||||||
GpuMat hist(1, 256, CV_32SC1, buf.data);
|
GpuMat hist(1, 256, CV_32SC1, buf.data);
|
||||||
GpuMat lut(1, 256, CV_32SC1, buf.data + 256 * sizeof(int));
|
GpuMat lut(1, 256, CV_32SC1, buf.data + 256 * sizeof(int));
|
||||||
@ -288,7 +288,7 @@ namespace
|
|||||||
{
|
{
|
||||||
typedef typename NppHistogramEvenFuncC1<SDEPTH>::src_t src_t;
|
typedef typename NppHistogramEvenFuncC1<SDEPTH>::src_t src_t;
|
||||||
|
|
||||||
static void hist(const GpuMat& src, OutputArray _hist, InputOutputArray _buf, int histSize, int lowerLevel, int upperLevel, cudaStream_t stream)
|
static void hist(const GpuMat& src, OutputArray _hist, int histSize, int lowerLevel, int upperLevel, Stream& stream)
|
||||||
{
|
{
|
||||||
const int levels = histSize + 1;
|
const int levels = histSize + 1;
|
||||||
|
|
||||||
@ -302,15 +302,15 @@ namespace
|
|||||||
int buf_size;
|
int buf_size;
|
||||||
get_buf_size(sz, levels, &buf_size);
|
get_buf_size(sz, levels, &buf_size);
|
||||||
|
|
||||||
ensureSizeIsEnough(1, buf_size, CV_8UC1, _buf);
|
BufferPool pool(stream);
|
||||||
GpuMat buf = _buf.getGpuMat();
|
GpuMat buf = pool.getBuffer(1, buf_size, CV_8UC1);
|
||||||
|
|
||||||
NppStreamHandler h(stream);
|
NppStreamHandler h(stream);
|
||||||
|
|
||||||
nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, hist.ptr<Npp32s>(), levels,
|
nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, hist.ptr<Npp32s>(), levels,
|
||||||
lowerLevel, upperLevel, buf.ptr<Npp8u>()) );
|
lowerLevel, upperLevel, buf.ptr<Npp8u>()) );
|
||||||
|
|
||||||
if (stream == 0)
|
if (!stream)
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -319,7 +319,7 @@ namespace
|
|||||||
{
|
{
|
||||||
typedef typename NppHistogramEvenFuncC4<SDEPTH>::src_t src_t;
|
typedef typename NppHistogramEvenFuncC4<SDEPTH>::src_t src_t;
|
||||||
|
|
||||||
static void hist(const GpuMat& src, GpuMat hist[4],InputOutputArray _buf, int histSize[4], int lowerLevel[4], int upperLevel[4], cudaStream_t stream)
|
static void hist(const GpuMat& src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream)
|
||||||
{
|
{
|
||||||
int levels[] = {histSize[0] + 1, histSize[1] + 1, histSize[2] + 1, histSize[3] + 1};
|
int levels[] = {histSize[0] + 1, histSize[1] + 1, histSize[2] + 1, histSize[3] + 1};
|
||||||
hist[0].create(1, histSize[0], CV_32S);
|
hist[0].create(1, histSize[0], CV_32S);
|
||||||
@ -336,14 +336,14 @@ namespace
|
|||||||
int buf_size;
|
int buf_size;
|
||||||
get_buf_size(sz, levels, &buf_size);
|
get_buf_size(sz, levels, &buf_size);
|
||||||
|
|
||||||
ensureSizeIsEnough(1, buf_size, CV_8U, _buf);
|
BufferPool pool(stream);
|
||||||
GpuMat buf = _buf.getGpuMat();
|
GpuMat buf = pool.getBuffer(1, buf_size, CV_8UC1);
|
||||||
|
|
||||||
NppStreamHandler h(stream);
|
NppStreamHandler h(stream);
|
||||||
|
|
||||||
nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, pHist, levels, lowerLevel, upperLevel, buf.ptr<Npp8u>()) );
|
nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, pHist, levels, lowerLevel, upperLevel, buf.ptr<Npp8u>()) );
|
||||||
|
|
||||||
if (stream == 0)
|
if (!stream)
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
cudaSafeCall( cudaDeviceSynchronize() );
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -392,7 +392,7 @@ namespace
|
|||||||
typedef typename NppHistogramRangeFuncC1<SDEPTH>::level_t level_t;
|
typedef typename NppHistogramRangeFuncC1<SDEPTH>::level_t level_t;
|
||||||
enum {LEVEL_TYPE_CODE=NppHistogramRangeFuncC1<SDEPTH>::LEVEL_TYPE_CODE};
|
enum {LEVEL_TYPE_CODE=NppHistogramRangeFuncC1<SDEPTH>::LEVEL_TYPE_CODE};
|
||||||
|
|
||||||
static void hist(const GpuMat& src, OutputArray _hist, const GpuMat& levels, InputOutputArray _buf, cudaStream_t stream)
|
static void hist(const GpuMat& src, OutputArray _hist, const GpuMat& levels, Stream& stream)
|
||||||
{
|
{
|
||||||
CV_Assert( levels.type() == LEVEL_TYPE_CODE && levels.rows == 1 );
|
CV_Assert( levels.type() == LEVEL_TYPE_CODE && levels.rows == 1 );
|
||||||
|
|
||||||
@ -406,8 +406,8 @@ namespace
|
|||||||
int buf_size;
|
int buf_size;
|
||||||
get_buf_size(sz, levels.cols, &buf_size);
|
get_buf_size(sz, levels.cols, &buf_size);
|
||||||
|
|
||||||
ensureSizeIsEnough(1, buf_size, CV_8U, _buf);
|
BufferPool pool(stream);
|
||||||
GpuMat buf = _buf.getGpuMat();
|
GpuMat buf = pool.getBuffer(1, buf_size, CV_8UC1);
|
||||||
|
|
||||||
NppStreamHandler h(stream);
|
NppStreamHandler h(stream);
|
||||||
|
|
||||||
@ -424,7 +424,7 @@ namespace
|
|||||||
typedef typename NppHistogramRangeFuncC1<SDEPTH>::level_t level_t;
|
typedef typename NppHistogramRangeFuncC1<SDEPTH>::level_t level_t;
|
||||||
enum {LEVEL_TYPE_CODE=NppHistogramRangeFuncC1<SDEPTH>::LEVEL_TYPE_CODE};
|
enum {LEVEL_TYPE_CODE=NppHistogramRangeFuncC1<SDEPTH>::LEVEL_TYPE_CODE};
|
||||||
|
|
||||||
static void hist(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4],InputOutputArray _buf, cudaStream_t stream)
|
static void hist(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], Stream& stream)
|
||||||
{
|
{
|
||||||
CV_Assert( levels[0].type() == LEVEL_TYPE_CODE && levels[0].rows == 1 );
|
CV_Assert( levels[0].type() == LEVEL_TYPE_CODE && levels[0].rows == 1 );
|
||||||
CV_Assert( levels[1].type() == LEVEL_TYPE_CODE && levels[1].rows == 1 );
|
CV_Assert( levels[1].type() == LEVEL_TYPE_CODE && levels[1].rows == 1 );
|
||||||
@ -447,8 +447,8 @@ namespace
|
|||||||
int buf_size;
|
int buf_size;
|
||||||
get_buf_size(sz, nLevels, &buf_size);
|
get_buf_size(sz, nLevels, &buf_size);
|
||||||
|
|
||||||
ensureSizeIsEnough(1, buf_size, CV_8U, _buf);
|
BufferPool pool(stream);
|
||||||
GpuMat buf = _buf.getGpuMat();
|
GpuMat buf = pool.getBuffer(1, buf_size, CV_8UC1);
|
||||||
|
|
||||||
NppStreamHandler h(stream);
|
NppStreamHandler h(stream);
|
||||||
|
|
||||||
@ -460,7 +460,7 @@ namespace
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::cuda::evenLevels(OutputArray _levels, int nLevels, int lowerLevel, int upperLevel)
|
void cv::cuda::evenLevels(OutputArray _levels, int nLevels, int lowerLevel, int upperLevel, Stream& stream)
|
||||||
{
|
{
|
||||||
const int kind = _levels.kind();
|
const int kind = _levels.kind();
|
||||||
|
|
||||||
@ -475,7 +475,7 @@ void cv::cuda::evenLevels(OutputArray _levels, int nLevels, int lowerLevel, int
|
|||||||
nppSafeCall( nppiEvenLevelsHost_32s(host_levels.ptr<Npp32s>(), nLevels, lowerLevel, upperLevel) );
|
nppSafeCall( nppiEvenLevelsHost_32s(host_levels.ptr<Npp32s>(), nLevels, lowerLevel, upperLevel) );
|
||||||
|
|
||||||
if (kind == _InputArray::CUDA_GPU_MAT)
|
if (kind == _InputArray::CUDA_GPU_MAT)
|
||||||
_levels.getGpuMatRef().upload(host_levels);
|
_levels.getGpuMatRef().upload(host_levels, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
namespace hist
|
namespace hist
|
||||||
@ -493,9 +493,9 @@ namespace
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::cuda::histEven(InputArray _src, OutputArray hist, InputOutputArray buf, int histSize, int lowerLevel, int upperLevel, Stream& stream)
|
void cv::cuda::histEven(InputArray _src, OutputArray hist, int histSize, int lowerLevel, int upperLevel, Stream& stream)
|
||||||
{
|
{
|
||||||
typedef void (*hist_t)(const GpuMat& src, OutputArray hist, InputOutputArray buf, int levels, int lowerLevel, int upperLevel, cudaStream_t stream);
|
typedef void (*hist_t)(const GpuMat& src, OutputArray hist, int levels, int lowerLevel, int upperLevel, Stream& stream);
|
||||||
static const hist_t hist_callers[] =
|
static const hist_t hist_callers[] =
|
||||||
{
|
{
|
||||||
NppHistogramEvenC1<CV_8U , nppiHistogramEven_8u_C1R , nppiHistogramEvenGetBufferSize_8u_C1R >::hist,
|
NppHistogramEvenC1<CV_8U , nppiHistogramEven_8u_C1R , nppiHistogramEvenGetBufferSize_8u_C1R >::hist,
|
||||||
@ -514,12 +514,12 @@ void cv::cuda::histEven(InputArray _src, OutputArray hist, InputOutputArray buf,
|
|||||||
|
|
||||||
CV_Assert( src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1 );
|
CV_Assert( src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1 );
|
||||||
|
|
||||||
hist_callers[src.depth()](src, hist, buf, histSize, lowerLevel, upperLevel, StreamAccessor::getStream(stream));
|
hist_callers[src.depth()](src, hist, histSize, lowerLevel, upperLevel, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::cuda::histEven(InputArray _src, GpuMat hist[4], InputOutputArray buf, int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream)
|
void cv::cuda::histEven(InputArray _src, GpuMat hist[4], int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream)
|
||||||
{
|
{
|
||||||
typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], InputOutputArray buf, int levels[4], int lowerLevel[4], int upperLevel[4], cudaStream_t stream);
|
typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], int levels[4], int lowerLevel[4], int upperLevel[4], Stream& stream);
|
||||||
static const hist_t hist_callers[] =
|
static const hist_t hist_callers[] =
|
||||||
{
|
{
|
||||||
NppHistogramEvenC4<CV_8U , nppiHistogramEven_8u_C4R , nppiHistogramEvenGetBufferSize_8u_C4R >::hist,
|
NppHistogramEvenC4<CV_8U , nppiHistogramEven_8u_C4R , nppiHistogramEvenGetBufferSize_8u_C4R >::hist,
|
||||||
@ -532,12 +532,12 @@ void cv::cuda::histEven(InputArray _src, GpuMat hist[4], InputOutputArray buf, i
|
|||||||
|
|
||||||
CV_Assert( src.type() == CV_8UC4 || src.type() == CV_16UC4 || src.type() == CV_16SC4 );
|
CV_Assert( src.type() == CV_8UC4 || src.type() == CV_16UC4 || src.type() == CV_16SC4 );
|
||||||
|
|
||||||
hist_callers[src.depth()](src, hist, buf, histSize, lowerLevel, upperLevel, StreamAccessor::getStream(stream));
|
hist_callers[src.depth()](src, hist, histSize, lowerLevel, upperLevel, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::cuda::histRange(InputArray _src, OutputArray hist, InputArray _levels, InputOutputArray buf, Stream& stream)
|
void cv::cuda::histRange(InputArray _src, OutputArray hist, InputArray _levels, Stream& stream)
|
||||||
{
|
{
|
||||||
typedef void (*hist_t)(const GpuMat& src, OutputArray hist, const GpuMat& levels, InputOutputArray buf, cudaStream_t stream);
|
typedef void (*hist_t)(const GpuMat& src, OutputArray hist, const GpuMat& levels, Stream& stream);
|
||||||
static const hist_t hist_callers[] =
|
static const hist_t hist_callers[] =
|
||||||
{
|
{
|
||||||
NppHistogramRangeC1<CV_8U , nppiHistogramRange_8u_C1R , nppiHistogramRangeGetBufferSize_8u_C1R >::hist,
|
NppHistogramRangeC1<CV_8U , nppiHistogramRange_8u_C1R , nppiHistogramRangeGetBufferSize_8u_C1R >::hist,
|
||||||
@ -553,12 +553,12 @@ void cv::cuda::histRange(InputArray _src, OutputArray hist, InputArray _levels,
|
|||||||
|
|
||||||
CV_Assert( src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1 || src.type() == CV_32FC1 );
|
CV_Assert( src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1 || src.type() == CV_32FC1 );
|
||||||
|
|
||||||
hist_callers[src.depth()](src, hist, levels, buf, StreamAccessor::getStream(stream));
|
hist_callers[src.depth()](src, hist, levels, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::cuda::histRange(InputArray _src, GpuMat hist[4], const GpuMat levels[4], InputOutputArray buf, Stream& stream)
|
void cv::cuda::histRange(InputArray _src, GpuMat hist[4], const GpuMat levels[4], Stream& stream)
|
||||||
{
|
{
|
||||||
typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], InputOutputArray buf, cudaStream_t stream);
|
typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], Stream& stream);
|
||||||
static const hist_t hist_callers[] =
|
static const hist_t hist_callers[] =
|
||||||
{
|
{
|
||||||
NppHistogramRangeC4<CV_8U , nppiHistogramRange_8u_C4R , nppiHistogramRangeGetBufferSize_8u_C4R >::hist,
|
NppHistogramRangeC4<CV_8U , nppiHistogramRange_8u_C4R , nppiHistogramRangeGetBufferSize_8u_C4R >::hist,
|
||||||
@ -573,7 +573,7 @@ void cv::cuda::histRange(InputArray _src, GpuMat hist[4], const GpuMat levels[4]
|
|||||||
|
|
||||||
CV_Assert( src.type() == CV_8UC4 || src.type() == CV_16UC4 || src.type() == CV_16SC4 || src.type() == CV_32FC4 );
|
CV_Assert( src.type() == CV_8UC4 || src.type() == CV_16UC4 || src.type() == CV_16SC4 || src.type() == CV_32FC4 );
|
||||||
|
|
||||||
hist_callers[src.depth()](src, hist, levels, buf, StreamAccessor::getStream(stream));
|
hist_callers[src.depth()](src, hist, levels, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif /* !defined (HAVE_CUDA) */
|
#endif /* !defined (HAVE_CUDA) */
|
||||||
|
@ -74,7 +74,7 @@ namespace
|
|||||||
public:
|
public:
|
||||||
HoughCirclesDetectorImpl(float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles);
|
HoughCirclesDetectorImpl(float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles);
|
||||||
|
|
||||||
void detect(InputArray src, OutputArray circles);
|
void detect(InputArray src, OutputArray circles, Stream& stream);
|
||||||
|
|
||||||
void setDp(float dp) { dp_ = dp; }
|
void setDp(float dp) { dp_ = dp; }
|
||||||
float getDp() const { return dp_; }
|
float getDp() const { return dp_; }
|
||||||
@ -154,8 +154,11 @@ namespace
|
|||||||
filterDy_ = cuda::createSobelFilter(CV_8UC1, CV_32S, 0, 1);
|
filterDy_ = cuda::createSobelFilter(CV_8UC1, CV_32S, 0, 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
void HoughCirclesDetectorImpl::detect(InputArray _src, OutputArray circles)
|
void HoughCirclesDetectorImpl::detect(InputArray _src, OutputArray circles, Stream& stream)
|
||||||
{
|
{
|
||||||
|
// TODO : implement async version
|
||||||
|
(void) stream;
|
||||||
|
|
||||||
using namespace cv::cuda::device::hough;
|
using namespace cv::cuda::device::hough;
|
||||||
using namespace cv::cuda::device::hough_circles;
|
using namespace cv::cuda::device::hough_circles;
|
||||||
|
|
||||||
|
@ -75,8 +75,8 @@ namespace
|
|||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
void detect(InputArray src, OutputArray lines);
|
void detect(InputArray src, OutputArray lines, Stream& stream);
|
||||||
void downloadResults(InputArray d_lines, OutputArray h_lines, OutputArray h_votes = noArray());
|
void downloadResults(InputArray d_lines, OutputArray h_lines, OutputArray h_votes, Stream& stream);
|
||||||
|
|
||||||
void setRho(float rho) { rho_ = rho; }
|
void setRho(float rho) { rho_ = rho; }
|
||||||
float getRho() const { return rho_; }
|
float getRho() const { return rho_; }
|
||||||
@ -125,8 +125,11 @@ namespace
|
|||||||
GpuMat result_;
|
GpuMat result_;
|
||||||
};
|
};
|
||||||
|
|
||||||
void HoughLinesDetectorImpl::detect(InputArray _src, OutputArray lines)
|
void HoughLinesDetectorImpl::detect(InputArray _src, OutputArray lines, Stream& stream)
|
||||||
{
|
{
|
||||||
|
// TODO : implement async version
|
||||||
|
(void) stream;
|
||||||
|
|
||||||
using namespace cv::cuda::device::hough;
|
using namespace cv::cuda::device::hough;
|
||||||
using namespace cv::cuda::device::hough_lines;
|
using namespace cv::cuda::device::hough_lines;
|
||||||
|
|
||||||
@ -170,7 +173,7 @@ namespace
|
|||||||
result_.copyTo(lines);
|
result_.copyTo(lines);
|
||||||
}
|
}
|
||||||
|
|
||||||
void HoughLinesDetectorImpl::downloadResults(InputArray _d_lines, OutputArray h_lines, OutputArray h_votes)
|
void HoughLinesDetectorImpl::downloadResults(InputArray _d_lines, OutputArray h_lines, OutputArray h_votes, Stream& stream)
|
||||||
{
|
{
|
||||||
GpuMat d_lines = _d_lines.getGpuMat();
|
GpuMat d_lines = _d_lines.getGpuMat();
|
||||||
|
|
||||||
@ -184,12 +187,18 @@ namespace
|
|||||||
|
|
||||||
CV_Assert( d_lines.rows == 2 && d_lines.type() == CV_32FC2 );
|
CV_Assert( d_lines.rows == 2 && d_lines.type() == CV_32FC2 );
|
||||||
|
|
||||||
d_lines.row(0).download(h_lines);
|
if (stream)
|
||||||
|
d_lines.row(0).download(h_lines, stream);
|
||||||
|
else
|
||||||
|
d_lines.row(0).download(h_lines);
|
||||||
|
|
||||||
if (h_votes.needed())
|
if (h_votes.needed())
|
||||||
{
|
{
|
||||||
GpuMat d_votes(1, d_lines.cols, CV_32SC1, d_lines.ptr<int>(1));
|
GpuMat d_votes(1, d_lines.cols, CV_32SC1, d_lines.ptr<int>(1));
|
||||||
d_votes.download(h_votes);
|
if (stream)
|
||||||
|
d_votes.download(h_votes, stream);
|
||||||
|
else
|
||||||
|
d_votes.download(h_votes);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -79,7 +79,7 @@ namespace
|
|||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
void detect(InputArray src, OutputArray lines);
|
void detect(InputArray src, OutputArray lines, Stream& stream);
|
||||||
|
|
||||||
void setRho(float rho) { rho_ = rho; }
|
void setRho(float rho) { rho_ = rho; }
|
||||||
float getRho() const { return rho_; }
|
float getRho() const { return rho_; }
|
||||||
@ -128,8 +128,11 @@ namespace
|
|||||||
GpuMat result_;
|
GpuMat result_;
|
||||||
};
|
};
|
||||||
|
|
||||||
void HoughSegmentDetectorImpl::detect(InputArray _src, OutputArray lines)
|
void HoughSegmentDetectorImpl::detect(InputArray _src, OutputArray lines, Stream& stream)
|
||||||
{
|
{
|
||||||
|
// TODO : implement async version
|
||||||
|
(void) stream;
|
||||||
|
|
||||||
using namespace cv::cuda::device::hough;
|
using namespace cv::cuda::device::hough;
|
||||||
using namespace cv::cuda::device::hough_lines;
|
using namespace cv::cuda::device::hough_lines;
|
||||||
using namespace cv::cuda::device::hough_segments;
|
using namespace cv::cuda::device::hough_segments;
|
||||||
|
@ -43,7 +43,7 @@
|
|||||||
|
|
||||||
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
|
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
|
||||||
|
|
||||||
void cv::cuda::meanShiftSegmentation(InputArray, OutputArray, int, int, int, TermCriteria) { throw_no_cuda(); }
|
void cv::cuda::meanShiftSegmentation(InputArray, OutputArray, int, int, int, TermCriteria, Stream&) { throw_no_cuda(); }
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
@ -222,7 +222,7 @@ inline int dist2(const cv::Vec2s& lhs, const cv::Vec2s& rhs)
|
|||||||
} // anonymous namespace
|
} // anonymous namespace
|
||||||
|
|
||||||
|
|
||||||
void cv::cuda::meanShiftSegmentation(InputArray _src, OutputArray _dst, int sp, int sr, int minsize, TermCriteria criteria)
|
void cv::cuda::meanShiftSegmentation(InputArray _src, OutputArray _dst, int sp, int sr, int minsize, TermCriteria criteria, Stream& stream)
|
||||||
{
|
{
|
||||||
GpuMat src = _src.getGpuMat();
|
GpuMat src = _src.getGpuMat();
|
||||||
|
|
||||||
@ -235,7 +235,10 @@ void cv::cuda::meanShiftSegmentation(InputArray _src, OutputArray _dst, int sp,
|
|||||||
|
|
||||||
// Perform mean shift procedure and obtain region and spatial maps
|
// Perform mean shift procedure and obtain region and spatial maps
|
||||||
GpuMat d_rmap, d_spmap;
|
GpuMat d_rmap, d_spmap;
|
||||||
cuda::meanShiftProc(src, d_rmap, d_spmap, sp, sr, criteria);
|
cuda::meanShiftProc(src, d_rmap, d_spmap, sp, sr, criteria, stream);
|
||||||
|
|
||||||
|
stream.waitForCompletion();
|
||||||
|
|
||||||
Mat rmap(d_rmap);
|
Mat rmap(d_rmap);
|
||||||
Mat spmap(d_spmap);
|
Mat spmap(d_spmap);
|
||||||
|
|
||||||
|
@ -1053,12 +1053,11 @@ TEST(equalizeHist)
|
|||||||
|
|
||||||
cuda::GpuMat d_src(src);
|
cuda::GpuMat d_src(src);
|
||||||
cuda::GpuMat d_dst;
|
cuda::GpuMat d_dst;
|
||||||
cuda::GpuMat d_buf;
|
|
||||||
|
|
||||||
cuda::equalizeHist(d_src, d_dst, d_buf);
|
cuda::equalizeHist(d_src, d_dst);
|
||||||
|
|
||||||
CUDA_ON;
|
CUDA_ON;
|
||||||
cuda::equalizeHist(d_src, d_dst, d_buf);
|
cuda::equalizeHist(d_src, d_dst);
|
||||||
CUDA_OFF;
|
CUDA_OFF;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user