renamed gpuimgproc -> cudaimgproc

Vladislav Vinogradov
2013-07-23 16:59:34 +04:00
parent 219b662127
commit a0ae602bb7
93 changed files with 54 additions and 54 deletions


@@ -0,0 +1,99 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace cv;
using namespace cv::cuda;
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
void cv::cuda::bilateralFilter(InputArray, OutputArray, int, float, float, int, Stream&) { throw_no_cuda(); }
#else
namespace cv { namespace cuda { namespace device
{
namespace imgproc
{
template<typename T>
void bilateral_filter_gpu(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float sigma_spatial, float sigma_color, int borderMode, cudaStream_t stream);
}
}}}
void cv::cuda::bilateralFilter(InputArray _src, OutputArray _dst, int kernel_size, float sigma_color, float sigma_spatial, int borderMode, Stream& stream)
{
using cv::cuda::device::imgproc::bilateral_filter_gpu;
typedef void (*func_t)(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float sigma_spatial, float sigma_color, int borderMode, cudaStream_t s);
static const func_t funcs[6][4] =
{
{bilateral_filter_gpu<uchar> , 0 /*bilateral_filter_gpu<uchar2>*/ , bilateral_filter_gpu<uchar3> , bilateral_filter_gpu<uchar4> },
{0 /*bilateral_filter_gpu<schar>*/, 0 /*bilateral_filter_gpu<schar2>*/ , 0 /*bilateral_filter_gpu<schar3>*/, 0 /*bilateral_filter_gpu<schar4>*/},
{bilateral_filter_gpu<ushort> , 0 /*bilateral_filter_gpu<ushort2>*/, bilateral_filter_gpu<ushort3> , bilateral_filter_gpu<ushort4> },
{bilateral_filter_gpu<short> , 0 /*bilateral_filter_gpu<short2>*/ , bilateral_filter_gpu<short3> , bilateral_filter_gpu<short4> },
{0 /*bilateral_filter_gpu<int>*/ , 0 /*bilateral_filter_gpu<int2>*/ , 0 /*bilateral_filter_gpu<int3>*/ , 0 /*bilateral_filter_gpu<int4>*/ },
{bilateral_filter_gpu<float> , 0 /*bilateral_filter_gpu<float2>*/ , bilateral_filter_gpu<float3> , bilateral_filter_gpu<float4> }
};
sigma_color = (sigma_color <= 0 ) ? 1 : sigma_color;
sigma_spatial = (sigma_spatial <= 0 ) ? 1 : sigma_spatial;
int radius = (kernel_size <= 0) ? cvRound(sigma_spatial*1.5) : kernel_size/2;
kernel_size = std::max(radius, 1)*2 + 1;
GpuMat src = _src.getGpuMat();
CV_Assert( src.depth() <= CV_32F && src.channels() <= 4 );
CV_Assert( borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP );
const func_t func = funcs[src.depth()][src.channels() - 1];
CV_Assert( func != 0 );
_dst.create(src.size(), src.type());
GpuMat dst = _dst.getGpuMat();
func(src, dst, kernel_size, sigma_spatial, sigma_color, borderMode, StreamAccessor::getStream(stream));
}
#endif
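
For reference, a minimal usage sketch of the host wrapper above, not part of this commit: it assumes the renamed module installs an opencv2/cudaimgproc.hpp header and that OpenCV is built with CUDA; the kernel size and sigma values are arbitrary illustration values.

// Usage sketch (editorial illustration, not part of the commit).
#include <opencv2/core/cuda.hpp>
#include <opencv2/cudaimgproc.hpp>

void bilateralDemo(const cv::Mat& hostSrc)
{
    cv::cuda::GpuMat src, dst;
    src.upload(hostSrc);                       // CV_8U/16U/16S/32F, 1/3/4 channels
    cv::cuda::bilateralFilter(src, dst,
                              /*kernel_size*/   9,
                              /*sigma_color*/   30.0f,
                              /*sigma_spatial*/ 5.0f,
                              cv::BORDER_REFLECT101,
                              cv::cuda::Stream::Null());
    cv::Mat hostDst;
    dst.download(hostDst);
}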


@@ -0,0 +1,109 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace cv;
using namespace cv::cuda;
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
void cv::cuda::blendLinear(InputArray, InputArray, InputArray, InputArray, OutputArray, Stream&) { throw_no_cuda(); }
#else
////////////////////////////////////////////////////////////////////////
// blendLinear
namespace cv { namespace cuda { namespace device
{
namespace blend
{
template <typename T>
void blendLinearCaller(int rows, int cols, int cn, PtrStep<T> img1, PtrStep<T> img2, PtrStepf weights1, PtrStepf weights2, PtrStep<T> result, cudaStream_t stream);
void blendLinearCaller8UC4(int rows, int cols, PtrStepb img1, PtrStepb img2, PtrStepf weights1, PtrStepf weights2, PtrStepb result, cudaStream_t stream);
}
}}}
using namespace ::cv::cuda::device::blend;
void cv::cuda::blendLinear(InputArray _img1, InputArray _img2, InputArray _weights1, InputArray _weights2,
OutputArray _result, Stream& stream)
{
GpuMat img1 = _img1.getGpuMat();
GpuMat img2 = _img2.getGpuMat();
GpuMat weights1 = _weights1.getGpuMat();
GpuMat weights2 = _weights2.getGpuMat();
CV_Assert( img1.size() == img2.size() );
CV_Assert( img1.type() == img2.type() );
CV_Assert( weights1.size() == img1.size() );
CV_Assert( weights2.size() == img2.size() );
CV_Assert( weights1.type() == CV_32FC1 );
CV_Assert( weights2.type() == CV_32FC1 );
const Size size = img1.size();
const int depth = img1.depth();
const int cn = img1.channels();
_result.create(size, CV_MAKE_TYPE(depth, cn));
GpuMat result = _result.getGpuMat();
switch (depth)
{
case CV_8U:
if (cn != 4)
blendLinearCaller<uchar>(size.height, size.width, cn, img1, img2, weights1, weights2, result, StreamAccessor::getStream(stream));
else
blendLinearCaller8UC4(size.height, size.width, img1, img2, weights1, weights2, result, StreamAccessor::getStream(stream));
break;
case CV_32F:
blendLinearCaller<float>(size.height, size.width, cn, img1, img2, weights1, weights2, result, StreamAccessor::getStream(stream));
break;
default:
CV_Error(cv::Error::StsUnsupportedFormat, "bad image depth in linear blending function");
}
}
#endif
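
A minimal sketch of calling cv::cuda::blendLinear as defined above, not part of the commit; the constant weight values and the opencv2/cudaimgproc.hpp include are assumptions for illustration.

// Usage sketch (editorial illustration): blend two same-sized, same-type images
// with per-pixel CV_32FC1 weight maps, as required by the asserts above.
#include <opencv2/core/cuda.hpp>
#include <opencv2/cudaimgproc.hpp>

void blendDemo(const cv::cuda::GpuMat& img1, const cv::cuda::GpuMat& img2)
{
    cv::cuda::GpuMat w1(img1.size(), CV_32FC1, cv::Scalar::all(0.3));
    cv::cuda::GpuMat w2(img2.size(), CV_32FC1, cv::Scalar::all(0.7));
    cv::cuda::GpuMat result;
    cv::cuda::blendLinear(img1, img2, w1, w2, result, cv::cuda::Stream::Null());
}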


@@ -0,0 +1,234 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace cv;
using namespace cv::cuda;
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
Ptr<CannyEdgeDetector> cv::cuda::createCannyEdgeDetector(double, double, int, bool) { throw_no_cuda(); return Ptr<CannyEdgeDetector>(); }
#else /* !defined (HAVE_CUDA) */
namespace canny
{
void calcMagnitude(PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad);
void calcMagnitude(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad);
void calcMap(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, PtrStepSzi map, float low_thresh, float high_thresh);
void edgesHysteresisLocal(PtrStepSzi map, ushort2* st1);
void edgesHysteresisGlobal(PtrStepSzi map, ushort2* st1, ushort2* st2);
void getEdges(PtrStepSzi map, PtrStepSzb dst);
}
namespace
{
class CannyImpl : public CannyEdgeDetector
{
public:
CannyImpl(double low_thresh, double high_thresh, int apperture_size, bool L2gradient) :
low_thresh_(low_thresh), high_thresh_(high_thresh), apperture_size_(apperture_size), L2gradient_(L2gradient)
{
old_apperture_size_ = -1;
}
void detect(InputArray image, OutputArray edges);
void detect(InputArray dx, InputArray dy, OutputArray edges);
void setLowThreshold(double low_thresh) { low_thresh_ = low_thresh; }
double getLowThreshold() const { return low_thresh_; }
void setHighThreshold(double high_thresh) { high_thresh_ = high_thresh; }
double getHighThreshold() const { return high_thresh_; }
void setAppertureSize(int apperture_size) { apperture_size_ = apperture_size; }
int getAppertureSize() const { return apperture_size_; }
void setL2Gradient(bool L2gradient) { L2gradient_ = L2gradient; }
bool getL2Gradient() const { return L2gradient_; }
void write(FileStorage& fs) const
{
fs << "name" << "Canny_GPU"
<< "low_thresh" << low_thresh_
<< "high_thresh" << high_thresh_
<< "apperture_size" << apperture_size_
<< "L2gradient" << L2gradient_;
}
void read(const FileNode& fn)
{
CV_Assert( String(fn["name"]) == "Canny_GPU" );
low_thresh_ = (double)fn["low_thresh"];
high_thresh_ = (double)fn["high_thresh"];
apperture_size_ = (int)fn["apperture_size"];
L2gradient_ = (int)fn["L2gradient"] != 0;
}
private:
void createBuf(Size image_size);
void CannyCaller(GpuMat& edges);
double low_thresh_;
double high_thresh_;
int apperture_size_;
bool L2gradient_;
GpuMat dx_, dy_;
GpuMat mag_;
GpuMat map_;
GpuMat st1_, st2_;
#ifdef HAVE_OPENCV_CUDAFILTERS
Ptr<Filter> filterDX_, filterDY_;
#endif
int old_apperture_size_;
};
void CannyImpl::detect(InputArray _image, OutputArray _edges)
{
GpuMat image = _image.getGpuMat();
CV_Assert( image.type() == CV_8UC1 );
CV_Assert( deviceSupports(SHARED_ATOMICS) );
if (low_thresh_ > high_thresh_)
std::swap(low_thresh_, high_thresh_);
createBuf(image.size());
_edges.create(image.size(), CV_8UC1);
GpuMat edges = _edges.getGpuMat();
if (apperture_size_ == 3)
{
Size wholeSize;
Point ofs;
image.locateROI(wholeSize, ofs);
GpuMat srcWhole(wholeSize, image.type(), image.datastart, image.step);
canny::calcMagnitude(srcWhole, ofs.x, ofs.y, dx_, dy_, mag_, L2gradient_);
}
else
{
#ifndef HAVE_OPENCV_CUDAFILTERS
throw_no_cuda();
#else
filterDX_->apply(image, dx_);
filterDY_->apply(image, dy_);
canny::calcMagnitude(dx_, dy_, mag_, L2gradient_);
#endif
}
CannyCaller(edges);
}
void CannyImpl::detect(InputArray _dx, InputArray _dy, OutputArray _edges)
{
GpuMat dx = _dx.getGpuMat();
GpuMat dy = _dy.getGpuMat();
CV_Assert( dx.type() == CV_32SC1 );
CV_Assert( dy.type() == dx.type() && dy.size() == dx.size() );
CV_Assert( deviceSupports(SHARED_ATOMICS) );
dx.copyTo(dx_);
dy.copyTo(dy_);
if (low_thresh_ > high_thresh_)
std::swap(low_thresh_, high_thresh_);
createBuf(dx.size());
_edges.create(dx.size(), CV_8UC1);
GpuMat edges = _edges.getGpuMat();
canny::calcMagnitude(dx_, dy_, mag_, L2gradient_);
CannyCaller(edges);
}
void CannyImpl::createBuf(Size image_size)
{
ensureSizeIsEnough(image_size, CV_32SC1, dx_);
ensureSizeIsEnough(image_size, CV_32SC1, dy_);
#ifdef HAVE_OPENCV_CUDAFILTERS
if (apperture_size_ != 3 && apperture_size_ != old_apperture_size_)
{
filterDX_ = cuda::createDerivFilter(CV_8UC1, CV_32S, 1, 0, apperture_size_, false, 1, BORDER_REPLICATE);
filterDY_ = cuda::createDerivFilter(CV_8UC1, CV_32S, 0, 1, apperture_size_, false, 1, BORDER_REPLICATE);
old_apperture_size_ = apperture_size_;
}
#endif
ensureSizeIsEnough(image_size, CV_32FC1, mag_);
ensureSizeIsEnough(image_size, CV_32SC1, map_);
ensureSizeIsEnough(1, image_size.area(), CV_16UC2, st1_);
ensureSizeIsEnough(1, image_size.area(), CV_16UC2, st2_);
}
void CannyImpl::CannyCaller(GpuMat& edges)
{
map_.setTo(Scalar::all(0));
canny::calcMap(dx_, dy_, mag_, map_, static_cast<float>(low_thresh_), static_cast<float>(high_thresh_));
canny::edgesHysteresisLocal(map_, st1_.ptr<ushort2>());
canny::edgesHysteresisGlobal(map_, st1_.ptr<ushort2>(), st2_.ptr<ushort2>());
canny::getEdges(map_, edges);
}
}
Ptr<CannyEdgeDetector> cv::cuda::createCannyEdgeDetector(double low_thresh, double high_thresh, int apperture_size, bool L2gradient)
{
return new CannyImpl(low_thresh, high_thresh, apperture_size, L2gradient);
}
#endif /* !defined (HAVE_CUDA) */
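
A minimal sketch, not part of the commit, showing how the CannyEdgeDetector created above might be used; the threshold values are arbitrary and the input must be a single-channel 8-bit GpuMat, per the assert in detect().

// Usage sketch (editorial illustration) for the algorithm interface above.
#include <opencv2/core/cuda.hpp>
#include <opencv2/cudaimgproc.hpp>

void cannyDemo(const cv::cuda::GpuMat& gray8u)
{
    cv::Ptr<cv::cuda::CannyEdgeDetector> canny =
        cv::cuda::createCannyEdgeDetector(/*low_thresh*/ 50.0, /*high_thresh*/ 100.0,
                                          /*apperture_size*/ 3, /*L2gradient*/ false);
    cv::cuda::GpuMat edges;                    // filled as CV_8UC1
    canny->detect(gray8u, edges);
}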

File diff suppressed because it is too large.


@@ -0,0 +1,189 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace cv;
using namespace cv::cuda;
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) || !defined(HAVE_OPENCV_CUDAFILTERS)
Ptr<cuda::CornernessCriteria> cv::cuda::createHarrisCorner(int, int, int, double, int) { throw_no_cuda(); return Ptr<cuda::CornernessCriteria>(); }
Ptr<cuda::CornernessCriteria> cv::cuda::createMinEigenValCorner(int, int, int, int) { throw_no_cuda(); return Ptr<cuda::CornernessCriteria>(); }
#else /* !defined (HAVE_CUDA) */
namespace cv { namespace cuda { namespace device
{
namespace imgproc
{
void cornerHarris_gpu(int block_size, float k, PtrStepSzf Dx, PtrStepSzf Dy, PtrStepSzf dst, int border_type, cudaStream_t stream);
void cornerMinEigenVal_gpu(int block_size, PtrStepSzf Dx, PtrStepSzf Dy, PtrStepSzf dst, int border_type, cudaStream_t stream);
}
}}}
namespace
{
class CornerBase : public CornernessCriteria
{
protected:
CornerBase(int srcType, int blockSize, int ksize, int borderType);
void extractCovData(const GpuMat& src, Stream& stream);
int srcType_;
int blockSize_;
int ksize_;
int borderType_;
GpuMat Dx_, Dy_;
private:
Ptr<cuda::Filter> filterDx_, filterDy_;
};
CornerBase::CornerBase(int srcType, int blockSize, int ksize, int borderType) :
srcType_(srcType), blockSize_(blockSize), ksize_(ksize), borderType_(borderType)
{
CV_Assert( borderType_ == BORDER_REFLECT101 || borderType_ == BORDER_REPLICATE || borderType_ == BORDER_REFLECT );
const int sdepth = CV_MAT_DEPTH(srcType_);
const int cn = CV_MAT_CN(srcType_);
CV_Assert( cn == 1 );
double scale = static_cast<double>(1 << ((ksize_ > 0 ? ksize_ : 3) - 1)) * blockSize_;
if (ksize_ < 0)
scale *= 2.;
if (sdepth == CV_8U)
scale *= 255.;
scale = 1./scale;
if (ksize_ > 0)
{
filterDx_ = cuda::createSobelFilter(srcType, CV_32F, 1, 0, ksize_, scale, borderType_);
filterDy_ = cuda::createSobelFilter(srcType, CV_32F, 0, 1, ksize_, scale, borderType_);
}
else
{
filterDx_ = cuda::createScharrFilter(srcType, CV_32F, 1, 0, scale, borderType_);
filterDy_ = cuda::createScharrFilter(srcType, CV_32F, 0, 1, scale, borderType_);
}
}
void CornerBase::extractCovData(const GpuMat& src, Stream& stream)
{
CV_Assert( src.type() == srcType_ );
filterDx_->apply(src, Dx_, stream);
filterDy_->apply(src, Dy_, stream);
}
class Harris : public CornerBase
{
public:
Harris(int srcType, int blockSize, int ksize, double k, int borderType) :
CornerBase(srcType, blockSize, ksize, borderType), k_(static_cast<float>(k))
{
}
void compute(InputArray src, OutputArray dst, Stream& stream = Stream::Null());
private:
float k_;
};
void Harris::compute(InputArray _src, OutputArray _dst, Stream& stream)
{
using namespace cv::cuda::device::imgproc;
GpuMat src = _src.getGpuMat();
extractCovData(src, stream);
_dst.create(src.size(), CV_32FC1);
GpuMat dst = _dst.getGpuMat();
cornerHarris_gpu(blockSize_, k_, Dx_, Dy_, dst, borderType_, StreamAccessor::getStream(stream));
}
class MinEigenVal : public CornerBase
{
public:
MinEigenVal(int srcType, int blockSize, int ksize, int borderType) :
CornerBase(srcType, blockSize, ksize, borderType)
{
}
void compute(InputArray src, OutputArray dst, Stream& stream = Stream::Null());
private:
float k_;
};
void MinEigenVal::compute(InputArray _src, OutputArray _dst, Stream& stream)
{
using namespace cv::cuda::device::imgproc;
GpuMat src = _src.getGpuMat();
extractCovData(src, stream);
_dst.create(src.size(), CV_32FC1);
GpuMat dst = _dst.getGpuMat();
cornerMinEigenVal_gpu(blockSize_, Dx_, Dy_, dst, borderType_, StreamAccessor::getStream(stream));
}
}
Ptr<cuda::CornernessCriteria> cv::cuda::createHarrisCorner(int srcType, int blockSize, int ksize, double k, int borderType)
{
return new Harris(srcType, blockSize, ksize, k, borderType);
}
Ptr<cuda::CornernessCriteria> cv::cuda::createMinEigenValCorner(int srcType, int blockSize, int ksize, int borderType)
{
return new MinEigenVal(srcType, blockSize, ksize, borderType);
}
#endif /* !defined (HAVE_CUDA) */
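
A minimal sketch, not part of the commit, of the corner-response factories defined above; the parameter values are arbitrary and the source type must be single channel, as asserted in CornerBase.

// Usage sketch (editorial illustration) for createHarrisCorner / createMinEigenValCorner.
#include <opencv2/core/cuda.hpp>
#include <opencv2/cudaimgproc.hpp>

void cornerDemo(const cv::cuda::GpuMat& gray8u)
{
    cv::Ptr<cv::cuda::CornernessCriteria> harris =
        cv::cuda::createHarrisCorner(CV_8UC1, /*blockSize*/ 5, /*ksize*/ 3,
                                     /*k*/ 0.04, cv::BORDER_REFLECT101);
    cv::cuda::GpuMat response;                 // CV_32FC1 cornerness map
    harris->compute(gray8u, response);
}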


@@ -0,0 +1,199 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/border_interpolate.hpp"
using namespace cv::cuda;
typedef unsigned char uchar;
typedef unsigned short ushort;
//////////////////////////////////////////////////////////////////////////////////
/// Bilateral filtering
namespace cv { namespace cuda { namespace device
{
namespace imgproc
{
__device__ __forceinline__ float norm_l1(const float& a) { return ::fabs(a); }
__device__ __forceinline__ float norm_l1(const float2& a) { return ::fabs(a.x) + ::fabs(a.y); }
__device__ __forceinline__ float norm_l1(const float3& a) { return ::fabs(a.x) + ::fabs(a.y) + ::fabs(a.z); }
__device__ __forceinline__ float norm_l1(const float4& a) { return ::fabs(a.x) + ::fabs(a.y) + ::fabs(a.z) + ::fabs(a.w); }
__device__ __forceinline__ float sqr(const float& a) { return a * a; }
template<typename T, typename B>
__global__ void bilateral_kernel(const PtrStepSz<T> src, PtrStep<T> dst, const B b, const int ksz, const float sigma_spatial2_inv_half, const float sigma_color2_inv_half)
{
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type value_type;
int x = threadIdx.x + blockIdx.x * blockDim.x;
int y = threadIdx.y + blockIdx.y * blockDim.y;
if (x >= src.cols || y >= src.rows)
return;
value_type center = saturate_cast<value_type>(src(y, x));
value_type sum1 = VecTraits<value_type>::all(0);
float sum2 = 0;
int r = ksz / 2;
float r2 = (float)(r * r);
int tx = x - r + ksz;
int ty = y - r + ksz;
if (x - ksz/2 >=0 && y - ksz/2 >=0 && tx < src.cols && ty < src.rows)
{
for (int cy = y - r; cy < ty; ++cy)
for (int cx = x - r; cx < tx; ++cx)
{
float space2 = (x - cx) * (x - cx) + (y - cy) * (y - cy);
if (space2 > r2)
continue;
value_type value = saturate_cast<value_type>(src(cy, cx));
float weight = ::exp(space2 * sigma_spatial2_inv_half + sqr(norm_l1(value - center)) * sigma_color2_inv_half);
sum1 = sum1 + weight * value;
sum2 = sum2 + weight;
}
}
else
{
for (int cy = y - r; cy < ty; ++cy)
for (int cx = x - r; cx < tx; ++cx)
{
float space2 = (x - cx) * (x - cx) + (y - cy) * (y - cy);
if (space2 > r2)
continue;
value_type value = saturate_cast<value_type>(b.at(cy, cx, src.data, src.step));
float weight = ::exp(space2 * sigma_spatial2_inv_half + sqr(norm_l1(value - center)) * sigma_color2_inv_half);
sum1 = sum1 + weight * value;
sum2 = sum2 + weight;
}
}
dst(y, x) = saturate_cast<T>(sum1 / sum2);
}
template<typename T, template <typename> class B>
void bilateral_caller(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float sigma_spatial, float sigma_color, cudaStream_t stream)
{
dim3 block (32, 8);
dim3 grid (divUp (src.cols, block.x), divUp (src.rows, block.y));
B<T> b(src.rows, src.cols);
float sigma_spatial2_inv_half = -0.5f/(sigma_spatial * sigma_spatial);
float sigma_color2_inv_half = -0.5f/(sigma_color * sigma_color);
cudaSafeCall( cudaFuncSetCacheConfig (bilateral_kernel<T, B<T> >, cudaFuncCachePreferL1) );
bilateral_kernel<<<grid, block>>>((PtrStepSz<T>)src, (PtrStepSz<T>)dst, b, kernel_size, sigma_spatial2_inv_half, sigma_color2_inv_half);
cudaSafeCall ( cudaGetLastError () );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template<typename T>
void bilateral_filter_gpu(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float gauss_spatial_coeff, float gauss_color_coeff, int borderMode, cudaStream_t stream)
{
typedef void (*caller_t)(const PtrStepSzb& src, PtrStepSzb dst, int kernel_size, float sigma_spatial, float sigma_color, cudaStream_t stream);
static caller_t funcs[] =
{
bilateral_caller<T, BrdConstant>,
bilateral_caller<T, BrdReplicate>,
bilateral_caller<T, BrdReflect>,
bilateral_caller<T, BrdWrap>,
bilateral_caller<T, BrdReflect101>
};
funcs[borderMode](src, dst, kernel_size, gauss_spatial_coeff, gauss_color_coeff, stream);
}
}
}}}
#define OCV_INSTANTIATE_BILATERAL_FILTER(T) \
template void cv::cuda::device::imgproc::bilateral_filter_gpu<T>(const PtrStepSzb&, PtrStepSzb, int, float, float, int, cudaStream_t);
OCV_INSTANTIATE_BILATERAL_FILTER(uchar)
//OCV_INSTANTIATE_BILATERAL_FILTER(uchar2)
OCV_INSTANTIATE_BILATERAL_FILTER(uchar3)
OCV_INSTANTIATE_BILATERAL_FILTER(uchar4)
//OCV_INSTANTIATE_BILATERAL_FILTER(schar)
//OCV_INSTANTIATE_BILATERAL_FILTER(schar2)
//OCV_INSTANTIATE_BILATERAL_FILTER(schar3)
//OCV_INSTANTIATE_BILATERAL_FILTER(schar4)
OCV_INSTANTIATE_BILATERAL_FILTER(short)
//OCV_INSTANTIATE_BILATERAL_FILTER(short2)
OCV_INSTANTIATE_BILATERAL_FILTER(short3)
OCV_INSTANTIATE_BILATERAL_FILTER(short4)
OCV_INSTANTIATE_BILATERAL_FILTER(ushort)
//OCV_INSTANTIATE_BILATERAL_FILTER(ushort2)
OCV_INSTANTIATE_BILATERAL_FILTER(ushort3)
OCV_INSTANTIATE_BILATERAL_FILTER(ushort4)
//OCV_INSTANTIATE_BILATERAL_FILTER(int)
//OCV_INSTANTIATE_BILATERAL_FILTER(int2)
//OCV_INSTANTIATE_BILATERAL_FILTER(int3)
//OCV_INSTANTIATE_BILATERAL_FILTER(int4)
OCV_INSTANTIATE_BILATERAL_FILTER(float)
//OCV_INSTANTIATE_BILATERAL_FILTER(float2)
OCV_INSTANTIATE_BILATERAL_FILTER(float3)
OCV_INSTANTIATE_BILATERAL_FILTER(float4)
#endif /* CUDA_DISABLER */
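
For clarity, a scalar, host-side restatement of the per-neighbor weight computed in bilateral_kernel above (editorial illustration, not part of the commit); it only makes the -0.5/sigma^2 coefficients passed in by bilateral_caller explicit.

// weight = exp(space2 * (-0.5/sigma_spatial^2) + |value - center|_1^2 * (-0.5/sigma_color^2))
#include <cmath>

inline float bilateralWeightRef(float spatialDist2, float colorDistL1,
                                float sigma_spatial, float sigma_color)
{
    const float sigma_spatial2_inv_half = -0.5f / (sigma_spatial * sigma_spatial);
    const float sigma_color2_inv_half   = -0.5f / (sigma_color * sigma_color);
    return std::exp(spatialDist2 * sigma_spatial2_inv_half
                    + colorDistL1 * colorDistL1 * sigma_color2_inv_half);
}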


@@ -0,0 +1,121 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "opencv2/core/cuda/common.hpp"
namespace cv { namespace cuda { namespace device
{
namespace blend
{
template <typename T>
__global__ void blendLinearKernel(int rows, int cols, int cn, const PtrStep<T> img1, const PtrStep<T> img2,
const PtrStepf weights1, const PtrStepf weights2, PtrStep<T> result)
{
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;
if (y < rows && x < cols)
{
int x_ = x / cn;
float w1 = weights1.ptr(y)[x_];
float w2 = weights2.ptr(y)[x_];
T p1 = img1.ptr(y)[x];
T p2 = img2.ptr(y)[x];
result.ptr(y)[x] = (p1 * w1 + p2 * w2) / (w1 + w2 + 1e-5f);
}
}
template <typename T>
void blendLinearCaller(int rows, int cols, int cn, PtrStep<T> img1, PtrStep<T> img2, PtrStepf weights1, PtrStepf weights2, PtrStep<T> result, cudaStream_t stream)
{
dim3 threads(16, 16);
dim3 grid(divUp(cols * cn, threads.x), divUp(rows, threads.y));
blendLinearKernel<<<grid, threads, 0, stream>>>(rows, cols * cn, cn, img1, img2, weights1, weights2, result);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall(cudaDeviceSynchronize());
}
template void blendLinearCaller<uchar>(int, int, int, PtrStep<uchar>, PtrStep<uchar>, PtrStepf, PtrStepf, PtrStep<uchar>, cudaStream_t stream);
template void blendLinearCaller<float>(int, int, int, PtrStep<float>, PtrStep<float>, PtrStepf, PtrStepf, PtrStep<float>, cudaStream_t stream);
__global__ void blendLinearKernel8UC4(int rows, int cols, const PtrStepb img1, const PtrStepb img2,
const PtrStepf weights1, const PtrStepf weights2, PtrStepb result)
{
int x = blockIdx.x * blockDim.x + threadIdx.x;
int y = blockIdx.y * blockDim.y + threadIdx.y;
if (y < rows && x < cols)
{
float w1 = weights1.ptr(y)[x];
float w2 = weights2.ptr(y)[x];
float sum_inv = 1.f / (w1 + w2 + 1e-5f);
w1 *= sum_inv;
w2 *= sum_inv;
uchar4 p1 = ((const uchar4*)img1.ptr(y))[x];
uchar4 p2 = ((const uchar4*)img2.ptr(y))[x];
((uchar4*)result.ptr(y))[x] = make_uchar4(p1.x * w1 + p2.x * w2, p1.y * w1 + p2.y * w2,
p1.z * w1 + p2.z * w2, p1.w * w1 + p2.w * w2);
}
}
void blendLinearCaller8UC4(int rows, int cols, PtrStepb img1, PtrStepb img2, PtrStepf weights1, PtrStepf weights2, PtrStepb result, cudaStream_t stream)
{
dim3 threads(16, 16);
dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));
blendLinearKernel8UC4<<<grid, threads, 0, stream>>>(rows, cols, img1, img2, weights1, weights2, result);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall(cudaDeviceSynchronize());
}
} // namespace blend
}}} // namespace cv { namespace cuda { namespace device
#endif /* CUDA_DISABLER */
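
A scalar restatement of the per-element blend computed by both kernels above (editorial illustration, not part of the commit): a weighted average with a small epsilon guarding against a zero weight sum.

// result = (p1*w1 + p2*w2) / (w1 + w2 + 1e-5f), as in blendLinearKernel above.
inline float blendPixelRef(float p1, float p2, float w1, float w2)
{
    return (p1 * w1 + p2 * w2) / (w1 + w2 + 1e-5f);
}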


@@ -0,0 +1,138 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/emulation.hpp"
namespace cv { namespace cuda { namespace device
{
namespace hough
{
__device__ int g_counter;
template <int PIXELS_PER_THREAD>
__global__ void buildPointList(const PtrStepSzb src, unsigned int* list)
{
__shared__ unsigned int s_queues[4][32 * PIXELS_PER_THREAD];
__shared__ int s_qsize[4];
__shared__ int s_globStart[4];
const int x = blockIdx.x * blockDim.x * PIXELS_PER_THREAD + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (threadIdx.x == 0)
s_qsize[threadIdx.y] = 0;
__syncthreads();
if (y < src.rows)
{
// fill the queue
const uchar* srcRow = src.ptr(y);
for (int i = 0, xx = x; i < PIXELS_PER_THREAD && xx < src.cols; ++i, xx += blockDim.x)
{
if (srcRow[xx])
{
const unsigned int val = (y << 16) | xx;
const int qidx = Emulation::smem::atomicAdd(&s_qsize[threadIdx.y], 1);
s_queues[threadIdx.y][qidx] = val;
}
}
}
__syncthreads();
// let one thread reserve the space required in the global list
if (threadIdx.x == 0 && threadIdx.y == 0)
{
// find how many items are stored in each list
int totalSize = 0;
for (int i = 0; i < blockDim.y; ++i)
{
s_globStart[i] = totalSize;
totalSize += s_qsize[i];
}
// calculate the offset in the global list
const int globalOffset = atomicAdd(&g_counter, totalSize);
for (int i = 0; i < blockDim.y; ++i)
s_globStart[i] += globalOffset;
}
__syncthreads();
// copy local queues to global queue
const int qsize = s_qsize[threadIdx.y];
int gidx = s_globStart[threadIdx.y] + threadIdx.x;
for(int i = threadIdx.x; i < qsize; i += blockDim.x, gidx += blockDim.x)
list[gidx] = s_queues[threadIdx.y][i];
}
int buildPointList_gpu(PtrStepSzb src, unsigned int* list)
{
const int PIXELS_PER_THREAD = 16;
void* counterPtr;
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
const dim3 block(32, 4);
const dim3 grid(divUp(src.cols, block.x * PIXELS_PER_THREAD), divUp(src.rows, block.y));
cudaSafeCall( cudaFuncSetCacheConfig(buildPointList<PIXELS_PER_THREAD>, cudaFuncCachePreferShared) );
buildPointList<PIXELS_PER_THREAD><<<grid, block>>>(src, list);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
int totalCount;
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
return totalCount;
}
}
}}}
#endif /* CUDA_DISABLER */
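
A sequential, host-side reference of what buildPointList above produces (editorial illustration, not part of the commit): the coordinates of every non-zero pixel, packed as (y << 16) | x. The GPU version emits the same set of points, only in a different order.

// Collect packed coordinates of non-zero pixels of a CV_8UC1 image.
#include <vector>
#include <opencv2/core.hpp>

std::vector<unsigned int> buildPointListRef(const cv::Mat& src)
{
    std::vector<unsigned int> list;
    for (int y = 0; y < src.rows; ++y)
    {
        const uchar* row = src.ptr<uchar>(y);
        for (int x = 0; x < src.cols; ++x)
            if (row[x])
                list.push_back((static_cast<unsigned int>(y) << 16) | static_cast<unsigned int>(x));
    }
    return list;
}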


@@ -0,0 +1,494 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include <utility>
#include <algorithm>
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/emulation.hpp"
#include "opencv2/core/cuda/transform.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/utility.hpp"
using namespace cv::cuda;
using namespace cv::cuda::device;
namespace canny
{
struct L1 : binary_function<int, int, float>
{
__device__ __forceinline__ float operator ()(int x, int y) const
{
return ::abs(x) + ::abs(y);
}
__host__ __device__ __forceinline__ L1() {}
__host__ __device__ __forceinline__ L1(const L1&) {}
};
struct L2 : binary_function<int, int, float>
{
__device__ __forceinline__ float operator ()(int x, int y) const
{
return ::sqrtf(x * x + y * y);
}
__host__ __device__ __forceinline__ L2() {}
__host__ __device__ __forceinline__ L2(const L2&) {}
};
}
namespace cv { namespace cuda { namespace device
{
template <> struct TransformFunctorTraits<canny::L1> : DefaultTransformFunctorTraits<canny::L1>
{
enum { smart_shift = 4 };
};
template <> struct TransformFunctorTraits<canny::L2> : DefaultTransformFunctorTraits<canny::L2>
{
enum { smart_shift = 4 };
};
}}}
namespace canny
{
texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_src(false, cudaFilterModePoint, cudaAddressModeClamp);
struct SrcTex
{
int xoff;
int yoff;
__host__ SrcTex(int _xoff, int _yoff) : xoff(_xoff), yoff(_yoff) {}
__device__ __forceinline__ int operator ()(int y, int x) const
{
return tex2D(tex_src, x + xoff, y + yoff);
}
};
template <class Norm> __global__
void calcMagnitudeKernel(const SrcTex src, PtrStepi dx, PtrStepi dy, PtrStepSzf mag, const Norm norm)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (y >= mag.rows || x >= mag.cols)
return;
int dxVal = (src(y - 1, x + 1) + 2 * src(y, x + 1) + src(y + 1, x + 1)) - (src(y - 1, x - 1) + 2 * src(y, x - 1) + src(y + 1, x - 1));
int dyVal = (src(y + 1, x - 1) + 2 * src(y + 1, x) + src(y + 1, x + 1)) - (src(y - 1, x - 1) + 2 * src(y - 1, x) + src(y - 1, x + 1));
dx(y, x) = dxVal;
dy(y, x) = dyVal;
mag(y, x) = norm(dxVal, dyVal);
}
void calcMagnitude(PtrStepSzb srcWhole, int xoff, int yoff, PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad)
{
const dim3 block(16, 16);
const dim3 grid(divUp(mag.cols, block.x), divUp(mag.rows, block.y));
bindTexture(&tex_src, srcWhole);
SrcTex src(xoff, yoff);
if (L2Grad)
{
L2 norm;
calcMagnitudeKernel<<<grid, block>>>(src, dx, dy, mag, norm);
}
else
{
L1 norm;
calcMagnitudeKernel<<<grid, block>>>(src, dx, dy, mag, norm);
}
cudaSafeCall( cudaGetLastError() );
cudaSafeCall(cudaThreadSynchronize());
}
void calcMagnitude(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, bool L2Grad)
{
if (L2Grad)
{
L2 norm;
transform(dx, dy, mag, norm, WithOutMask(), 0);
}
else
{
L1 norm;
transform(dx, dy, mag, norm, WithOutMask(), 0);
}
}
}
//////////////////////////////////////////////////////////////////////////////////////////
namespace canny
{
texture<float, cudaTextureType2D, cudaReadModeElementType> tex_mag(false, cudaFilterModePoint, cudaAddressModeClamp);
__global__ void calcMapKernel(const PtrStepSzi dx, const PtrStepi dy, PtrStepi map, const float low_thresh, const float high_thresh)
{
const int CANNY_SHIFT = 15;
const int TG22 = (int)(0.4142135623730950488016887242097*(1<<CANNY_SHIFT) + 0.5);
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x == 0 || x >= dx.cols - 1 || y == 0 || y >= dx.rows - 1)
return;
int dxVal = dx(y, x);
int dyVal = dy(y, x);
const int s = (dxVal ^ dyVal) < 0 ? -1 : 1;
const float m = tex2D(tex_mag, x, y);
dxVal = ::abs(dxVal);
dyVal = ::abs(dyVal);
// 0 - the pixel can not belong to an edge
// 1 - the pixel might belong to an edge
// 2 - the pixel does belong to an edge
int edge_type = 0;
if (m > low_thresh)
{
const int tg22x = dxVal * TG22;
const int tg67x = tg22x + ((dxVal + dxVal) << CANNY_SHIFT);
dyVal <<= CANNY_SHIFT;
if (dyVal < tg22x)
{
if (m > tex2D(tex_mag, x - 1, y) && m >= tex2D(tex_mag, x + 1, y))
edge_type = 1 + (int)(m > high_thresh);
}
else if(dyVal > tg67x)
{
if (m > tex2D(tex_mag, x, y - 1) && m >= tex2D(tex_mag, x, y + 1))
edge_type = 1 + (int)(m > high_thresh);
}
else
{
if (m > tex2D(tex_mag, x - s, y - 1) && m >= tex2D(tex_mag, x + s, y + 1))
edge_type = 1 + (int)(m > high_thresh);
}
}
map(y, x) = edge_type;
}
void calcMap(PtrStepSzi dx, PtrStepSzi dy, PtrStepSzf mag, PtrStepSzi map, float low_thresh, float high_thresh)
{
const dim3 block(16, 16);
const dim3 grid(divUp(dx.cols, block.x), divUp(dx.rows, block.y));
bindTexture(&tex_mag, mag);
calcMapKernel<<<grid, block>>>(dx, dy, map, low_thresh, high_thresh);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
}
}
//////////////////////////////////////////////////////////////////////////////////////////
namespace canny
{
__device__ int counter = 0;
__global__ void edgesHysteresisLocalKernel(PtrStepSzi map, ushort2* st)
{
__shared__ volatile int smem[18][18];
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
smem[threadIdx.y + 1][threadIdx.x + 1] = x < map.cols && y < map.rows ? map(y, x) : 0;
if (threadIdx.y == 0)
smem[0][threadIdx.x + 1] = y > 0 ? map(y - 1, x) : 0;
if (threadIdx.y == blockDim.y - 1)
smem[blockDim.y + 1][threadIdx.x + 1] = y + 1 < map.rows ? map(y + 1, x) : 0;
if (threadIdx.x == 0)
smem[threadIdx.y + 1][0] = x > 0 ? map(y, x - 1) : 0;
if (threadIdx.x == blockDim.x - 1)
smem[threadIdx.y + 1][blockDim.x + 1] = x + 1 < map.cols ? map(y, x + 1) : 0;
if (threadIdx.x == 0 && threadIdx.y == 0)
smem[0][0] = y > 0 && x > 0 ? map(y - 1, x - 1) : 0;
if (threadIdx.x == blockDim.x - 1 && threadIdx.y == 0)
smem[0][blockDim.x + 1] = y > 0 && x + 1 < map.cols ? map(y - 1, x + 1) : 0;
if (threadIdx.x == 0 && threadIdx.y == blockDim.y - 1)
smem[blockDim.y + 1][0] = y + 1 < map.rows && x > 0 ? map(y + 1, x - 1) : 0;
if (threadIdx.x == blockDim.x - 1 && threadIdx.y == blockDim.y - 1)
smem[blockDim.y + 1][blockDim.x + 1] = y + 1 < map.rows && x + 1 < map.cols ? map(y + 1, x + 1) : 0;
__syncthreads();
if (x >= map.cols || y >= map.rows)
return;
int n;
#pragma unroll
for (int k = 0; k < 16; ++k)
{
n = 0;
if (smem[threadIdx.y + 1][threadIdx.x + 1] == 1)
{
n += smem[threadIdx.y ][threadIdx.x ] == 2;
n += smem[threadIdx.y ][threadIdx.x + 1] == 2;
n += smem[threadIdx.y ][threadIdx.x + 2] == 2;
n += smem[threadIdx.y + 1][threadIdx.x ] == 2;
n += smem[threadIdx.y + 1][threadIdx.x + 2] == 2;
n += smem[threadIdx.y + 2][threadIdx.x ] == 2;
n += smem[threadIdx.y + 2][threadIdx.x + 1] == 2;
n += smem[threadIdx.y + 2][threadIdx.x + 2] == 2;
}
if (n > 0)
smem[threadIdx.y + 1][threadIdx.x + 1] = 2;
}
const int e = smem[threadIdx.y + 1][threadIdx.x + 1];
map(y, x) = e;
n = 0;
if (e == 2)
{
n += smem[threadIdx.y ][threadIdx.x ] == 1;
n += smem[threadIdx.y ][threadIdx.x + 1] == 1;
n += smem[threadIdx.y ][threadIdx.x + 2] == 1;
n += smem[threadIdx.y + 1][threadIdx.x ] == 1;
n += smem[threadIdx.y + 1][threadIdx.x + 2] == 1;
n += smem[threadIdx.y + 2][threadIdx.x ] == 1;
n += smem[threadIdx.y + 2][threadIdx.x + 1] == 1;
n += smem[threadIdx.y + 2][threadIdx.x + 2] == 1;
}
if (n > 0)
{
const int ind = ::atomicAdd(&counter, 1);
st[ind] = make_ushort2(x, y);
}
}
void edgesHysteresisLocal(PtrStepSzi map, ushort2* st1)
{
void* counter_ptr;
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, counter) );
cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(int)) );
const dim3 block(16, 16);
const dim3 grid(divUp(map.cols, block.x), divUp(map.rows, block.y));
edgesHysteresisLocalKernel<<<grid, block>>>(map, st1);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
}
}
//////////////////////////////////////////////////////////////////////////////////////////
namespace canny
{
__constant__ int c_dx[8] = {-1, 0, 1, -1, 1, -1, 0, 1};
__constant__ int c_dy[8] = {-1, -1, -1, 0, 0, 1, 1, 1};
__global__ void edgesHysteresisGlobalKernel(PtrStepSzi map, ushort2* st1, ushort2* st2, const int count)
{
const int stack_size = 512;
__shared__ int s_counter;
__shared__ int s_ind;
__shared__ ushort2 s_st[stack_size];
if (threadIdx.x == 0)
s_counter = 0;
__syncthreads();
int ind = blockIdx.y * gridDim.x + blockIdx.x;
if (ind >= count)
return;
ushort2 pos = st1[ind];
if (threadIdx.x < 8)
{
pos.x += c_dx[threadIdx.x];
pos.y += c_dy[threadIdx.x];
if (pos.x > 0 && pos.x < map.cols && pos.y > 0 && pos.y < map.rows && map(pos.y, pos.x) == 1)
{
map(pos.y, pos.x) = 2;
ind = Emulation::smem::atomicAdd(&s_counter, 1);
s_st[ind] = pos;
}
}
__syncthreads();
while (s_counter > 0 && s_counter <= stack_size - blockDim.x)
{
const int subTaskIdx = threadIdx.x >> 3;
const int portion = ::min(s_counter, blockDim.x >> 3);
if (subTaskIdx < portion)
pos = s_st[s_counter - 1 - subTaskIdx];
__syncthreads();
if (threadIdx.x == 0)
s_counter -= portion;
__syncthreads();
if (subTaskIdx < portion)
{
pos.x += c_dx[threadIdx.x & 7];
pos.y += c_dy[threadIdx.x & 7];
if (pos.x > 0 && pos.x < map.cols && pos.y > 0 && pos.y < map.rows && map(pos.y, pos.x) == 1)
{
map(pos.y, pos.x) = 2;
ind = Emulation::smem::atomicAdd(&s_counter, 1);
s_st[ind] = pos;
}
}
__syncthreads();
}
if (s_counter > 0)
{
if (threadIdx.x == 0)
{
ind = ::atomicAdd(&counter, s_counter);
s_ind = ind - s_counter;
}
__syncthreads();
ind = s_ind;
for (int i = threadIdx.x; i < s_counter; i += blockDim.x)
st2[ind + i] = s_st[i];
}
}
void edgesHysteresisGlobal(PtrStepSzi map, ushort2* st1, ushort2* st2)
{
void* counter_ptr;
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, canny::counter) );
int count;
cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) );
while (count > 0)
{
cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(int)) );
const dim3 block(128);
const dim3 grid(::min(count, 65535u), divUp(count, 65535), 1);
edgesHysteresisGlobalKernel<<<grid, block>>>(map, st1, st2, count);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) );
std::swap(st1, st2);
}
}
}
//////////////////////////////////////////////////////////////////////////////////////////
namespace canny
{
struct GetEdges : unary_function<int, uchar>
{
__device__ __forceinline__ uchar operator ()(int e) const
{
return (uchar)(-(e >> 1));
}
__host__ __device__ __forceinline__ GetEdges() {}
__host__ __device__ __forceinline__ GetEdges(const GetEdges&) {}
};
}
namespace cv { namespace cuda { namespace device
{
template <> struct TransformFunctorTraits<canny::GetEdges> : DefaultTransformFunctorTraits<canny::GetEdges>
{
enum { smart_shift = 4 };
};
}}}
namespace canny
{
void getEdges(PtrStepSzi map, PtrStepSzb dst)
{
transform(map, dst, GetEdges(), WithOutMask(), 0);
}
}
#endif /* CUDA_DISABLER */
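
A sequential, host-side reference of the hysteresis stage implemented by the two kernels above (editorial illustration, not part of the commit): candidate pixels (map value 1) that touch a strong edge (map value 2) are promoted until no reachable candidates remain, using the same 8-neighbourhood and border handling.

#include <queue>
#include <utility>
#include <opencv2/core.hpp>

// map is CV_32SC1 holding 0 (non-edge), 1 (candidate) or 2 (strong edge).
void hysteresisRef(cv::Mat& map)
{
    static const int c_dx[8] = {-1, 0, 1, -1, 1, -1, 0, 1};
    static const int c_dy[8] = {-1, -1, -1, 0, 0, 1, 1, 1};
    std::queue<std::pair<int, int> > q;
    for (int y = 0; y < map.rows; ++y)
        for (int x = 0; x < map.cols; ++x)
            if (map.at<int>(y, x) == 2)
                q.push(std::make_pair(x, y));
    while (!q.empty())
    {
        const std::pair<int, int> p = q.front(); q.pop();
        for (int k = 0; k < 8; ++k)
        {
            const int nx = p.first + c_dx[k], ny = p.second + c_dy[k];
            if (nx > 0 && nx < map.cols && ny > 0 && ny < map.rows && map.at<int>(ny, nx) == 1)
            {
                map.at<int>(ny, nx) = 2;
                q.push(std::make_pair(nx, ny));
            }
        }
    }
}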


@@ -0,0 +1,186 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/emulation.hpp"
#include "opencv2/core/cuda/scan.hpp"
#include "opencv2/core/cuda/reduce.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"
using namespace cv::cuda;
using namespace cv::cuda::device;
namespace clahe
{
__global__ void calcLutKernel(const PtrStepb src, PtrStepb lut,
const int2 tileSize, const int tilesX,
const int clipLimit, const float lutScale)
{
__shared__ int smem[512];
const int tx = blockIdx.x;
const int ty = blockIdx.y;
const unsigned int tid = threadIdx.y * blockDim.x + threadIdx.x;
smem[tid] = 0;
__syncthreads();
for (int i = threadIdx.y; i < tileSize.y; i += blockDim.y)
{
const uchar* srcPtr = src.ptr(ty * tileSize.y + i) + tx * tileSize.x;
for (int j = threadIdx.x; j < tileSize.x; j += blockDim.x)
{
const int data = srcPtr[j];
Emulation::smem::atomicAdd(&smem[data], 1);
}
}
__syncthreads();
int tHistVal = smem[tid];
__syncthreads();
if (clipLimit > 0)
{
// clip histogram bar
int clipped = 0;
if (tHistVal > clipLimit)
{
clipped = tHistVal - clipLimit;
tHistVal = clipLimit;
}
// find number of overall clipped samples
reduce<256>(smem, clipped, tid, plus<int>());
// broadcast evaluated value
__shared__ int totalClipped;
if (tid == 0)
totalClipped = clipped;
__syncthreads();
// redistribute clipped samples evenly
int redistBatch = totalClipped / 256;
tHistVal += redistBatch;
int residual = totalClipped - redistBatch * 256;
if (tid < residual)
++tHistVal;
}
const int lutVal = blockScanInclusive<256>(tHistVal, smem, tid);
lut(ty * tilesX + tx, tid) = saturate_cast<uchar>(__float2int_rn(lutScale * lutVal));
}
void calcLut(PtrStepSzb src, PtrStepb lut, int tilesX, int tilesY, int2 tileSize, int clipLimit, float lutScale, cudaStream_t stream)
{
const dim3 block(32, 8);
const dim3 grid(tilesX, tilesY);
calcLutKernel<<<grid, block, 0, stream>>>(src, lut, tileSize, tilesX, clipLimit, lutScale);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
__global__ void tranformKernel(const PtrStepSzb src, PtrStepb dst, const PtrStepb lut, const int2 tileSize, const int tilesX, const int tilesY)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x >= src.cols || y >= src.rows)
return;
const float tyf = (static_cast<float>(y) / tileSize.y) - 0.5f;
int ty1 = __float2int_rd(tyf);
int ty2 = ty1 + 1;
const float ya = tyf - ty1;
ty1 = ::max(ty1, 0);
ty2 = ::min(ty2, tilesY - 1);
const float txf = (static_cast<float>(x) / tileSize.x) - 0.5f;
int tx1 = __float2int_rd(txf);
int tx2 = tx1 + 1;
const float xa = txf - tx1;
tx1 = ::max(tx1, 0);
tx2 = ::min(tx2, tilesX - 1);
const int srcVal = src(y, x);
float res = 0;
res += lut(ty1 * tilesX + tx1, srcVal) * ((1.0f - xa) * (1.0f - ya));
res += lut(ty1 * tilesX + tx2, srcVal) * ((xa) * (1.0f - ya));
res += lut(ty2 * tilesX + tx1, srcVal) * ((1.0f - xa) * (ya));
res += lut(ty2 * tilesX + tx2, srcVal) * ((xa) * (ya));
dst(y, x) = saturate_cast<uchar>(res);
}
void transform(PtrStepSzb src, PtrStepSzb dst, PtrStepb lut, int tilesX, int tilesY, int2 tileSize, cudaStream_t stream)
{
const dim3 block(32, 8);
const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
cudaSafeCall( cudaFuncSetCacheConfig(transformKernel, cudaFuncCachePreferL1) );
transformKernel<<<grid, block, 0, stream>>>(src, dst, lut, tileSize, tilesX, tilesY);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
}
#endif // CUDA_DISABLER

View File

@@ -0,0 +1,461 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/transform.hpp"
#include "opencv2/core/cuda/color.hpp"
#include "cvt_color_internal.h"
namespace cv { namespace cuda { namespace device
{
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_rgba_traits<uchar>::functor_type)
{
enum { smart_block_dim_x = 8 };
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_bgr555_traits::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_bgr555_traits::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_bgr565_traits::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_bgr565_traits::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgr555_to_bgra_traits::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgr555_to_rgba_traits::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgr565_to_bgra_traits::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgr565_to_rgba_traits::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(gray_to_bgra_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(gray_to_bgr555_traits::functor_type)
{
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(gray_to_bgr565_traits::functor_type)
{
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_yuv4_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_yuv4_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(yuv4_to_bgra_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(yuv4_to_rgba_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_YCrCb4_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_YCrCb4_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(YCrCb4_to_bgra_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(YCrCb4_to_rgba_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_xyz4_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_xyz4_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(xyz4_to_bgra_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(xyz4_to_rgba_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_hsv4_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_hsv4_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(hsv4_to_bgra_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(hsv4_to_rgba_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(bgra_to_hls4_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(rgba_to_hls4_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(hls4_to_bgra_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
OPENCV_GPU_TRANSFORM_FUNCTOR_TRAITS(hls4_to_rgba_traits<uchar>::functor_type)
{
enum { smart_block_dim_y = 8 };
enum { smart_shift = 4 };
};
#define OPENCV_GPU_IMPLEMENT_CVTCOLOR(name, traits) \
void name(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream) \
{ \
traits::functor_type functor = traits::create_functor(); \
typedef typename traits::functor_type::argument_type src_t; \
typedef typename traits::functor_type::result_type dst_t; \
cv::cuda::device::transform((PtrStepSz<src_t>)src, (PtrStepSz<dst_t>)dst, functor, WithOutMask(), stream); \
}
#define OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(name) \
OPENCV_GPU_IMPLEMENT_CVTCOLOR(name, name ## _traits)
#define OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(name) \
OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _8u, name ## _traits<uchar>) \
OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _16u, name ## _traits<ushort>) \
OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _32f, name ## _traits<float>)
#define OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(name) \
OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _8u, name ## _traits<uchar>) \
OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _32f, name ## _traits<float>)
#define OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(name) \
OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _8u, name ## _traits<uchar>) \
OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _32f, name ## _traits<float>) \
OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _full_8u, name ## _full_traits<uchar>) \
OPENCV_GPU_IMPLEMENT_CVTCOLOR(name ## _full_32f, name ## _full_traits<float>)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr_to_bgr555)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr_to_bgr565)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(rgb_to_bgr555)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(rgb_to_bgr565)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgra_to_bgr555)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgra_to_bgr565)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(rgba_to_bgr555)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(rgba_to_bgr565)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr555_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr565_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr555_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr565_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr555_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr565_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr555_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr565_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(gray_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(gray_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(gray_to_bgr555)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(gray_to_bgr565)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr555_to_gray)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE(bgr565_to_gray)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_gray)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_gray)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_gray)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_gray)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_yuv)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_yuv)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_yuv4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_yuv4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_yuv)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_yuv)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_yuv4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_yuv4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv4_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv4_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv4_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(yuv4_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_YCrCb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_YCrCb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_YCrCb4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_YCrCb4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_YCrCb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_YCrCb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_YCrCb4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_YCrCb4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb4_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb4_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb4_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(YCrCb4_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_xyz)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_xyz)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgb_to_xyz4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(rgba_to_xyz4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_xyz)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_xyz)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgr_to_xyz4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(bgra_to_xyz4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz4_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz4_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz4_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL(xyz4_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(rgb_to_hsv)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(rgba_to_hsv)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(rgb_to_hsv4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(rgba_to_hsv4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(bgr_to_hsv)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(bgra_to_hsv)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(bgr_to_hsv4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(bgra_to_hsv4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hsv_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hsv_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hsv4_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hsv4_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hsv_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hsv_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hsv4_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hsv4_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(rgb_to_hls)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(rgba_to_hls)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(rgb_to_hls4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(rgba_to_hls4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(bgr_to_hls)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(bgra_to_hls)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(bgr_to_hls4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(bgra_to_hls4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hls_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hls_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hls4_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hls4_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hls_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hls_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hls4_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL(hls4_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgb_to_lab)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgba_to_lab)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgb_to_lab4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgba_to_lab4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgr_to_lab)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgra_to_lab)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgr_to_lab4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgra_to_lab4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lrgb_to_lab)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lrgba_to_lab)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lrgb_to_lab4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lrgba_to_lab4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lbgr_to_lab)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lbgra_to_lab)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lbgr_to_lab4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lbgra_to_lab4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab4_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab4_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab4_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab4_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab_to_lrgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab4_to_lrgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab_to_lrgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab4_to_lrgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab_to_lbgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab4_to_lbgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab_to_lbgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lab4_to_lbgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgb_to_luv)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgba_to_luv)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgb_to_luv4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(rgba_to_luv4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgr_to_luv)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgra_to_luv)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgr_to_luv4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(bgra_to_luv4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lrgb_to_luv)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lrgba_to_luv)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lrgb_to_luv4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lrgba_to_luv4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lbgr_to_luv)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lbgra_to_luv)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lbgr_to_luv4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(lbgra_to_luv4)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv4_to_rgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv4_to_rgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv4_to_bgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv4_to_bgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv_to_lrgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv4_to_lrgb)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv_to_lrgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv4_to_lrgba)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv_to_lbgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv4_to_lbgr)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv_to_lbgra)
OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F(luv4_to_lbgra)
#undef OPENCV_GPU_IMPLEMENT_CVTCOLOR
#undef OPENCV_GPU_IMPLEMENT_CVTCOLOR_ONE
#undef OPENCV_GPU_IMPLEMENT_CVTCOLOR_ALL
#undef OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F
#undef OPENCV_GPU_IMPLEMENT_CVTCOLOR_8U32F_FULL
}}} // namespace cv { namespace cuda { namespace device
#endif /* CUDA_DISABLER */

View File

@@ -0,0 +1,280 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"
#include "opencv2/core/cuda/border_interpolate.hpp"
#include "opencv2/opencv_modules.hpp"
#ifdef HAVE_OPENCV_CUDAFILTERS
namespace cv { namespace cuda { namespace device
{
namespace imgproc
{
/////////////////////////////////////////// Corner Harris /////////////////////////////////////////////////
texture<float, cudaTextureType2D, cudaReadModeElementType> harrisDxTex(0, cudaFilterModePoint, cudaAddressModeClamp);
texture<float, cudaTextureType2D, cudaReadModeElementType> harrisDyTex(0, cudaFilterModePoint, cudaAddressModeClamp);
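// Harris response computed from the derivative images bound to the textures
// above: a = sum(dx*dx), b = sum(dx*dy), c = sum(dy*dy) over a
// block_size x block_size window, response = det(M) - k*trace(M)^2
// = a*c - b*b - k*(a + c)^2.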
__global__ void cornerHarris_kernel(const int block_size, const float k, PtrStepSzf dst)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x < dst.cols && y < dst.rows)
{
float a = 0.f;
float b = 0.f;
float c = 0.f;
const int ibegin = y - (block_size / 2);
const int jbegin = x - (block_size / 2);
const int iend = ibegin + block_size;
const int jend = jbegin + block_size;
for (int i = ibegin; i < iend; ++i)
{
for (int j = jbegin; j < jend; ++j)
{
float dx = tex2D(harrisDxTex, j, i);
float dy = tex2D(harrisDyTex, j, i);
a += dx * dx;
b += dx * dy;
c += dy * dy;
}
}
dst(y, x) = a * c - b * b - k * (a + c) * (a + c);
}
}
template <typename BR, typename BC>
__global__ void cornerHarris_kernel(const int block_size, const float k, PtrStepSzf dst, const BR border_row, const BC border_col)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x < dst.cols && y < dst.rows)
{
float a = 0.f;
float b = 0.f;
float c = 0.f;
const int ibegin = y - (block_size / 2);
const int jbegin = x - (block_size / 2);
const int iend = ibegin + block_size;
const int jend = jbegin + block_size;
for (int i = ibegin; i < iend; ++i)
{
const int y = border_col.idx_row(i);
for (int j = jbegin; j < jend; ++j)
{
const int x = border_row.idx_col(j);
float dx = tex2D(harrisDxTex, x, y);
float dy = tex2D(harrisDyTex, x, y);
a += dx * dx;
b += dx * dy;
c += dy * dy;
}
}
dst(y, x) = a * c - b * b - k * (a + c) * (a + c);
}
}
void cornerHarris_gpu(int block_size, float k, PtrStepSzf Dx, PtrStepSzf Dy, PtrStepSzf dst, int border_type, cudaStream_t stream)
{
dim3 block(32, 8);
dim3 grid(divUp(Dx.cols, block.x), divUp(Dx.rows, block.y));
bindTexture(&harrisDxTex, Dx);
bindTexture(&harrisDyTex, Dy);
switch (border_type)
{
case BORDER_REFLECT101:
cornerHarris_kernel<<<grid, block, 0, stream>>>(block_size, k, dst, BrdRowReflect101<void>(Dx.cols), BrdColReflect101<void>(Dx.rows));
break;
case BORDER_REFLECT:
cornerHarris_kernel<<<grid, block, 0, stream>>>(block_size, k, dst, BrdRowReflect<void>(Dx.cols), BrdColReflect<void>(Dx.rows));
break;
case BORDER_REPLICATE:
cornerHarris_kernel<<<grid, block, 0, stream>>>(block_size, k, dst);
break;
}
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
/////////////////////////////////////////// Corner Min Eigen Val /////////////////////////////////////////////////
texture<float, cudaTextureType2D, cudaReadModeElementType> minEigenValDxTex(0, cudaFilterModePoint, cudaAddressModeClamp);
texture<float, cudaTextureType2D, cudaReadModeElementType> minEigenValDyTex(0, cudaFilterModePoint, cudaAddressModeClamp);
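// Minimal eigenvalue of the structure tensor M = [[sum(dx*dx), sum(dx*dy)],
// [sum(dx*dy), sum(dy*dy)]]: with a and c halved below, the result is
// (a + c) - sqrt((a - c)^2 + b^2), i.e. the smaller eigenvalue of M.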
__global__ void cornerMinEigenVal_kernel(const int block_size, PtrStepSzf dst)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x < dst.cols && y < dst.rows)
{
float a = 0.f;
float b = 0.f;
float c = 0.f;
const int ibegin = y - (block_size / 2);
const int jbegin = x - (block_size / 2);
const int iend = ibegin + block_size;
const int jend = jbegin + block_size;
for (int i = ibegin; i < iend; ++i)
{
for (int j = jbegin; j < jend; ++j)
{
float dx = tex2D(minEigenValDxTex, j, i);
float dy = tex2D(minEigenValDyTex, j, i);
a += dx * dx;
b += dx * dy;
c += dy * dy;
}
}
a *= 0.5f;
c *= 0.5f;
dst(y, x) = (a + c) - sqrtf((a - c) * (a - c) + b * b);
}
}
template <typename BR, typename BC>
__global__ void cornerMinEigenVal_kernel(const int block_size, PtrStepSzf dst, const BR border_row, const BC border_col)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x < dst.cols && y < dst.rows)
{
float a = 0.f;
float b = 0.f;
float c = 0.f;
const int ibegin = y - (block_size / 2);
const int jbegin = x - (block_size / 2);
const int iend = ibegin + block_size;
const int jend = jbegin + block_size;
for (int i = ibegin; i < iend; ++i)
{
int y = border_col.idx_row(i);
for (int j = jbegin; j < jend; ++j)
{
int x = border_row.idx_col(j);
float dx = tex2D(minEigenValDxTex, x, y);
float dy = tex2D(minEigenValDyTex, x, y);
a += dx * dx;
b += dx * dy;
c += dy * dy;
}
}
a *= 0.5f;
c *= 0.5f;
dst(y, x) = (a + c) - sqrtf((a - c) * (a - c) + b * b);
}
}
void cornerMinEigenVal_gpu(int block_size, PtrStepSzf Dx, PtrStepSzf Dy, PtrStepSzf dst, int border_type, cudaStream_t stream)
{
dim3 block(32, 8);
dim3 grid(divUp(Dx.cols, block.x), divUp(Dx.rows, block.y));
bindTexture(&minEigenValDxTex, Dx);
bindTexture(&minEigenValDyTex, Dy);
switch (border_type)
{
case BORDER_REFLECT101:
cornerMinEigenVal_kernel<<<grid, block, 0, stream>>>(block_size, dst, BrdRowReflect101<void>(Dx.cols), BrdColReflect101<void>(Dx.rows));
break;
case BORDER_REFLECT:
cornerMinEigenVal_kernel<<<grid, block, 0, stream>>>(block_size, dst, BrdRowReflect<void>(Dx.cols), BrdColReflect<void>(Dx.rows));
break;
case BORDER_REPLICATE:
cornerMinEigenVal_kernel<<<grid, block, 0, stream>>>(block_size, dst);
break;
}
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall(cudaDeviceSynchronize());
}
}
}}}
#endif // HAVE_OPENCV_CUDAFILTERS
#endif // CUDA_DISABLER

View File

@@ -0,0 +1,544 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/limits.hpp"
#include "opencv2/core/cuda/color.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"
namespace cv { namespace cuda { namespace device
{
template <typename T> struct Bayer2BGR;
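// Bayer demosaicing by bilinear interpolation. The 8-bit specialization loads
// the source as packed uchar4 words, so each thread reads a 3x3 neighborhood
// of words and reconstructs four adjacent BGR pixels (res0..res3);
// blue_last / start_with_green select the BG/GB/RG/GR CFA layout.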
template <> struct Bayer2BGR<uchar>
{
uchar3 res0;
uchar3 res1;
uchar3 res2;
uchar3 res3;
__device__ void apply(const PtrStepSzb& src, int s_x, int s_y, bool blue_last, bool start_with_green)
{
uchar4 patch[3][3];
patch[0][1] = ((const uchar4*) src.ptr(s_y - 1))[s_x];
patch[0][0] = ((const uchar4*) src.ptr(s_y - 1))[::max(s_x - 1, 0)];
patch[0][2] = ((const uchar4*) src.ptr(s_y - 1))[::min(s_x + 1, ((src.cols + 3) >> 2) - 1)];
patch[1][1] = ((const uchar4*) src.ptr(s_y))[s_x];
patch[1][0] = ((const uchar4*) src.ptr(s_y))[::max(s_x - 1, 0)];
patch[1][2] = ((const uchar4*) src.ptr(s_y))[::min(s_x + 1, ((src.cols + 3) >> 2) - 1)];
patch[2][1] = ((const uchar4*) src.ptr(s_y + 1))[s_x];
patch[2][0] = ((const uchar4*) src.ptr(s_y + 1))[::max(s_x - 1, 0)];
patch[2][2] = ((const uchar4*) src.ptr(s_y + 1))[::min(s_x + 1, ((src.cols + 3) >> 2) - 1)];
if ((s_y & 1) ^ start_with_green)
{
const int t0 = (patch[0][1].x + patch[2][1].x + 1) >> 1;
const int t1 = (patch[1][0].w + patch[1][1].y + 1) >> 1;
const int t2 = (patch[0][1].x + patch[0][1].z + patch[2][1].x + patch[2][1].z + 2) >> 2;
const int t3 = (patch[0][1].y + patch[1][1].x + patch[1][1].z + patch[2][1].y + 2) >> 2;
const int t4 = (patch[0][1].z + patch[2][1].z + 1) >> 1;
const int t5 = (patch[1][1].y + patch[1][1].w + 1) >> 1;
const int t6 = (patch[0][1].z + patch[0][2].x + patch[2][1].z + patch[2][2].x + 2) >> 2;
const int t7 = (patch[0][1].w + patch[1][1].z + patch[1][2].x + patch[2][1].w + 2) >> 2;
if ((s_y & 1) ^ blue_last)
{
res0.x = t1;
res0.y = patch[1][1].x;
res0.z = t0;
res1.x = patch[1][1].y;
res1.y = t3;
res1.z = t2;
res2.x = t5;
res2.y = patch[1][1].z;
res2.z = t4;
res3.x = patch[1][1].w;
res3.y = t7;
res3.z = t6;
}
else
{
res0.x = t0;
res0.y = patch[1][1].x;
res0.z = t1;
res1.x = t2;
res1.y = t3;
res1.z = patch[1][1].y;
res2.x = t4;
res2.y = patch[1][1].z;
res2.z = t5;
res3.x = t6;
res3.y = t7;
res3.z = patch[1][1].w;
}
}
else
{
const int t0 = (patch[0][0].w + patch[0][1].y + patch[2][0].w + patch[2][1].y + 2) >> 2;
const int t1 = (patch[0][1].x + patch[1][0].w + patch[1][1].y + patch[2][1].x + 2) >> 2;
const int t2 = (patch[0][1].y + patch[2][1].y + 1) >> 1;
const int t3 = (patch[1][1].x + patch[1][1].z + 1) >> 1;
const int t4 = (patch[0][1].y + patch[0][1].w + patch[2][1].y + patch[2][1].w + 2) >> 2;
const int t5 = (patch[0][1].z + patch[1][1].y + patch[1][1].w + patch[2][1].z + 2) >> 2;
const int t6 = (patch[0][1].w + patch[2][1].w + 1) >> 1;
const int t7 = (patch[1][1].z + patch[1][2].x + 1) >> 1;
if ((s_y & 1) ^ blue_last)
{
res0.x = patch[1][1].x;
res0.y = t1;
res0.z = t0;
res1.x = t3;
res1.y = patch[1][1].y;
res1.z = t2;
res2.x = patch[1][1].z;
res2.y = t5;
res2.z = t4;
res3.x = t7;
res3.y = patch[1][1].w;
res3.z = t6;
}
else
{
res0.x = t0;
res0.y = t1;
res0.z = patch[1][1].x;
res1.x = t2;
res1.y = patch[1][1].y;
res1.z = t3;
res2.x = t4;
res2.y = t5;
res2.z = patch[1][1].z;
res3.x = t6;
res3.y = patch[1][1].w;
res3.z = t7;
}
}
}
};
template <typename D> __device__ __forceinline__ D toDst(const uchar3& pix);
template <> __device__ __forceinline__ uchar toDst<uchar>(const uchar3& pix)
{
typename bgr_to_gray_traits<uchar>::functor_type f = bgr_to_gray_traits<uchar>::create_functor();
return f(pix);
}
template <> __device__ __forceinline__ uchar3 toDst<uchar3>(const uchar3& pix)
{
return pix;
}
template <> __device__ __forceinline__ uchar4 toDst<uchar4>(const uchar3& pix)
{
return make_uchar4(pix.x, pix.y, pix.z, 255);
}
template <typename D>
__global__ void Bayer2BGR_8u(const PtrStepSzb src, PtrStep<D> dst, const bool blue_last, const bool start_with_green)
{
const int s_x = blockIdx.x * blockDim.x + threadIdx.x;
int s_y = blockIdx.y * blockDim.y + threadIdx.y;
if (s_y >= src.rows || (s_x << 2) >= src.cols)
return;
s_y = ::min(::max(s_y, 1), src.rows - 2);
Bayer2BGR<uchar> bayer;
bayer.apply(src, s_x, s_y, blue_last, start_with_green);
const int d_x = (blockIdx.x * blockDim.x + threadIdx.x) << 2;
const int d_y = blockIdx.y * blockDim.y + threadIdx.y;
dst(d_y, d_x) = toDst<D>(bayer.res0);
if (d_x + 1 < src.cols)
dst(d_y, d_x + 1) = toDst<D>(bayer.res1);
if (d_x + 2 < src.cols)
dst(d_y, d_x + 2) = toDst<D>(bayer.res2);
if (d_x + 3 < src.cols)
dst(d_y, d_x + 3) = toDst<D>(bayer.res3);
}
template <> struct Bayer2BGR<ushort>
{
ushort3 res0;
ushort3 res1;
__device__ void apply(const PtrStepSzb& src, int s_x, int s_y, bool blue_last, bool start_with_green)
{
ushort2 patch[3][3];
patch[0][1] = ((const ushort2*) src.ptr(s_y - 1))[s_x];
patch[0][0] = ((const ushort2*) src.ptr(s_y - 1))[::max(s_x - 1, 0)];
patch[0][2] = ((const ushort2*) src.ptr(s_y - 1))[::min(s_x + 1, ((src.cols + 1) >> 1) - 1)];
patch[1][1] = ((const ushort2*) src.ptr(s_y))[s_x];
patch[1][0] = ((const ushort2*) src.ptr(s_y))[::max(s_x - 1, 0)];
patch[1][2] = ((const ushort2*) src.ptr(s_y))[::min(s_x + 1, ((src.cols + 1) >> 1) - 1)];
patch[2][1] = ((const ushort2*) src.ptr(s_y + 1))[s_x];
patch[2][0] = ((const ushort2*) src.ptr(s_y + 1))[::max(s_x - 1, 0)];
patch[2][2] = ((const ushort2*) src.ptr(s_y + 1))[::min(s_x + 1, ((src.cols + 1) >> 1) - 1)];
if ((s_y & 1) ^ start_with_green)
{
const int t0 = (patch[0][1].x + patch[2][1].x + 1) >> 1;
const int t1 = (patch[1][0].y + patch[1][1].y + 1) >> 1;
const int t2 = (patch[0][1].x + patch[0][2].x + patch[2][1].x + patch[2][2].x + 2) >> 2;
const int t3 = (patch[0][1].y + patch[1][1].x + patch[1][2].x + patch[2][1].y + 2) >> 2;
if ((s_y & 1) ^ blue_last)
{
res0.x = t1;
res0.y = patch[1][1].x;
res0.z = t0;
res1.x = patch[1][1].y;
res1.y = t3;
res1.z = t2;
}
else
{
res0.x = t0;
res0.y = patch[1][1].x;
res0.z = t1;
res1.x = t2;
res1.y = t3;
res1.z = patch[1][1].y;
}
}
else
{
const int t0 = (patch[0][0].y + patch[0][1].y + patch[2][0].y + patch[2][1].y + 2) >> 2;
const int t1 = (patch[0][1].x + patch[1][0].y + patch[1][1].y + patch[2][1].x + 2) >> 2;
const int t2 = (patch[0][1].y + patch[2][1].y + 1) >> 1;
const int t3 = (patch[1][1].x + patch[1][2].x + 1) >> 1;
if ((s_y & 1) ^ blue_last)
{
res0.x = patch[1][1].x;
res0.y = t1;
res0.z = t0;
res1.x = t3;
res1.y = patch[1][1].y;
res1.z = t2;
}
else
{
res0.x = t0;
res0.y = t1;
res0.z = patch[1][1].x;
res1.x = t2;
res1.y = patch[1][1].y;
res1.z = t3;
}
}
}
};
template <typename D> __device__ __forceinline__ D toDst(const ushort3& pix);
template <> __device__ __forceinline__ ushort toDst<ushort>(const ushort3& pix)
{
typename bgr_to_gray_traits<ushort>::functor_type f = bgr_to_gray_traits<ushort>::create_functor();
return f(pix);
}
template <> __device__ __forceinline__ ushort3 toDst<ushort3>(const ushort3& pix)
{
return pix;
}
template <> __device__ __forceinline__ ushort4 toDst<ushort4>(const ushort3& pix)
{
return make_ushort4(pix.x, pix.y, pix.z, numeric_limits<ushort>::max());
}
template <typename D>
__global__ void Bayer2BGR_16u(const PtrStepSzb src, PtrStep<D> dst, const bool blue_last, const bool start_with_green)
{
const int s_x = blockIdx.x * blockDim.x + threadIdx.x;
int s_y = blockIdx.y * blockDim.y + threadIdx.y;
if (s_y >= src.rows || (s_x << 1) >= src.cols)
return;
s_y = ::min(::max(s_y, 1), src.rows - 2);
Bayer2BGR<ushort> bayer;
bayer.apply(src, s_x, s_y, blue_last, start_with_green);
const int d_x = (blockIdx.x * blockDim.x + threadIdx.x) << 1;
const int d_y = blockIdx.y * blockDim.y + threadIdx.y;
dst(d_y, d_x) = toDst<D>(bayer.res0);
if (d_x + 1 < src.cols)
dst(d_y, d_x + 1) = toDst<D>(bayer.res1);
}
template <int cn>
void Bayer2BGR_8u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream)
{
typedef typename TypeVec<uchar, cn>::vec_type dst_t;
const dim3 block(32, 8);
const dim3 grid(divUp(src.cols, 4 * block.x), divUp(src.rows, block.y));
cudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_8u<dst_t>, cudaFuncCachePreferL1) );
Bayer2BGR_8u<dst_t><<<grid, block, 0, stream>>>(src, (PtrStepSz<dst_t>)dst, blue_last, start_with_green);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template <int cn>
void Bayer2BGR_16u_gpu(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream)
{
typedef typename TypeVec<ushort, cn>::vec_type dst_t;
const dim3 block(32, 8);
const dim3 grid(divUp(src.cols, 2 * block.x), divUp(src.rows, block.y));
cudaSafeCall( cudaFuncSetCacheConfig(Bayer2BGR_16u<dst_t>, cudaFuncCachePreferL1) );
Bayer2BGR_16u<dst_t><<<grid, block, 0, stream>>>(src, (PtrStepSz<dst_t>)dst, blue_last, start_with_green);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template void Bayer2BGR_8u_gpu<1>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
template void Bayer2BGR_8u_gpu<3>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
template void Bayer2BGR_8u_gpu<4>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
template void Bayer2BGR_16u_gpu<1>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
template void Bayer2BGR_16u_gpu<3>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
template void Bayer2BGR_16u_gpu<4>(PtrStepSzb src, PtrStepSzb dst, bool blue_last, bool start_with_green, cudaStream_t stream);
//////////////////////////////////////////////////////////////
// Bayer Demosaicing (Malvar, He, and Cutler)
//
// by Morgan McGuire, Williams College
// http://graphics.cs.williams.edu/papers/BayerJGT09/#shaders
//
// ported to CUDA
texture<uchar, cudaTextureType2D, cudaReadModeElementType> sourceTex(false, cudaFilterModePoint, cudaAddressModeClamp);
template <typename DstType>
__global__ void MHCdemosaic(PtrStepSz<DstType> dst, const int2 sourceOffset, const int2 firstRed)
{
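// Filter weights of the Malvar-He-Cutler 5x5 demosaicing kernels, stored
// divided by 8 so the weighted sums below need no final normalization.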
const float kAx = -1.0f / 8.0f, kAy = -1.5f / 8.0f, kAz = 0.5f / 8.0f /*kAw = -1.0f / 8.0f*/;
const float kBx = 2.0f / 8.0f, /*kBy = 0.0f / 8.0f,*/ /*kBz = 0.0f / 8.0f,*/ kBw = 4.0f / 8.0f ;
const float kCx = 4.0f / 8.0f, kCy = 6.0f / 8.0f, kCz = 5.0f / 8.0f /*kCw = 5.0f / 8.0f*/;
const float /*kDx = 0.0f / 8.0f,*/ kDy = 2.0f / 8.0f, kDz = -1.0f / 8.0f /*kDw = -1.0f / 8.0f*/;
const float kEx = -1.0f / 8.0f, kEy = -1.5f / 8.0f, /*kEz = -1.0f / 8.0f,*/ kEw = 0.5f / 8.0f ;
const float kFx = 2.0f / 8.0f, /*kFy = 0.0f / 8.0f,*/ kFz = 4.0f / 8.0f /*kFw = 0.0f / 8.0f*/;
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x == 0 || x >= dst.cols - 1 || y == 0 || y >= dst.rows - 1)
return;
int2 center;
center.x = x + sourceOffset.x;
center.y = y + sourceOffset.y;
int4 xCoord;
xCoord.x = center.x - 2;
xCoord.y = center.x - 1;
xCoord.z = center.x + 1;
xCoord.w = center.x + 2;
int4 yCoord;
yCoord.x = center.y - 2;
yCoord.y = center.y - 1;
yCoord.z = center.y + 1;
yCoord.w = center.y + 2;
float C = tex2D(sourceTex, center.x, center.y); // ( 0, 0)
float4 Dvec;
Dvec.x = tex2D(sourceTex, xCoord.y, yCoord.y); // (-1,-1)
Dvec.y = tex2D(sourceTex, xCoord.y, yCoord.z); // (-1, 1)
Dvec.z = tex2D(sourceTex, xCoord.z, yCoord.y); // ( 1,-1)
Dvec.w = tex2D(sourceTex, xCoord.z, yCoord.z); // ( 1, 1)
float4 value;
value.x = tex2D(sourceTex, center.x, yCoord.x); // ( 0,-2) A0
value.y = tex2D(sourceTex, center.x, yCoord.y); // ( 0,-1) B0
value.z = tex2D(sourceTex, xCoord.x, center.y); // (-2, 0) E0
value.w = tex2D(sourceTex, xCoord.y, center.y); // (-1, 0) F0
// (A0 + A1), (B0 + B1), (E0 + E1), (F0 + F1)
value.x += tex2D(sourceTex, center.x, yCoord.w); // ( 0, 2) A1
value.y += tex2D(sourceTex, center.x, yCoord.z); // ( 0, 1) B1
value.z += tex2D(sourceTex, xCoord.w, center.y); // ( 2, 0) E1
value.w += tex2D(sourceTex, xCoord.z, center.y); // ( 1, 0) F1
float4 PATTERN;
PATTERN.x = kCx * C;
PATTERN.y = kCy * C;
PATTERN.z = kCz * C;
PATTERN.w = PATTERN.z;
float D = Dvec.x + Dvec.y + Dvec.z + Dvec.w;
// There are five filter patterns (identity, cross, checker,
// theta, phi). Precompute the terms from all of them and then
// use swizzles to assign to color channels.
//
// Channel Matches
// x cross (e.g., EE G)
// y checker (e.g., EE B)
// z theta (e.g., EO R)
// w phi (e.g., EO B)
#define A value.x // A0 + A1
#define B value.y // B0 + B1
#define E value.z // E0 + E1
#define F value.w // F0 + F1
float3 temp;
// PATTERN.yzw += (kD.yz * D).xyy;
temp.x = kDy * D;
temp.y = kDz * D;
PATTERN.y += temp.x;
PATTERN.z += temp.y;
PATTERN.w += temp.y;
// PATTERN += (kA.xyz * A).xyzx;
temp.x = kAx * A;
temp.y = kAy * A;
temp.z = kAz * A;
PATTERN.x += temp.x;
PATTERN.y += temp.y;
PATTERN.z += temp.z;
PATTERN.w += temp.x;
// PATTERN += (kE.xyw * E).xyxz;
temp.x = kEx * E;
temp.y = kEy * E;
temp.z = kEw * E;
PATTERN.x += temp.x;
PATTERN.y += temp.y;
PATTERN.z += temp.x;
PATTERN.w += temp.z;
// PATTERN.xw += kB.xw * B;
PATTERN.x += kBx * B;
PATTERN.w += kBw * B;
// PATTERN.xz += kF.xz * F;
PATTERN.x += kFx * F;
PATTERN.z += kFz * F;
// Determine which of four types of pixels we are on.
int2 alternate;
alternate.x = (x + firstRed.x) % 2;
alternate.y = (y + firstRed.y) % 2;
// pixel assembled in BGR order
uchar3 pixelColor =
(alternate.y == 0) ?
((alternate.x == 0) ?
make_uchar3(saturate_cast<uchar>(PATTERN.y), saturate_cast<uchar>(PATTERN.x), saturate_cast<uchar>(C)) :
make_uchar3(saturate_cast<uchar>(PATTERN.w), saturate_cast<uchar>(C), saturate_cast<uchar>(PATTERN.z))) :
((alternate.x == 0) ?
make_uchar3(saturate_cast<uchar>(PATTERN.z), saturate_cast<uchar>(C), saturate_cast<uchar>(PATTERN.w)) :
make_uchar3(saturate_cast<uchar>(C), saturate_cast<uchar>(PATTERN.x), saturate_cast<uchar>(PATTERN.y)));
dst(y, x) = toDst<DstType>(pixelColor);
}
template <int cn>
void MHCdemosaic(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream)
{
typedef typename TypeVec<uchar, cn>::vec_type dst_t;
const dim3 block(32, 8);
const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
bindTexture(&sourceTex, src);
MHCdemosaic<dst_t><<<grid, block, 0, stream>>>((PtrStepSz<dst_t>)dst, sourceOffset, firstRed);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template void MHCdemosaic<1>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream);
template void MHCdemosaic<3>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream);
template void MHCdemosaic<4>(PtrStepSzb src, int2 sourceOffset, PtrStepSzb dst, int2 firstRed, cudaStream_t stream);
}}}
#endif /* CUDA_DISABLER */

View File

@@ -0,0 +1,824 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include <thrust/device_ptr.h>
#include <thrust/transform.h>
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/emulation.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/opencv_modules.hpp"
#ifdef HAVE_OPENCV_CUDAARITHM
namespace cv { namespace cuda { namespace device
{
namespace ght
{
__device__ int g_counter;
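// Compact the edge map into flat coordinate/orientation lists. Each thread row
// (threadIdx.y) keeps a local queue in shared memory; edge pixels with a
// non-zero gradient push their packed (y << 16 | x) coordinate and gradient
// angle in [0, 2*pi). One thread then reserves space in the global lists with
// a single atomicAdd on g_counter and the local queues are flushed.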
template <typename T, int PIXELS_PER_THREAD>
__global__ void buildEdgePointList(const PtrStepSzb edges, const PtrStep<T> dx, const PtrStep<T> dy,
unsigned int* coordList, float* thetaList)
{
__shared__ unsigned int s_coordLists[4][32 * PIXELS_PER_THREAD];
__shared__ float s_thetaLists[4][32 * PIXELS_PER_THREAD];
__shared__ int s_sizes[4];
__shared__ int s_globStart[4];
const int x = blockIdx.x * blockDim.x * PIXELS_PER_THREAD + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (threadIdx.x == 0)
s_sizes[threadIdx.y] = 0;
__syncthreads();
if (y < edges.rows)
{
// fill the queue
const uchar* edgesRow = edges.ptr(y);
const T* dxRow = dx.ptr(y);
const T* dyRow = dy.ptr(y);
for (int i = 0, xx = x; i < PIXELS_PER_THREAD && xx < edges.cols; ++i, xx += blockDim.x)
{
const T dxVal = dxRow[xx];
const T dyVal = dyRow[xx];
if (edgesRow[xx] && (dxVal != 0 || dyVal != 0))
{
const unsigned int coord = (y << 16) | xx;
float theta = ::atan2f(dyVal, dxVal);
if (theta < 0)
theta += 2.0f * CV_PI_F;
const int qidx = Emulation::smem::atomicAdd(&s_sizes[threadIdx.y], 1);
s_coordLists[threadIdx.y][qidx] = coord;
s_thetaLists[threadIdx.y][qidx] = theta;
}
}
}
__syncthreads();
// let one thread reserve the space required in the global list
if (threadIdx.x == 0 && threadIdx.y == 0)
{
// find how many items are stored in each list
int totalSize = 0;
for (int i = 0; i < blockDim.y; ++i)
{
s_globStart[i] = totalSize;
totalSize += s_sizes[i];
}
// calculate the offset in the global list
const int globalOffset = atomicAdd(&g_counter, totalSize);
for (int i = 0; i < blockDim.y; ++i)
s_globStart[i] += globalOffset;
}
__syncthreads();
// copy local queues to global queue
const int qsize = s_sizes[threadIdx.y];
int gidx = s_globStart[threadIdx.y] + threadIdx.x;
for(int i = threadIdx.x; i < qsize; i += blockDim.x, gidx += blockDim.x)
{
coordList[gidx] = s_coordLists[threadIdx.y][i];
thetaList[gidx] = s_thetaLists[threadIdx.y][i];
}
}
template <typename T>
int buildEdgePointList_gpu(PtrStepSzb edges, PtrStepSzb dx, PtrStepSzb dy, unsigned int* coordList, float* thetaList)
{
const int PIXELS_PER_THREAD = 8;
void* counterPtr;
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
const dim3 block(32, 4);
const dim3 grid(divUp(edges.cols, block.x * PIXELS_PER_THREAD), divUp(edges.rows, block.y));
cudaSafeCall( cudaFuncSetCacheConfig(buildEdgePointList<T, PIXELS_PER_THREAD>, cudaFuncCachePreferShared) );
buildEdgePointList<T, PIXELS_PER_THREAD><<<grid, block>>>(edges, (PtrStepSz<T>) dx, (PtrStepSz<T>) dy, coordList, thetaList);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
int totalCount;
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
return totalCount;
}
template int buildEdgePointList_gpu<short>(PtrStepSzb edges, PtrStepSzb dx, PtrStepSzb dy, unsigned int* coordList, float* thetaList);
template int buildEdgePointList_gpu<int>(PtrStepSzb edges, PtrStepSzb dx, PtrStepSzb dy, unsigned int* coordList, float* thetaList);
template int buildEdgePointList_gpu<float>(PtrStepSzb edges, PtrStepSzb dx, PtrStepSzb dy, unsigned int* coordList, float* thetaList);
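// Build the R-table for the Ballard transform: each template edge point is
// binned by its quantized gradient direction (levels bins over 2*pi) and the
// displacement from the template center is appended to that bin's row.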
__global__ void buildRTable(const unsigned int* coordList, const float* thetaList, const int pointsCount,
PtrStep<short2> r_table, int* r_sizes, int maxSize,
const short2 templCenter, const float thetaScale)
{
const int tid = blockIdx.x * blockDim.x + threadIdx.x;
if (tid >= pointsCount)
return;
const unsigned int coord = coordList[tid];
short2 p;
p.x = (coord & 0xFFFF);
p.y = (coord >> 16) & 0xFFFF;
const float theta = thetaList[tid];
const int n = __float2int_rn(theta * thetaScale);
const int ind = ::atomicAdd(r_sizes + n, 1);
if (ind < maxSize)
r_table(n, ind) = saturate_cast<short2>(p - templCenter);
}
void buildRTable_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
PtrStepSz<short2> r_table, int* r_sizes,
short2 templCenter, int levels)
{
const dim3 block(256);
const dim3 grid(divUp(pointsCount, block.x));
const float thetaScale = levels / (2.0f * CV_PI_F);
buildRTable<<<grid, block>>>(coordList, thetaList, pointsCount, r_table, r_sizes, r_table.cols, templCenter, thetaScale);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
}
////////////////////////////////////////////////////////////////////////
// Ballard_Pos
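// Position-only voting: every image edge point looks up the R-table row that
// matches its gradient direction and votes for the candidate centers p - r
// (scaled by 1/dp) in an accumulator with a one-cell guard border; local
// maxima above the threshold are extracted by findPosInHist below.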
__global__ void Ballard_Pos_calcHist(const unsigned int* coordList, const float* thetaList, const int pointsCount,
const PtrStep<short2> r_table, const int* r_sizes,
PtrStepSzi hist,
const float idp, const float thetaScale)
{
const int tid = blockIdx.x * blockDim.x + threadIdx.x;
if (tid >= pointsCount)
return;
const unsigned int coord = coordList[tid];
short2 p;
p.x = (coord & 0xFFFF);
p.y = (coord >> 16) & 0xFFFF;
const float theta = thetaList[tid];
const int n = __float2int_rn(theta * thetaScale);
const short2* r_row = r_table.ptr(n);
const int r_row_size = r_sizes[n];
for (int j = 0; j < r_row_size; ++j)
{
short2 c = saturate_cast<short2>(p - r_row[j]);
c.x = __float2int_rn(c.x * idp);
c.y = __float2int_rn(c.y * idp);
if (c.x >= 0 && c.x < hist.cols - 2 && c.y >= 0 && c.y < hist.rows - 2)
::atomicAdd(hist.ptr(c.y + 1) + c.x + 1, 1);
}
}
void Ballard_Pos_calcHist_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
PtrStepSz<short2> r_table, const int* r_sizes,
PtrStepSzi hist,
float dp, int levels)
{
const dim3 block(256);
const dim3 grid(divUp(pointsCount, block.x));
const float idp = 1.0f / dp;
const float thetaScale = levels / (2.0f * CV_PI_F);
Ballard_Pos_calcHist<<<grid, block>>>(coordList, thetaList, pointsCount, r_table, r_sizes, hist, idp, thetaScale);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
}
__global__ void Ballard_Pos_findPosInHist(const PtrStepSzi hist, float4* out, int3* votes,
const int maxSize, const float dp, const int threshold)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x >= hist.cols - 2 || y >= hist.rows - 2)
return;
const int curVotes = hist(y + 1, x + 1);
if (curVotes > threshold &&
curVotes > hist(y + 1, x) &&
curVotes >= hist(y + 1, x + 2) &&
curVotes > hist(y, x + 1) &&
curVotes >= hist(y + 2, x + 1))
{
const int ind = ::atomicAdd(&g_counter, 1);
if (ind < maxSize)
{
out[ind] = make_float4(x * dp, y * dp, 1.0f, 0.0f);
votes[ind] = make_int3(curVotes, 0, 0);
}
}
}
int Ballard_Pos_findPosInHist_gpu(PtrStepSzi hist, float4* out, int3* votes, int maxSize, float dp, int threshold)
{
void* counterPtr;
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
const dim3 block(32, 8);
const dim3 grid(divUp(hist.cols - 2, block.x), divUp(hist.rows - 2, block.y));
cudaSafeCall( cudaFuncSetCacheConfig(Ballard_Pos_findPosInHist, cudaFuncCachePreferL1) );
Ballard_Pos_findPosInHist<<<grid, block>>>(hist, out, votes, maxSize, dp, threshold);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
int totalCount;
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
totalCount = ::min(totalCount, maxSize);
return totalCount;
}
////////////////////////////////////////////////////////////////////////
// Guil_Full
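// Guil's full variant works on pairs of edge points whose gradient directions
// differ by xi (within angleEpsilon). Pairs are binned by the angle alpha
// between the segment p1-p2 and the gradient at p1; the template table also
// stores the displacements r1, r2 to the template center, so that orientation,
// scale and position can be recovered by matching image pairs against
// template pairs bin by bin.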
struct FeatureTable
{
uchar* p1_pos_data;
size_t p1_pos_step;
uchar* p1_theta_data;
size_t p1_theta_step;
uchar* p2_pos_data;
size_t p2_pos_step;
uchar* d12_data;
size_t d12_step;
uchar* r1_data;
size_t r1_step;
uchar* r2_data;
size_t r2_step;
};
__constant__ FeatureTable c_templFeatures;
__constant__ FeatureTable c_imageFeatures;
void Guil_Full_setTemplFeatures(PtrStepb p1_pos, PtrStepb p1_theta, PtrStepb p2_pos, PtrStepb d12, PtrStepb r1, PtrStepb r2)
{
FeatureTable tbl;
tbl.p1_pos_data = p1_pos.data;
tbl.p1_pos_step = p1_pos.step;
tbl.p1_theta_data = p1_theta.data;
tbl.p1_theta_step = p1_theta.step;
tbl.p2_pos_data = p2_pos.data;
tbl.p2_pos_step = p2_pos.step;
tbl.d12_data = d12.data;
tbl.d12_step = d12.step;
tbl.r1_data = r1.data;
tbl.r1_step = r1.step;
tbl.r2_data = r2.data;
tbl.r2_step = r2.step;
cudaSafeCall( cudaMemcpyToSymbol(c_templFeatures, &tbl, sizeof(FeatureTable)) );
}
void Guil_Full_setImageFeatures(PtrStepb p1_pos, PtrStepb p1_theta, PtrStepb p2_pos, PtrStepb d12, PtrStepb r1, PtrStepb r2)
{
FeatureTable tbl;
tbl.p1_pos_data = p1_pos.data;
tbl.p1_pos_step = p1_pos.step;
tbl.p1_theta_data = p1_theta.data;
tbl.p1_theta_step = p1_theta.step;
tbl.p2_pos_data = p2_pos.data;
tbl.p2_pos_step = p2_pos.step;
tbl.d12_data = d12.data;
tbl.d12_step = d12.step;
tbl.r1_data = r1.data;
tbl.r1_step = r1.step;
tbl.r2_data = r2.data;
tbl.r2_step = r2.step;
cudaSafeCall( cudaMemcpyToSymbol(c_imageFeatures, &tbl, sizeof(FeatureTable)) );
}
struct TemplFeatureTable
{
static __device__ float2* p1_pos(int n)
{
return (float2*)(c_templFeatures.p1_pos_data + n * c_templFeatures.p1_pos_step);
}
static __device__ float* p1_theta(int n)
{
return (float*)(c_templFeatures.p1_theta_data + n * c_templFeatures.p1_theta_step);
}
static __device__ float2* p2_pos(int n)
{
return (float2*)(c_templFeatures.p2_pos_data + n * c_templFeatures.p2_pos_step);
}
static __device__ float* d12(int n)
{
return (float*)(c_templFeatures.d12_data + n * c_templFeatures.d12_step);
}
static __device__ float2* r1(int n)
{
return (float2*)(c_templFeatures.r1_data + n * c_templFeatures.r1_step);
}
static __device__ float2* r2(int n)
{
return (float2*)(c_templFeatures.r2_data + n * c_templFeatures.r2_step);
}
};
struct ImageFeatureTable
{
static __device__ float2* p1_pos(int n)
{
return (float2*)(c_imageFeatures.p1_pos_data + n * c_imageFeatures.p1_pos_step);
}
static __device__ float* p1_theta(int n)
{
return (float*)(c_imageFeatures.p1_theta_data + n * c_imageFeatures.p1_theta_step);
}
static __device__ float2* p2_pos(int n)
{
return (float2*)(c_imageFeatures.p2_pos_data + n * c_imageFeatures.p2_pos_step);
}
static __device__ float* d12(int n)
{
return (float*)(c_imageFeatures.d12_data + n * c_imageFeatures.d12_step);
}
static __device__ float2* r1(int n)
{
return (float2*)(c_imageFeatures.r1_data + n * c_imageFeatures.r1_step);
}
static __device__ float2* r2(int n)
{
return (float2*)(c_imageFeatures.r2_data + n * c_imageFeatures.r2_step);
}
};
__device__ float clampAngle(float a)
{
float res = a;
while (res > 2.0f * CV_PI_F)
res -= 2.0f * CV_PI_F;
while (res < 0.0f)
res += 2.0f * CV_PI_F;
return res;
}
__device__ bool angleEq(float a, float b, float eps)
{
return (::fabs(clampAngle(a - b)) <= eps);
}
template <class FT, bool isTempl>
__global__ void Guil_Full_buildFeatureList(const unsigned int* coordList, const float* thetaList, const int pointsCount,
int* sizes, const int maxSize,
const float xi, const float angleEpsilon, const float alphaScale,
const float2 center, const float maxDist)
{
const float p1_theta = thetaList[blockIdx.x];
const unsigned int coord1 = coordList[blockIdx.x];
float2 p1_pos;
p1_pos.x = (coord1 & 0xFFFF);
p1_pos.y = (coord1 >> 16) & 0xFFFF;
for (int i = threadIdx.x; i < pointsCount; i += blockDim.x)
{
const float p2_theta = thetaList[i];
const unsigned int coord2 = coordList[i];
float2 p2_pos;
p2_pos.x = (coord2 & 0xFFFF);
p2_pos.y = (coord2 >> 16) & 0xFFFF;
if (angleEq(p1_theta - p2_theta, xi, angleEpsilon))
{
const float2 d = p1_pos - p2_pos;
float alpha12 = clampAngle(::atan2(d.y, d.x) - p1_theta);
float d12 = ::sqrtf(d.x * d.x + d.y * d.y);
if (d12 > maxDist)
continue;
float2 r1 = p1_pos - center;
float2 r2 = p2_pos - center;
const int n = __float2int_rn(alpha12 * alphaScale);
const int ind = ::atomicAdd(sizes + n, 1);
if (ind < maxSize)
{
if (!isTempl)
{
FT::p1_pos(n)[ind] = p1_pos;
FT::p2_pos(n)[ind] = p2_pos;
}
FT::p1_theta(n)[ind] = p1_theta;
FT::d12(n)[ind] = d12;
if (isTempl)
{
FT::r1(n)[ind] = r1;
FT::r2(n)[ind] = r2;
}
}
}
}
}
template <class FT, bool isTempl>
void Guil_Full_buildFeatureList_caller(const unsigned int* coordList, const float* thetaList, int pointsCount,
int* sizes, int maxSize,
float xi, float angleEpsilon, int levels,
float2 center, float maxDist)
{
const dim3 block(256);
const dim3 grid(pointsCount);
const float alphaScale = levels / (2.0f * CV_PI_F);
Guil_Full_buildFeatureList<FT, isTempl><<<grid, block>>>(coordList, thetaList, pointsCount,
sizes, maxSize,
xi * (CV_PI_F / 180.0f), angleEpsilon * (CV_PI_F / 180.0f), alphaScale,
center, maxDist);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
thrust::device_ptr<int> sizesPtr(sizes);
thrust::transform(sizesPtr, sizesPtr + levels + 1, sizesPtr, device::bind2nd(device::minimum<int>(), maxSize));
}
void Guil_Full_buildTemplFeatureList_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
int* sizes, int maxSize,
float xi, float angleEpsilon, int levels,
float2 center, float maxDist)
{
Guil_Full_buildFeatureList_caller<TemplFeatureTable, true>(coordList, thetaList, pointsCount,
sizes, maxSize,
xi, angleEpsilon, levels,
center, maxDist);
}
void Guil_Full_buildImageFeatureList_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
int* sizes, int maxSize,
float xi, float angleEpsilon, int levels,
float2 center, float maxDist)
{
Guil_Full_buildFeatureList_caller<ImageFeatureTable, false>(coordList, thetaList, pointsCount,
sizes, maxSize,
xi, angleEpsilon, levels,
center, maxDist);
}
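// Orientation histogram: for every (template pair, image pair) in the same alpha12
// bin, vote for the rotation angle given by the difference of their p1_theta values,
// restricted to [minAngle, maxAngle]. One block handles one template feature; votes
// accumulate in shared memory and are merged into the global histogram at the end.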
__global__ void Guil_Full_calcOHist(const int* templSizes, const int* imageSizes, int* OHist,
const float minAngle, const float maxAngle, const float iAngleStep, const int angleRange)
{
extern __shared__ int s_OHist[];
for (int i = threadIdx.x; i <= angleRange; i += blockDim.x)
s_OHist[i] = 0;
__syncthreads();
const int tIdx = blockIdx.x;
const int level = blockIdx.y;
const int tSize = templSizes[level];
if (tIdx < tSize)
{
const int imSize = imageSizes[level];
const float t_p1_theta = TemplFeatureTable::p1_theta(level)[tIdx];
for (int i = threadIdx.x; i < imSize; i += blockDim.x)
{
const float im_p1_theta = ImageFeatureTable::p1_theta(level)[i];
const float angle = clampAngle(im_p1_theta - t_p1_theta);
if (angle >= minAngle && angle <= maxAngle)
{
const int n = __float2int_rn((angle - minAngle) * iAngleStep);
Emulation::smem::atomicAdd(&s_OHist[n], 1);
}
}
}
__syncthreads();
for (int i = threadIdx.x; i <= angleRange; i += blockDim.x)
::atomicAdd(OHist + i, s_OHist[i]);
}
void Guil_Full_calcOHist_gpu(const int* templSizes, const int* imageSizes, int* OHist,
float minAngle, float maxAngle, float angleStep, int angleRange,
int levels, int tMaxSize)
{
const dim3 block(256);
const dim3 grid(tMaxSize, levels + 1);
minAngle *= (CV_PI_F / 180.0f);
maxAngle *= (CV_PI_F / 180.0f);
angleStep *= (CV_PI_F / 180.0f);
const size_t smemSize = (angleRange + 1) * sizeof(int);
Guil_Full_calcOHist<<<grid, block, smemSize>>>(templSizes, imageSizes, OHist,
minAngle, maxAngle, 1.0f / angleStep, angleRange);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
}
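// Scale histogram: with the best rotation fixed, pairs whose rotated p1_theta values
// agree vote for the scale given by the ratio of their d12 distances.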
__global__ void Guil_Full_calcSHist(const int* templSizes, const int* imageSizes, int* SHist,
const float angle, const float angleEpsilon,
const float minScale, const float maxScale, const float iScaleStep, const int scaleRange)
{
extern __shared__ int s_SHist[];
for (int i = threadIdx.x; i <= scaleRange; i += blockDim.x)
s_SHist[i] = 0;
__syncthreads();
const int tIdx = blockIdx.x;
const int level = blockIdx.y;
const int tSize = templSizes[level];
if (tIdx < tSize)
{
const int imSize = imageSizes[level];
const float t_p1_theta = TemplFeatureTable::p1_theta(level)[tIdx] + angle;
const float t_d12 = TemplFeatureTable::d12(level)[tIdx];
for (int i = threadIdx.x; i < imSize; i += blockDim.x)
{
const float im_p1_theta = ImageFeatureTable::p1_theta(level)[i];
const float im_d12 = ImageFeatureTable::d12(level)[i];
if (angleEq(im_p1_theta, t_p1_theta, angleEpsilon))
{
const float scale = im_d12 / t_d12;
if (scale >= minScale && scale <= maxScale)
{
const int s = __float2int_rn((scale - minScale) * iScaleStep);
Emulation::smem::atomicAdd(&s_SHist[s], 1);
}
}
}
}
__syncthreads();
for (int i = threadIdx.x; i <= scaleRange; i += blockDim.x)
::atomicAdd(SHist + i, s_SHist[i]);
}
void Guil_Full_calcSHist_gpu(const int* templSizes, const int* imageSizes, int* SHist,
float angle, float angleEpsilon,
float minScale, float maxScale, float iScaleStep, int scaleRange,
int levels, int tMaxSize)
{
const dim3 block(256);
const dim3 grid(tMaxSize, levels + 1);
angle *= (CV_PI_F / 180.0f);
angleEpsilon *= (CV_PI_F / 180.0f);
const size_t smemSize = (scaleRange + 1) * sizeof(int);
Guil_Full_calcSHist<<<grid, block, smemSize>>>(templSizes, imageSizes, SHist,
angle, angleEpsilon,
minScale, maxScale, iScaleStep, scaleRange);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
}
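// Position histogram: with rotation and scale fixed, rotate and scale the stored
// displacements r1, r2 and subtract them from the matching image points; a vote is
// cast only when both candidate reference positions land within one accumulator cell
// of each other.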
__global__ void Guil_Full_calcPHist(const int* templSizes, const int* imageSizes, PtrStepSzi PHist,
const float angle, const float sinVal, const float cosVal, const float angleEpsilon, const float scale,
const float idp)
{
const int tIdx = blockIdx.x;
const int level = blockIdx.y;
const int tSize = templSizes[level];
if (tIdx < tSize)
{
const int imSize = imageSizes[level];
const float t_p1_theta = TemplFeatureTable::p1_theta(level)[tIdx] + angle;
float2 r1 = TemplFeatureTable::r1(level)[tIdx];
float2 r2 = TemplFeatureTable::r2(level)[tIdx];
r1 = r1 * scale;
r2 = r2 * scale;
r1 = make_float2(cosVal * r1.x - sinVal * r1.y, sinVal * r1.x + cosVal * r1.y);
r2 = make_float2(cosVal * r2.x - sinVal * r2.y, sinVal * r2.x + cosVal * r2.y);
for (int i = threadIdx.x; i < imSize; i += blockDim.x)
{
const float im_p1_theta = ImageFeatureTable::p1_theta(level)[i];
const float2 im_p1_pos = ImageFeatureTable::p1_pos(level)[i];
const float2 im_p2_pos = ImageFeatureTable::p2_pos(level)[i];
if (angleEq(im_p1_theta, t_p1_theta, angleEpsilon))
{
float2 c1, c2;
c1 = im_p1_pos - r1;
c1 = c1 * idp;
c2 = im_p2_pos - r2;
c2 = c2 * idp;
if (::fabs(c1.x - c2.x) > 1 || ::fabs(c1.y - c2.y) > 1)
continue;
if (c1.y >= 0 && c1.y < PHist.rows - 2 && c1.x >= 0 && c1.x < PHist.cols - 2)
::atomicAdd(PHist.ptr(__float2int_rn(c1.y) + 1) + __float2int_rn(c1.x) + 1, 1);
}
}
}
}
void Guil_Full_calcPHist_gpu(const int* templSizes, const int* imageSizes, PtrStepSzi PHist,
float angle, float angleEpsilon, float scale,
float dp,
int levels, int tMaxSize)
{
const dim3 block(256);
const dim3 grid(tMaxSize, levels + 1);
angle *= (CV_PI_F / 180.0f);
angleEpsilon *= (CV_PI_F / 180.0f);
const float sinVal = ::sinf(angle);
const float cosVal = ::cosf(angle);
cudaSafeCall( cudaFuncSetCacheConfig(Guil_Full_calcPHist, cudaFuncCachePreferL1) );
Guil_Full_calcPHist<<<grid, block>>>(templSizes, imageSizes, PHist,
angle, sinVal, cosVal, angleEpsilon, scale,
1.0f / dp);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
}
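// Extract local maxima of the position histogram (strict comparison against the left
// and top neighbours, non-strict against the right and bottom ones) and append the
// resulting (x, y, scale, angle) candidates together with their vote counts.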
__global__ void Guil_Full_findPosInHist(const PtrStepSzi hist, float4* out, int3* votes, const int maxSize,
const float angle, const int angleVotes, const float scale, const int scaleVotes,
const float dp, const int threshold)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x >= hist.cols - 2 || y >= hist.rows - 2)
return;
const int curVotes = hist(y + 1, x + 1);
if (curVotes > threshold &&
curVotes > hist(y + 1, x) &&
curVotes >= hist(y + 1, x + 2) &&
curVotes > hist(y, x + 1) &&
curVotes >= hist(y + 2, x + 1))
{
const int ind = ::atomicAdd(&g_counter, 1);
if (ind < maxSize)
{
out[ind] = make_float4(x * dp, y * dp, scale, angle);
votes[ind] = make_int3(curVotes, scaleVotes, angleVotes);
}
}
}
int Guil_Full_findPosInHist_gpu(PtrStepSzi hist, float4* out, int3* votes, int curSize, int maxSize,
float angle, int angleVotes, float scale, int scaleVotes,
float dp, int threshold)
{
void* counterPtr;
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cudaSafeCall( cudaMemcpy(counterPtr, &curSize, sizeof(int), cudaMemcpyHostToDevice) );
const dim3 block(32, 8);
const dim3 grid(divUp(hist.cols - 2, block.x), divUp(hist.rows - 2, block.y));
cudaSafeCall( cudaFuncSetCacheConfig(Guil_Full_findPosInHist, cudaFuncCachePreferL1) );
Guil_Full_findPosInHist<<<grid, block>>>(hist, out, votes, maxSize,
angle, angleVotes, scale, scaleVotes,
dp, threshold);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
int totalCount;
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
totalCount = ::min(totalCount, maxSize);
return totalCount;
}
}
}}}
#endif // HAVE_OPENCV_CUDAARITHM
#endif /* CUDA_DISABLER */

View File

@@ -0,0 +1,143 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include <thrust/device_ptr.h>
#include <thrust/sort.h>
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/utility.hpp"
namespace cv { namespace cuda { namespace device
{
namespace gfft
{
texture<float, cudaTextureType2D, cudaReadModeElementType> eigTex(0, cudaFilterModePoint, cudaAddressModeClamp);
__device__ int g_counter = 0;
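// goodFeaturesToTrack corner collection: keep pixels whose corner response (read
// through eigTex) exceeds the threshold and is the maximum of its 3x3 neighbourhood,
// appending their coordinates via a global atomic counter.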
template <class Mask> __global__ void findCorners(float threshold, const Mask mask, float2* corners, int max_count, int rows, int cols)
{
const int j = blockIdx.x * blockDim.x + threadIdx.x;
const int i = blockIdx.y * blockDim.y + threadIdx.y;
if (i > 0 && i < rows - 1 && j > 0 && j < cols - 1 && mask(i, j))
{
float val = tex2D(eigTex, j, i);
if (val > threshold)
{
float maxVal = val;
maxVal = ::fmax(tex2D(eigTex, j - 1, i - 1), maxVal);
maxVal = ::fmax(tex2D(eigTex, j , i - 1), maxVal);
maxVal = ::fmax(tex2D(eigTex, j + 1, i - 1), maxVal);
maxVal = ::fmax(tex2D(eigTex, j - 1, i), maxVal);
maxVal = ::fmax(tex2D(eigTex, j + 1, i), maxVal);
maxVal = ::fmax(tex2D(eigTex, j - 1, i + 1), maxVal);
maxVal = ::fmax(tex2D(eigTex, j , i + 1), maxVal);
maxVal = ::fmax(tex2D(eigTex, j + 1, i + 1), maxVal);
if (val == maxVal)
{
const int ind = ::atomicAdd(&g_counter, 1);
if (ind < max_count)
corners[ind] = make_float2(j, i);
}
}
}
}
int findCorners_gpu(PtrStepSzf eig, float threshold, PtrStepSzb mask, float2* corners, int max_count)
{
void* counter_ptr;
cudaSafeCall( cudaGetSymbolAddress(&counter_ptr, g_counter) );
cudaSafeCall( cudaMemset(counter_ptr, 0, sizeof(int)) );
bindTexture(&eigTex, eig);
dim3 block(16, 16);
dim3 grid(divUp(eig.cols, block.x), divUp(eig.rows, block.y));
if (mask.data)
findCorners<<<grid, block>>>(threshold, SingleMask(mask), corners, max_count, eig.rows, eig.cols);
else
findCorners<<<grid, block>>>(threshold, WithOutMask(), corners, max_count, eig.rows, eig.cols);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
int count;
cudaSafeCall( cudaMemcpy(&count, counter_ptr, sizeof(int), cudaMemcpyDeviceToHost) );
return std::min(count, max_count);
}
class EigGreater
{
public:
__device__ __forceinline__ bool operator()(float2 a, float2 b) const
{
return tex2D(eigTex, a.x, a.y) > tex2D(eigTex, b.x, b.y);
}
};
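// Sort the collected corners by decreasing corner response so the strongest come first.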
void sortCorners_gpu(PtrStepSzf eig, float2* corners, int count)
{
bindTexture(&eigTex, eig);
thrust::device_ptr<float2> ptr(corners);
thrust::sort(ptr, ptr + count, EigGreater());
}
} // namespace gfft
}}}
#endif /* CUDA_DISABLER */

View File

@@ -0,0 +1,233 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/emulation.hpp"
#include "opencv2/core/cuda/transform.hpp"
using namespace cv::cuda;
using namespace cv::cuda::device;
namespace hist
{
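// 256-bin histogram of an 8-bit image: each block histograms a band of rows into a
// shared-memory histogram (reading pixels as packed 32-bit words where possible) and
// then merges the non-zero bins into the global histogram with atomics.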
__global__ void histogram256Kernel(const uchar* src, int cols, int rows, size_t step, int* hist)
{
__shared__ int shist[256];
const int y = blockIdx.x * blockDim.y + threadIdx.y;
const int tid = threadIdx.y * blockDim.x + threadIdx.x;
shist[tid] = 0;
__syncthreads();
if (y < rows)
{
const unsigned int* rowPtr = (const unsigned int*) (src + y * step);
const int cols_4 = cols / 4;
for (int x = threadIdx.x; x < cols_4; x += blockDim.x)
{
unsigned int data = rowPtr[x];
Emulation::smem::atomicAdd(&shist[(data >> 0) & 0xFFU], 1);
Emulation::smem::atomicAdd(&shist[(data >> 8) & 0xFFU], 1);
Emulation::smem::atomicAdd(&shist[(data >> 16) & 0xFFU], 1);
Emulation::smem::atomicAdd(&shist[(data >> 24) & 0xFFU], 1);
}
if (cols % 4 != 0 && threadIdx.x == 0)
{
for (int x = cols_4 * 4; x < cols; ++x)
{
unsigned int data = ((const uchar*)rowPtr)[x];
Emulation::smem::atomicAdd(&shist[data], 1);
}
}
}
__syncthreads();
const int histVal = shist[tid];
if (histVal > 0)
::atomicAdd(hist + tid, histVal);
}
void histogram256(PtrStepSzb src, int* hist, cudaStream_t stream)
{
const dim3 block(32, 8);
const dim3 grid(divUp(src.rows, block.y));
histogram256Kernel<<<grid, block, 0, stream>>>(src.data, src.cols, src.rows, src.step, hist);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
}
/////////////////////////////////////////////////////////////////////////
namespace hist
{
__device__ __forceinline__ void histEvenInc(int* shist, uint data, int binSize, int lowerLevel, int upperLevel)
{
if (data >= lowerLevel && data <= upperLevel)
{
const uint ind = (data - lowerLevel) / binSize;
Emulation::smem::atomicAdd(shist + ind, 1);
}
}
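// Evenly spaced histogram for 8-bit input: values in [lowerLevel, upperLevel] are
// mapped to bins of width binSize; each block accumulates into dynamic shared memory
// and merges its partial histogram into the global one at the end.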
__global__ void histEven8u(const uchar* src, const size_t step, const int rows, const int cols,
int* hist, const int binCount, const int binSize, const int lowerLevel, const int upperLevel)
{
extern __shared__ int shist[];
const int y = blockIdx.x * blockDim.y + threadIdx.y;
const int tid = threadIdx.y * blockDim.x + threadIdx.x;
if (tid < binCount)
shist[tid] = 0;
__syncthreads();
if (y < rows)
{
const uchar* rowPtr = src + y * step;
const uint* rowPtr4 = (uint*) rowPtr;
const int cols_4 = cols / 4;
for (int x = threadIdx.x; x < cols_4; x += blockDim.x)
{
const uint data = rowPtr4[x];
histEvenInc(shist, (data >> 0) & 0xFFU, binSize, lowerLevel, upperLevel);
histEvenInc(shist, (data >> 8) & 0xFFU, binSize, lowerLevel, upperLevel);
histEvenInc(shist, (data >> 16) & 0xFFU, binSize, lowerLevel, upperLevel);
histEvenInc(shist, (data >> 24) & 0xFFU, binSize, lowerLevel, upperLevel);
}
if (cols % 4 != 0 && threadIdx.x == 0)
{
for (int x = cols_4 * 4; x < cols; ++x)
{
const uchar data = rowPtr[x];
histEvenInc(shist, data, binSize, lowerLevel, upperLevel);
}
}
}
__syncthreads();
if (tid < binCount)
{
const int histVal = shist[tid];
if (histVal > 0)
::atomicAdd(hist + tid, histVal);
}
}
void histEven8u(PtrStepSzb src, int* hist, int binCount, int lowerLevel, int upperLevel, cudaStream_t stream)
{
const dim3 block(32, 8);
const dim3 grid(divUp(src.rows, block.y));
const int binSize = divUp(upperLevel - lowerLevel, binCount);
const size_t smem_size = binCount * sizeof(int);
histEven8u<<<grid, block, smem_size, stream>>>(src.data, src.step, src.rows, src.cols, hist, binCount, binSize, lowerLevel, upperLevel);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
}
/////////////////////////////////////////////////////////////////////////
namespace hist
{
__constant__ int c_lut[256];
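// Histogram equalization: c_lut holds the lookup table built by the caller (the
// cumulative histogram); EqualizeHist maps each pixel through it and rescales by
// 255 / (rows * cols).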
struct EqualizeHist : unary_function<uchar, uchar>
{
float scale;
__host__ EqualizeHist(float _scale) : scale(_scale) {}
__device__ __forceinline__ uchar operator ()(uchar val) const
{
const int lut = c_lut[val];
return __float2int_rn(scale * lut);
}
};
}
namespace cv { namespace cuda { namespace device
{
template <> struct TransformFunctorTraits<hist::EqualizeHist> : DefaultTransformFunctorTraits<hist::EqualizeHist>
{
enum { smart_shift = 4 };
};
}}}
namespace hist
{
void equalizeHist(PtrStepSzb src, PtrStepSzb dst, const int* lut, cudaStream_t stream)
{
if (stream == 0)
cudaSafeCall( cudaMemcpyToSymbol(c_lut, lut, 256 * sizeof(int), 0, cudaMemcpyDeviceToDevice) );
else
cudaSafeCall( cudaMemcpyToSymbolAsync(c_lut, lut, 256 * sizeof(int), 0, cudaMemcpyDeviceToDevice, stream) );
const float scale = 255.0f / (src.cols * src.rows);
device::transform(src, dst, EqualizeHist(scale), WithOutMask(), stream);
}
}
#endif /* CUDA_DISABLER */

View File

@@ -0,0 +1,260 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/emulation.hpp"
#include "opencv2/core/cuda/dynamic_smem.hpp"
#include "opencv2/opencv_modules.hpp"
#ifdef HAVE_OPENCV_CUDAFILTERS
namespace cv { namespace cuda { namespace device
{
namespace hough_circles
{
__device__ int g_counter;
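// Hough circle detection runs in three passes: accumulate candidate centers along the
// gradient direction of every edge point (circlesAccumCenters), pick accumulator peaks
// as centers (buildCentersList), then build a radius histogram per center and keep the
// radii with enough support (circlesAccumRadius).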
////////////////////////////////////////////////////////////////////////
// circlesAccumCenters
__global__ void circlesAccumCenters(const unsigned int* list, const int count, const PtrStepi dx, const PtrStepi dy,
PtrStepi accum, const int width, const int height, const int minRadius, const int maxRadius, const float idp)
{
const int SHIFT = 10;
const int ONE = 1 << SHIFT;
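// Positions and the per-radius step are kept in fixed point with SHIFT fractional
// bits; shifting back by SHIFT recovers integer accumulator coordinates.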
const int tid = blockIdx.x * blockDim.x + threadIdx.x;
if (tid >= count)
return;
const unsigned int val = list[tid];
const int x = (val & 0xFFFF);
const int y = (val >> 16) & 0xFFFF;
const int vx = dx(y, x);
const int vy = dy(y, x);
if (vx == 0 && vy == 0)
return;
const float mag = ::sqrtf(vx * vx + vy * vy);
const int x0 = __float2int_rn((x * idp) * ONE);
const int y0 = __float2int_rn((y * idp) * ONE);
int sx = __float2int_rn((vx * idp) * ONE / mag);
int sy = __float2int_rn((vy * idp) * ONE / mag);
// Step from minRadius to maxRadius in both directions of the gradient
for (int k1 = 0; k1 < 2; ++k1)
{
int x1 = x0 + minRadius * sx;
int y1 = y0 + minRadius * sy;
for (int r = minRadius; r <= maxRadius; x1 += sx, y1 += sy, ++r)
{
const int x2 = x1 >> SHIFT;
const int y2 = y1 >> SHIFT;
if (x2 < 0 || x2 >= width || y2 < 0 || y2 >= height)
break;
::atomicAdd(accum.ptr(y2 + 1) + x2 + 1, 1);
}
sx = -sx;
sy = -sy;
}
}
void circlesAccumCenters_gpu(const unsigned int* list, int count, PtrStepi dx, PtrStepi dy, PtrStepSzi accum, int minRadius, int maxRadius, float idp)
{
const dim3 block(256);
const dim3 grid(divUp(count, block.x));
cudaSafeCall( cudaFuncSetCacheConfig(circlesAccumCenters, cudaFuncCachePreferL1) );
circlesAccumCenters<<<grid, block>>>(list, count, dx, dy, accum, accum.cols - 2, accum.rows - 2, minRadius, maxRadius, idp);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
}
////////////////////////////////////////////////////////////////////////
// buildCentersList
__global__ void buildCentersList(const PtrStepSzi accum, unsigned int* centers, const int threshold)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x < accum.cols - 2 && y < accum.rows - 2)
{
const int top = accum(y, x + 1);
const int left = accum(y + 1, x);
const int cur = accum(y + 1, x + 1);
const int right = accum(y + 1, x + 2);
const int bottom = accum(y + 2, x + 1);
if (cur > threshold && cur > top && cur >= bottom && cur > left && cur >= right)
{
const unsigned int val = (y << 16) | x;
const int idx = ::atomicAdd(&g_counter, 1);
centers[idx] = val;
}
}
}
int buildCentersList_gpu(PtrStepSzi accum, unsigned int* centers, int threshold)
{
void* counterPtr;
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
const dim3 block(32, 8);
const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y));
cudaSafeCall( cudaFuncSetCacheConfig(buildCentersList, cudaFuncCachePreferL1) );
buildCentersList<<<grid, block>>>(accum, centers, threshold);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
int totalCount;
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
return totalCount;
}
////////////////////////////////////////////////////////////////////////
// circlesAccumRadius
__global__ void circlesAccumRadius(const unsigned int* centers, const unsigned int* list, const int count,
float3* circles, const int maxCircles, const float dp,
const int minRadius, const int maxRadius, const int histSize, const int threshold)
{
int* smem = DynamicSharedMem<int>();
for (int i = threadIdx.x; i < histSize + 2; i += blockDim.x)
smem[i] = 0;
__syncthreads();
unsigned int val = centers[blockIdx.x];
float cx = (val & 0xFFFF);
float cy = (val >> 16) & 0xFFFF;
cx = (cx + 0.5f) * dp;
cy = (cy + 0.5f) * dp;
for (int i = threadIdx.x; i < count; i += blockDim.x)
{
val = list[i];
const int x = (val & 0xFFFF);
const int y = (val >> 16) & 0xFFFF;
const float rad = ::sqrtf((cx - x) * (cx - x) + (cy - y) * (cy - y));
if (rad >= minRadius && rad <= maxRadius)
{
const int r = __float2int_rn(rad - minRadius);
Emulation::smem::atomicAdd(&smem[r + 1], 1);
}
}
__syncthreads();
for (int i = threadIdx.x; i < histSize; i += blockDim.x)
{
const int curVotes = smem[i + 1];
if (curVotes >= threshold && curVotes > smem[i] && curVotes >= smem[i + 2])
{
const int ind = ::atomicAdd(&g_counter, 1);
if (ind < maxCircles)
circles[ind] = make_float3(cx, cy, i + minRadius);
}
}
}
int circlesAccumRadius_gpu(const unsigned int* centers, int centersCount, const unsigned int* list, int count,
float3* circles, int maxCircles, float dp, int minRadius, int maxRadius, int threshold, bool has20)
{
void* counterPtr;
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
const dim3 block(has20 ? 1024 : 512);
const dim3 grid(centersCount);
const int histSize = maxRadius - minRadius + 1;
size_t smemSize = (histSize + 2) * sizeof(int);
circlesAccumRadius<<<grid, block, smemSize>>>(centers, list, count, circles, maxCircles, dp, minRadius, maxRadius, histSize, threshold);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
int totalCount;
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
totalCount = ::min(totalCount, maxCircles);
return totalCount;
}
}
}}}
#endif // HAVE_OPENCV_CUDAFILTERS
#endif /* CUDA_DISABLER */

View File

@@ -0,0 +1,212 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include <thrust/device_ptr.h>
#include <thrust/sort.h>
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/emulation.hpp"
#include "opencv2/core/cuda/dynamic_smem.hpp"
namespace cv { namespace cuda { namespace device
{
namespace hough_lines
{
__device__ int g_counter;
////////////////////////////////////////////////////////////////////////
// linesAccum
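// One block per theta value. The shared-memory variant keeps a private rho histogram
// per angle and writes it out once; the global variant falls back to atomics on the
// accumulator when that histogram does not fit into shared memory.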
__global__ void linesAccumGlobal(const unsigned int* list, const int count, PtrStepi accum, const float irho, const float theta, const int numrho)
{
const int n = blockIdx.x;
const float ang = n * theta;
float sinVal;
float cosVal;
sincosf(ang, &sinVal, &cosVal);
sinVal *= irho;
cosVal *= irho;
const int shift = (numrho - 1) / 2;
int* accumRow = accum.ptr(n + 1);
for (int i = threadIdx.x; i < count; i += blockDim.x)
{
const unsigned int val = list[i];
const int x = (val & 0xFFFF);
const int y = (val >> 16) & 0xFFFF;
int r = __float2int_rn(x * cosVal + y * sinVal);
r += shift;
::atomicAdd(accumRow + r + 1, 1);
}
}
__global__ void linesAccumShared(const unsigned int* list, const int count, PtrStepi accum, const float irho, const float theta, const int numrho)
{
int* smem = DynamicSharedMem<int>();
for (int i = threadIdx.x; i < numrho + 1; i += blockDim.x)
smem[i] = 0;
__syncthreads();
const int n = blockIdx.x;
const float ang = n * theta;
float sinVal;
float cosVal;
sincosf(ang, &sinVal, &cosVal);
sinVal *= irho;
cosVal *= irho;
const int shift = (numrho - 1) / 2;
for (int i = threadIdx.x; i < count; i += blockDim.x)
{
const unsigned int val = list[i];
const int x = (val & 0xFFFF);
const int y = (val >> 16) & 0xFFFF;
int r = __float2int_rn(x * cosVal + y * sinVal);
r += shift;
Emulation::smem::atomicAdd(&smem[r + 1], 1);
}
__syncthreads();
int* accumRow = accum.ptr(n + 1);
for (int i = threadIdx.x; i < numrho + 1; i += blockDim.x)
accumRow[i] = smem[i];
}
void linesAccum_gpu(const unsigned int* list, int count, PtrStepSzi accum, float rho, float theta, size_t sharedMemPerBlock, bool has20)
{
const dim3 block(has20 ? 1024 : 512);
const dim3 grid(accum.rows - 2);
size_t smemSize = (accum.cols - 1) * sizeof(int);
if (smemSize < sharedMemPerBlock - 1000)
linesAccumShared<<<grid, block, smemSize>>>(list, count, accum, 1.0f / rho, theta, accum.cols - 2);
else
linesAccumGlobal<<<grid, block>>>(list, count, accum, 1.0f / rho, theta, accum.cols - 2);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
}
////////////////////////////////////////////////////////////////////////
// linesGetResult
__global__ void linesGetResult(const PtrStepSzi accum, float2* out, int* votes, const int maxSize, const float rho, const float theta, const int threshold, const int numrho)
{
const int r = blockIdx.x * blockDim.x + threadIdx.x;
const int n = blockIdx.y * blockDim.y + threadIdx.y;
if (r >= accum.cols - 2 || n >= accum.rows - 2)
return;
const int curVotes = accum(n + 1, r + 1);
if (curVotes > threshold &&
curVotes > accum(n + 1, r) &&
curVotes >= accum(n + 1, r + 2) &&
curVotes > accum(n, r + 1) &&
curVotes >= accum(n + 2, r + 1))
{
const float radius = (r - (numrho - 1) * 0.5f) * rho;
const float angle = n * theta;
const int ind = ::atomicAdd(&g_counter, 1);
if (ind < maxSize)
{
out[ind] = make_float2(radius, angle);
votes[ind] = curVotes;
}
}
}
int linesGetResult_gpu(PtrStepSzi accum, float2* out, int* votes, int maxSize, float rho, float theta, int threshold, bool doSort)
{
void* counterPtr;
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
const dim3 block(32, 8);
const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y));
cudaSafeCall( cudaFuncSetCacheConfig(linesGetResult, cudaFuncCachePreferL1) );
linesGetResult<<<grid, block>>>(accum, out, votes, maxSize, rho, theta, threshold, accum.cols - 2);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
int totalCount;
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
totalCount = ::min(totalCount, maxSize);
if (doSort && totalCount > 0)
{
thrust::device_ptr<float2> outPtr(out);
thrust::device_ptr<int> votesPtr(votes);
thrust::sort_by_key(votesPtr, votesPtr + totalCount, outPtr, thrust::greater<int>());
}
return totalCount;
}
}
}}}
#endif /* CUDA_DISABLER */

View File

@@ -0,0 +1,249 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
namespace cv { namespace cuda { namespace device
{
namespace hough_segments
{
__device__ int g_counter;
texture<uchar, cudaTextureType2D, cudaReadModeElementType> tex_mask(false, cudaFilterModePoint, cudaAddressModeClamp);
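// Probabilistic Hough: for every accumulator peak, walk along the corresponding line
// through the edge mask (read via tex_mask), growing a segment while edge pixels are
// found and closing it once the gap exceeds lineGap; segments at least lineLength long
// are written to the output.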
__global__ void houghLinesProbabilistic(const PtrStepSzi accum,
int4* out, const int maxSize,
const float rho, const float theta,
const int lineGap, const int lineLength,
const int rows, const int cols)
{
const int r = blockIdx.x * blockDim.x + threadIdx.x;
const int n = blockIdx.y * blockDim.y + threadIdx.y;
if (r >= accum.cols - 2 || n >= accum.rows - 2)
return;
const int curVotes = accum(n + 1, r + 1);
if (curVotes >= lineLength &&
curVotes > accum(n, r) &&
curVotes > accum(n, r + 1) &&
curVotes > accum(n, r + 2) &&
curVotes > accum(n + 1, r) &&
curVotes > accum(n + 1, r + 2) &&
curVotes > accum(n + 2, r) &&
curVotes > accum(n + 2, r + 1) &&
curVotes > accum(n + 2, r + 2))
{
const float radius = (r - (accum.cols - 2 - 1) * 0.5f) * rho;
const float angle = n * theta;
float cosa;
float sina;
sincosf(angle, &sina, &cosa);
float2 p0 = make_float2(cosa * radius, sina * radius);
float2 dir = make_float2(-sina, cosa);
float2 pb[4] = {make_float2(-1, -1), make_float2(-1, -1), make_float2(-1, -1), make_float2(-1, -1)};
float a;
if (dir.x != 0)
{
a = -p0.x / dir.x;
pb[0].x = 0;
pb[0].y = p0.y + a * dir.y;
a = (cols - 1 - p0.x) / dir.x;
pb[1].x = cols - 1;
pb[1].y = p0.y + a * dir.y;
}
if (dir.y != 0)
{
a = -p0.y / dir.y;
pb[2].x = p0.x + a * dir.x;
pb[2].y = 0;
a = (rows - 1 - p0.y) / dir.y;
pb[3].x = p0.x + a * dir.x;
pb[3].y = rows - 1;
}
if (pb[0].x == 0 && (pb[0].y >= 0 && pb[0].y < rows))
{
p0 = pb[0];
if (dir.x < 0)
dir = -dir;
}
else if (pb[1].x == cols - 1 && (pb[1].y >= 0 && pb[1].y < rows))
{
p0 = pb[1];
if (dir.x > 0)
dir = -dir;
}
else if (pb[2].y == 0 && (pb[2].x >= 0 && pb[2].x < cols))
{
p0 = pb[2];
if (dir.y < 0)
dir = -dir;
}
else if (pb[3].y == rows - 1 && (pb[3].x >= 0 && pb[3].x < cols))
{
p0 = pb[3];
if (dir.y > 0)
dir = -dir;
}
float2 d;
if (::fabsf(dir.x) > ::fabsf(dir.y))
{
d.x = dir.x > 0 ? 1 : -1;
d.y = dir.y / ::fabsf(dir.x);
}
else
{
d.x = dir.x / ::fabsf(dir.y);
d.y = dir.y > 0 ? 1 : -1;
}
float2 line_end[2];
int gap;
bool inLine = false;
float2 p1 = p0;
if (p1.x < 0 || p1.x >= cols || p1.y < 0 || p1.y >= rows)
return;
for (;;)
{
if (tex2D(tex_mask, p1.x, p1.y))
{
gap = 0;
if (!inLine)
{
line_end[0] = p1;
line_end[1] = p1;
inLine = true;
}
else
{
line_end[1] = p1;
}
}
else if (inLine)
{
if (++gap > lineGap)
{
bool good_line = ::abs(line_end[1].x - line_end[0].x) >= lineLength ||
::abs(line_end[1].y - line_end[0].y) >= lineLength;
if (good_line)
{
const int ind = ::atomicAdd(&g_counter, 1);
if (ind < maxSize)
out[ind] = make_int4(line_end[0].x, line_end[0].y, line_end[1].x, line_end[1].y);
}
gap = 0;
inLine = false;
}
}
p1 = p1 + d;
if (p1.x < 0 || p1.x >= cols || p1.y < 0 || p1.y >= rows)
{
if (inLine)
{
bool good_line = ::abs(line_end[1].x - line_end[0].x) >= lineLength ||
::abs(line_end[1].y - line_end[0].y) >= lineLength;
if (good_line)
{
const int ind = ::atomicAdd(&g_counter, 1);
if (ind < maxSize)
out[ind] = make_int4(line_end[0].x, line_end[0].y, line_end[1].x, line_end[1].y);
}
}
break;
}
}
}
}
int houghLinesProbabilistic_gpu(PtrStepSzb mask, PtrStepSzi accum, int4* out, int maxSize, float rho, float theta, int lineGap, int lineLength)
{
void* counterPtr;
cudaSafeCall( cudaGetSymbolAddress(&counterPtr, g_counter) );
cudaSafeCall( cudaMemset(counterPtr, 0, sizeof(int)) );
const dim3 block(32, 8);
const dim3 grid(divUp(accum.cols - 2, block.x), divUp(accum.rows - 2, block.y));
bindTexture(&tex_mask, mask);
houghLinesProbabilistic<<<grid, block>>>(accum,
out, maxSize,
rho, theta,
lineGap, lineLength,
mask.rows, mask.cols);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
int totalCount;
cudaSafeCall( cudaMemcpy(&totalCount, counterPtr, sizeof(int), cudaMemcpyDeviceToHost) );
totalCount = ::min(totalCount, maxSize);
return totalCount;
}
}
}}}
#endif /* CUDA_DISABLER */

View File

@@ -0,0 +1,916 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
namespace cv { namespace cuda { namespace device
{
namespace match_template
{
__device__ __forceinline__ float sum(float v) { return v; }
__device__ __forceinline__ float sum(float2 v) { return v.x + v.y; }
__device__ __forceinline__ float sum(float3 v) { return v.x + v.y + v.z; }
__device__ __forceinline__ float sum(float4 v) { return v.x + v.y + v.z + v.w; }
__device__ __forceinline__ float first(float v) { return v; }
__device__ __forceinline__ float first(float2 v) { return v.x; }
__device__ __forceinline__ float first(float3 v) { return v.x; }
__device__ __forceinline__ float first(float4 v) { return v.x; }
__device__ __forceinline__ float mul(float a, float b) { return a * b; }
__device__ __forceinline__ float2 mul(float2 a, float2 b) { return make_float2(a.x * b.x, a.y * b.y); }
__device__ __forceinline__ float3 mul(float3 a, float3 b) { return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); }
__device__ __forceinline__ float4 mul(float4 a, float4 b) { return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); }
__device__ __forceinline__ float mul(uchar a, uchar b) { return a * b; }
__device__ __forceinline__ float2 mul(uchar2 a, uchar2 b) { return make_float2(a.x * b.x, a.y * b.y); }
__device__ __forceinline__ float3 mul(uchar3 a, uchar3 b) { return make_float3(a.x * b.x, a.y * b.y, a.z * b.z); }
__device__ __forceinline__ float4 mul(uchar4 a, uchar4 b) { return make_float4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w); }
__device__ __forceinline__ float sub(float a, float b) { return a - b; }
__device__ __forceinline__ float2 sub(float2 a, float2 b) { return make_float2(a.x - b.x, a.y - b.y); }
__device__ __forceinline__ float3 sub(float3 a, float3 b) { return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); }
__device__ __forceinline__ float4 sub(float4 a, float4 b) { return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); }
__device__ __forceinline__ float sub(uchar a, uchar b) { return a - b; }
__device__ __forceinline__ float2 sub(uchar2 a, uchar2 b) { return make_float2(a.x - b.x, a.y - b.y); }
__device__ __forceinline__ float3 sub(uchar3 a, uchar3 b) { return make_float3(a.x - b.x, a.y - b.y, a.z - b.z); }
__device__ __forceinline__ float4 sub(uchar4 a, uchar4 b) { return make_float4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w); }
//////////////////////////////////////////////////////////////////////
// Naive_CCORR
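// Brute-force cross-correlation: each thread computes one result pixel by summing the
// element-wise product of the template and the image window under it.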
template <typename T, int cn>
__global__ void matchTemplateNaiveKernel_CCORR(int w, int h, const PtrStepb image, const PtrStepb templ, PtrStepSzf result)
{
typedef typename TypeVec<T, cn>::vec_type Type;
typedef typename TypeVec<float, cn>::vec_type Typef;
int x = blockDim.x * blockIdx.x + threadIdx.x;
int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < result.cols && y < result.rows)
{
Typef res = VecTraits<Typef>::all(0);
for (int i = 0; i < h; ++i)
{
const Type* image_ptr = (const Type*)image.ptr(y + i);
const Type* templ_ptr = (const Type*)templ.ptr(i);
for (int j = 0; j < w; ++j)
res = res + mul(image_ptr[x + j], templ_ptr[j]);
}
result.ptr(y)[x] = sum(res);
}
}
template <typename T, int cn>
void matchTemplateNaive_CCORR(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, cudaStream_t stream)
{
const dim3 threads(32, 8);
const dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
matchTemplateNaiveKernel_CCORR<T, cn><<<grid, threads, 0, stream>>>(templ.cols, templ.rows, image, templ, result);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
void matchTemplateNaive_CCORR_32F(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream)
{
typedef void (*caller_t)(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, cudaStream_t stream);
static const caller_t callers[] =
{
0, matchTemplateNaive_CCORR<float, 1>, matchTemplateNaive_CCORR<float, 2>, matchTemplateNaive_CCORR<float, 3>, matchTemplateNaive_CCORR<float, 4>
};
callers[cn](image, templ, result, stream);
}
void matchTemplateNaive_CCORR_8U(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream)
{
typedef void (*caller_t)(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, cudaStream_t stream);
static const caller_t callers[] =
{
0, matchTemplateNaive_CCORR<uchar, 1>, matchTemplateNaive_CCORR<uchar, 2>, matchTemplateNaive_CCORR<uchar, 3>, matchTemplateNaive_CCORR<uchar, 4>
};
callers[cn](image, templ, result, stream);
}
//////////////////////////////////////////////////////////////////////
// Naive_SQDIFF
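// Brute-force sum of squared differences, with the same thread layout as the CCORR kernel.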
template <typename T, int cn>
__global__ void matchTemplateNaiveKernel_SQDIFF(int w, int h, const PtrStepb image, const PtrStepb templ, PtrStepSzf result)
{
typedef typename TypeVec<T, cn>::vec_type Type;
typedef typename TypeVec<float, cn>::vec_type Typef;
int x = blockDim.x * blockIdx.x + threadIdx.x;
int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < result.cols && y < result.rows)
{
Typef res = VecTraits<Typef>::all(0);
Typef delta;
for (int i = 0; i < h; ++i)
{
const Type* image_ptr = (const Type*)image.ptr(y + i);
const Type* templ_ptr = (const Type*)templ.ptr(i);
for (int j = 0; j < w; ++j)
{
delta = sub(image_ptr[x + j], templ_ptr[j]);
res = res + delta * delta;
}
}
result.ptr(y)[x] = sum(res);
}
}
template <typename T, int cn>
void matchTemplateNaive_SQDIFF(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, cudaStream_t stream)
{
const dim3 threads(32, 8);
const dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
matchTemplateNaiveKernel_SQDIFF<T, cn><<<grid, threads, 0, stream>>>(templ.cols, templ.rows, image, templ, result);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
void matchTemplateNaive_SQDIFF_32F(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream)
{
typedef void (*caller_t)(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, cudaStream_t stream);
static const caller_t callers[] =
{
0, matchTemplateNaive_SQDIFF<float, 1>, matchTemplateNaive_SQDIFF<float, 2>, matchTemplateNaive_SQDIFF<float, 3>, matchTemplateNaive_SQDIFF<float, 4>
};
callers[cn](image, templ, result, stream);
}
void matchTemplateNaive_SQDIFF_8U(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream)
{
typedef void (*caller_t)(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, cudaStream_t stream);
static const caller_t callers[] =
{
0, matchTemplateNaive_SQDIFF<uchar, 1>, matchTemplateNaive_SQDIFF<uchar, 2>, matchTemplateNaive_SQDIFF<uchar, 3>, matchTemplateNaive_SQDIFF<uchar, 4>
};
callers[cn](image, templ, result, stream);
}
//////////////////////////////////////////////////////////////////////
// Prepared_SQDIFF
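// "Prepared" SQDIFF uses the identity sum((I - T)^2) = sum(I^2) - 2*ccorr(I, T) + sum(T^2):
// the cross-correlation has already been written into result, the image term is a box sum
// from the squared integral image, and the template term is a precomputed constant.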
template <int cn>
__global__ void matchTemplatePreparedKernel_SQDIFF_8U(int w, int h, const PtrStep<unsigned long long> image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x < result.cols && y < result.rows)
{
float image_sqsum_ = (float)(
(image_sqsum.ptr(y + h)[(x + w) * cn] - image_sqsum.ptr(y)[(x + w) * cn]) -
(image_sqsum.ptr(y + h)[x * cn] - image_sqsum.ptr(y)[x * cn]));
float ccorr = result.ptr(y)[x];
result.ptr(y)[x] = image_sqsum_ - 2.f * ccorr + templ_sqsum;
}
}
template <int cn>
void matchTemplatePrepared_SQDIFF_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result, cudaStream_t stream)
{
const dim3 threads(32, 8);
const dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
matchTemplatePreparedKernel_SQDIFF_8U<cn><<<grid, threads, 0, stream>>>(w, h, image_sqsum, templ_sqsum, result);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
void matchTemplatePrepared_SQDIFF_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result, int cn,
cudaStream_t stream)
{
typedef void (*caller_t)(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result, cudaStream_t stream);
static const caller_t callers[] =
{
0, matchTemplatePrepared_SQDIFF_8U<1>, matchTemplatePrepared_SQDIFF_8U<2>, matchTemplatePrepared_SQDIFF_8U<3>, matchTemplatePrepared_SQDIFF_8U<4>
};
callers[cn](w, h, image_sqsum, templ_sqsum, result, stream);
}
//////////////////////////////////////////////////////////////////////
// Prepared_SQDIFF_NORMED
// normAcc* are accurate normalization routines that keep the GPU matchTemplate
// results consistent with the CPU implementation
__device__ float normAcc(float num, float denum)
{
if (::fabs(num) < denum)
return num / denum;
if (::fabs(num) < denum * 1.125f)
return num > 0 ? 1 : -1;
return 0;
}
__device__ float normAcc_SQDIFF(float num, float denum)
{
if (::fabs(num) < denum)
return num / denum;
if (::fabs(num) < denum * 1.125f)
return num > 0 ? 1 : -1;
return 1;
}
template <int cn>
__global__ void matchTemplatePreparedKernel_SQDIFF_NORMED_8U(
int w, int h, const PtrStep<unsigned long long> image_sqsum,
unsigned long long templ_sqsum, PtrStepSzf result)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x < result.cols && y < result.rows)
{
float image_sqsum_ = (float)(
(image_sqsum.ptr(y + h)[(x + w) * cn] - image_sqsum.ptr(y)[(x + w) * cn]) -
(image_sqsum.ptr(y + h)[x * cn] - image_sqsum.ptr(y)[x * cn]));
float ccorr = result.ptr(y)[x];
result.ptr(y)[x] = normAcc_SQDIFF(image_sqsum_ - 2.f * ccorr + templ_sqsum,
sqrtf(image_sqsum_ * templ_sqsum));
}
}
template <int cn>
void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum,
PtrStepSzf result, cudaStream_t stream)
{
const dim3 threads(32, 8);
const dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
matchTemplatePreparedKernel_SQDIFF_NORMED_8U<cn><<<grid, threads, 0, stream>>>(w, h, image_sqsum, templ_sqsum, result);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum,
PtrStepSzf result, int cn, cudaStream_t stream)
{
typedef void (*caller_t)(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result, cudaStream_t stream);
static const caller_t callers[] =
{
0, matchTemplatePrepared_SQDIFF_NORMED_8U<1>, matchTemplatePrepared_SQDIFF_NORMED_8U<2>, matchTemplatePrepared_SQDIFF_NORMED_8U<3>, matchTemplatePrepared_SQDIFF_NORMED_8U<4>
};
callers[cn](w, h, image_sqsum, templ_sqsum, result, stream);
}
//////////////////////////////////////////////////////////////////////
// Prepared_CCOFF
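// CCOEFF subtracts the template-mean contribution from the precomputed cross-correlation:
// ccorr - sum(I_window) * (sum(T) / (w*h)), with sum(I_window) taken from an integral image.
// The multi-channel kernels repeat this per channel.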
__global__ void matchTemplatePreparedKernel_CCOFF_8U(int w, int h, float templ_sum_scale, const PtrStep<unsigned int> image_sum, PtrStepSzf result)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x < result.cols && y < result.rows)
{
float image_sum_ = (float)(
(image_sum.ptr(y + h)[x + w] - image_sum.ptr(y)[x + w]) -
(image_sum.ptr(y + h)[x] - image_sum.ptr(y)[x]));
float ccorr = result.ptr(y)[x];
result.ptr(y)[x] = ccorr - image_sum_ * templ_sum_scale;
}
}
void matchTemplatePrepared_CCOFF_8U(int w, int h, const PtrStepSz<unsigned int> image_sum, unsigned int templ_sum, PtrStepSzf result, cudaStream_t stream)
{
dim3 threads(32, 8);
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
matchTemplatePreparedKernel_CCOFF_8U<<<grid, threads, 0, stream>>>(w, h, (float)templ_sum / (w * h), image_sum, result);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
__global__ void matchTemplatePreparedKernel_CCOFF_8UC2(
int w, int h, float templ_sum_scale_r, float templ_sum_scale_g,
const PtrStep<unsigned int> image_sum_r,
const PtrStep<unsigned int> image_sum_g,
PtrStepSzf result)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x < result.cols && y < result.rows)
{
float image_sum_r_ = (float)(
(image_sum_r.ptr(y + h)[x + w] - image_sum_r.ptr(y)[x + w]) -
(image_sum_r.ptr(y + h)[x] - image_sum_r.ptr(y)[x]));
float image_sum_g_ = (float)(
(image_sum_g.ptr(y + h)[x + w] - image_sum_g.ptr(y)[x + w]) -
(image_sum_g.ptr(y + h)[x] - image_sum_g.ptr(y)[x]));
float ccorr = result.ptr(y)[x];
result.ptr(y)[x] = ccorr - image_sum_r_ * templ_sum_scale_r
- image_sum_g_ * templ_sum_scale_g;
}
}
void matchTemplatePrepared_CCOFF_8UC2(
int w, int h,
const PtrStepSz<unsigned int> image_sum_r,
const PtrStepSz<unsigned int> image_sum_g,
unsigned int templ_sum_r, unsigned int templ_sum_g,
PtrStepSzf result, cudaStream_t stream)
{
dim3 threads(32, 8);
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
matchTemplatePreparedKernel_CCOFF_8UC2<<<grid, threads, 0, stream>>>(
w, h, (float)templ_sum_r / (w * h), (float)templ_sum_g / (w * h),
image_sum_r, image_sum_g, result);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
__global__ void matchTemplatePreparedKernel_CCOFF_8UC3(
int w, int h,
float templ_sum_scale_r,
float templ_sum_scale_g,
float templ_sum_scale_b,
const PtrStep<unsigned int> image_sum_r,
const PtrStep<unsigned int> image_sum_g,
const PtrStep<unsigned int> image_sum_b,
PtrStepSzf result)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x < result.cols && y < result.rows)
{
float image_sum_r_ = (float)(
(image_sum_r.ptr(y + h)[x + w] - image_sum_r.ptr(y)[x + w]) -
(image_sum_r.ptr(y + h)[x] - image_sum_r.ptr(y)[x]));
float image_sum_g_ = (float)(
(image_sum_g.ptr(y + h)[x + w] - image_sum_g.ptr(y)[x + w]) -
(image_sum_g.ptr(y + h)[x] - image_sum_g.ptr(y)[x]));
float image_sum_b_ = (float)(
(image_sum_b.ptr(y + h)[x + w] - image_sum_b.ptr(y)[x + w]) -
(image_sum_b.ptr(y + h)[x] - image_sum_b.ptr(y)[x]));
float ccorr = result.ptr(y)[x];
result.ptr(y)[x] = ccorr - image_sum_r_ * templ_sum_scale_r
- image_sum_g_ * templ_sum_scale_g
- image_sum_b_ * templ_sum_scale_b;
}
}
void matchTemplatePrepared_CCOFF_8UC3(
int w, int h,
const PtrStepSz<unsigned int> image_sum_r,
const PtrStepSz<unsigned int> image_sum_g,
const PtrStepSz<unsigned int> image_sum_b,
unsigned int templ_sum_r,
unsigned int templ_sum_g,
unsigned int templ_sum_b,
PtrStepSzf result, cudaStream_t stream)
{
dim3 threads(32, 8);
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
matchTemplatePreparedKernel_CCOFF_8UC3<<<grid, threads, 0, stream>>>(
w, h,
(float)templ_sum_r / (w * h),
(float)templ_sum_g / (w * h),
(float)templ_sum_b / (w * h),
image_sum_r, image_sum_g, image_sum_b, result);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
__global__ void matchTemplatePreparedKernel_CCOFF_8UC4(
int w, int h,
float templ_sum_scale_r,
float templ_sum_scale_g,
float templ_sum_scale_b,
float templ_sum_scale_a,
const PtrStep<unsigned int> image_sum_r,
const PtrStep<unsigned int> image_sum_g,
const PtrStep<unsigned int> image_sum_b,
const PtrStep<unsigned int> image_sum_a,
PtrStepSzf result)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x < result.cols && y < result.rows)
{
float image_sum_r_ = (float)(
(image_sum_r.ptr(y + h)[x + w] - image_sum_r.ptr(y)[x + w]) -
(image_sum_r.ptr(y + h)[x] - image_sum_r.ptr(y)[x]));
float image_sum_g_ = (float)(
(image_sum_g.ptr(y + h)[x + w] - image_sum_g.ptr(y)[x + w]) -
(image_sum_g.ptr(y + h)[x] - image_sum_g.ptr(y)[x]));
float image_sum_b_ = (float)(
(image_sum_b.ptr(y + h)[x + w] - image_sum_b.ptr(y)[x + w]) -
(image_sum_b.ptr(y + h)[x] - image_sum_b.ptr(y)[x]));
float image_sum_a_ = (float)(
(image_sum_a.ptr(y + h)[x + w] - image_sum_a.ptr(y)[x + w]) -
(image_sum_a.ptr(y + h)[x] - image_sum_a.ptr(y)[x]));
float ccorr = result.ptr(y)[x];
result.ptr(y)[x] = ccorr - image_sum_r_ * templ_sum_scale_r
- image_sum_g_ * templ_sum_scale_g
- image_sum_b_ * templ_sum_scale_b
- image_sum_a_ * templ_sum_scale_a;
}
}
void matchTemplatePrepared_CCOFF_8UC4(
int w, int h,
const PtrStepSz<unsigned int> image_sum_r,
const PtrStepSz<unsigned int> image_sum_g,
const PtrStepSz<unsigned int> image_sum_b,
const PtrStepSz<unsigned int> image_sum_a,
unsigned int templ_sum_r,
unsigned int templ_sum_g,
unsigned int templ_sum_b,
unsigned int templ_sum_a,
PtrStepSzf result, cudaStream_t stream)
{
dim3 threads(32, 8);
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
matchTemplatePreparedKernel_CCOFF_8UC4<<<grid, threads, 0, stream>>>(
w, h,
(float)templ_sum_r / (w * h),
(float)templ_sum_g / (w * h),
(float)templ_sum_b / (w * h),
(float)templ_sum_a / (w * h),
image_sum_r, image_sum_g, image_sum_b, image_sum_a,
result);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
//////////////////////////////////////////////////////////////////////
// Prepared_CCOFF_NORMED
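// CCOEFF_NORMED: the numerator is the same mean-corrected correlation as in the
// CCOFF kernels above; the denominator is
//   sqrt( (sum T^2 - (sum T)^2 / N) * (sum I^2 - (sum I)^2 / N) ),  N = w*h,
// where the template term (templ_sqsum_scale) is precomputed on the host and the
// window term is rebuilt per pixel from the sum / squared-sum integral images.
// normAcc() (defined earlier in this file) performs the division while guarding
// against a degenerate, near-zero denominator.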
__global__ void matchTemplatePreparedKernel_CCOFF_NORMED_8U(
int w, int h, float weight,
float templ_sum_scale, float templ_sqsum_scale,
const PtrStep<unsigned int> image_sum,
const PtrStep<unsigned long long> image_sqsum,
PtrStepSzf result)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x < result.cols && y < result.rows)
{
float ccorr = result.ptr(y)[x];
float image_sum_ = (float)(
(image_sum.ptr(y + h)[x + w] - image_sum.ptr(y)[x + w]) -
(image_sum.ptr(y + h)[x] - image_sum.ptr(y)[x]));
float image_sqsum_ = (float)(
(image_sqsum.ptr(y + h)[x + w] - image_sqsum.ptr(y)[x + w]) -
(image_sqsum.ptr(y + h)[x] - image_sqsum.ptr(y)[x]));
result.ptr(y)[x] = normAcc(ccorr - image_sum_ * templ_sum_scale,
sqrtf(templ_sqsum_scale * (image_sqsum_ - weight * image_sum_ * image_sum_)));
}
}
void matchTemplatePrepared_CCOFF_NORMED_8U(
int w, int h, const PtrStepSz<unsigned int> image_sum,
const PtrStepSz<unsigned long long> image_sqsum,
unsigned int templ_sum, unsigned long long templ_sqsum,
PtrStepSzf result, cudaStream_t stream)
{
dim3 threads(32, 8);
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
float weight = 1.f / (w * h);
float templ_sum_scale = templ_sum * weight;
float templ_sqsum_scale = templ_sqsum - weight * templ_sum * templ_sum;
matchTemplatePreparedKernel_CCOFF_NORMED_8U<<<grid, threads, 0, stream>>>(
w, h, weight, templ_sum_scale, templ_sqsum_scale,
image_sum, image_sqsum, result);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
__global__ void matchTemplatePreparedKernel_CCOFF_NORMED_8UC2(
int w, int h, float weight,
float templ_sum_scale_r, float templ_sum_scale_g,
float templ_sqsum_scale,
const PtrStep<unsigned int> image_sum_r, const PtrStep<unsigned long long> image_sqsum_r,
const PtrStep<unsigned int> image_sum_g, const PtrStep<unsigned long long> image_sqsum_g,
PtrStepSzf result)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x < result.cols && y < result.rows)
{
float image_sum_r_ = (float)(
(image_sum_r.ptr(y + h)[x + w] - image_sum_r.ptr(y)[x + w]) -
(image_sum_r.ptr(y + h)[x] - image_sum_r.ptr(y)[x]));
float image_sqsum_r_ = (float)(
(image_sqsum_r.ptr(y + h)[x + w] - image_sqsum_r.ptr(y)[x + w]) -
(image_sqsum_r.ptr(y + h)[x] - image_sqsum_r.ptr(y)[x]));
float image_sum_g_ = (float)(
(image_sum_g.ptr(y + h)[x + w] - image_sum_g.ptr(y)[x + w]) -
(image_sum_g.ptr(y + h)[x] - image_sum_g.ptr(y)[x]));
float image_sqsum_g_ = (float)(
(image_sqsum_g.ptr(y + h)[x + w] - image_sqsum_g.ptr(y)[x + w]) -
(image_sqsum_g.ptr(y + h)[x] - image_sqsum_g.ptr(y)[x]));
float num = result.ptr(y)[x] - image_sum_r_ * templ_sum_scale_r
- image_sum_g_ * templ_sum_scale_g;
float denum = sqrtf(templ_sqsum_scale * (image_sqsum_r_ - weight * image_sum_r_ * image_sum_r_
+ image_sqsum_g_ - weight * image_sum_g_ * image_sum_g_));
result.ptr(y)[x] = normAcc(num, denum);
}
}
void matchTemplatePrepared_CCOFF_NORMED_8UC2(
int w, int h,
const PtrStepSz<unsigned int> image_sum_r, const PtrStepSz<unsigned long long> image_sqsum_r,
const PtrStepSz<unsigned int> image_sum_g, const PtrStepSz<unsigned long long> image_sqsum_g,
unsigned int templ_sum_r, unsigned long long templ_sqsum_r,
unsigned int templ_sum_g, unsigned long long templ_sqsum_g,
PtrStepSzf result, cudaStream_t stream)
{
dim3 threads(32, 8);
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
float weight = 1.f / (w * h);
float templ_sum_scale_r = templ_sum_r * weight;
float templ_sum_scale_g = templ_sum_g * weight;
float templ_sqsum_scale = templ_sqsum_r - weight * templ_sum_r * templ_sum_r
+ templ_sqsum_g - weight * templ_sum_g * templ_sum_g;
matchTemplatePreparedKernel_CCOFF_NORMED_8UC2<<<grid, threads, 0, stream>>>(
w, h, weight,
templ_sum_scale_r, templ_sum_scale_g,
templ_sqsum_scale,
image_sum_r, image_sqsum_r,
image_sum_g, image_sqsum_g,
result);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
__global__ void matchTemplatePreparedKernel_CCOFF_NORMED_8UC3(
int w, int h, float weight,
float templ_sum_scale_r, float templ_sum_scale_g, float templ_sum_scale_b,
float templ_sqsum_scale,
const PtrStep<unsigned int> image_sum_r, const PtrStep<unsigned long long> image_sqsum_r,
const PtrStep<unsigned int> image_sum_g, const PtrStep<unsigned long long> image_sqsum_g,
const PtrStep<unsigned int> image_sum_b, const PtrStep<unsigned long long> image_sqsum_b,
PtrStepSzf result)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x < result.cols && y < result.rows)
{
float image_sum_r_ = (float)(
(image_sum_r.ptr(y + h)[x + w] - image_sum_r.ptr(y)[x + w]) -
(image_sum_r.ptr(y + h)[x] - image_sum_r.ptr(y)[x]));
float image_sqsum_r_ = (float)(
(image_sqsum_r.ptr(y + h)[x + w] - image_sqsum_r.ptr(y)[x + w]) -
(image_sqsum_r.ptr(y + h)[x] - image_sqsum_r.ptr(y)[x]));
float image_sum_g_ = (float)(
(image_sum_g.ptr(y + h)[x + w] - image_sum_g.ptr(y)[x + w]) -
(image_sum_g.ptr(y + h)[x] - image_sum_g.ptr(y)[x]));
float image_sqsum_g_ = (float)(
(image_sqsum_g.ptr(y + h)[x + w] - image_sqsum_g.ptr(y)[x + w]) -
(image_sqsum_g.ptr(y + h)[x] - image_sqsum_g.ptr(y)[x]));
float image_sum_b_ = (float)(
(image_sum_b.ptr(y + h)[x + w] - image_sum_b.ptr(y)[x + w]) -
(image_sum_b.ptr(y + h)[x] - image_sum_b.ptr(y)[x]));
float image_sqsum_b_ = (float)(
(image_sqsum_b.ptr(y + h)[x + w] - image_sqsum_b.ptr(y)[x + w]) -
(image_sqsum_b.ptr(y + h)[x] - image_sqsum_b.ptr(y)[x]));
float num = result.ptr(y)[x] - image_sum_r_ * templ_sum_scale_r
- image_sum_g_ * templ_sum_scale_g
- image_sum_b_ * templ_sum_scale_b;
float denum = sqrtf(templ_sqsum_scale * (image_sqsum_r_ - weight * image_sum_r_ * image_sum_r_
+ image_sqsum_g_ - weight * image_sum_g_ * image_sum_g_
+ image_sqsum_b_ - weight * image_sum_b_ * image_sum_b_));
result.ptr(y)[x] = normAcc(num, denum);
}
}
void matchTemplatePrepared_CCOFF_NORMED_8UC3(
int w, int h,
const PtrStepSz<unsigned int> image_sum_r, const PtrStepSz<unsigned long long> image_sqsum_r,
const PtrStepSz<unsigned int> image_sum_g, const PtrStepSz<unsigned long long> image_sqsum_g,
const PtrStepSz<unsigned int> image_sum_b, const PtrStepSz<unsigned long long> image_sqsum_b,
unsigned int templ_sum_r, unsigned long long templ_sqsum_r,
unsigned int templ_sum_g, unsigned long long templ_sqsum_g,
unsigned int templ_sum_b, unsigned long long templ_sqsum_b,
PtrStepSzf result, cudaStream_t stream)
{
dim3 threads(32, 8);
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
float weight = 1.f / (w * h);
float templ_sum_scale_r = templ_sum_r * weight;
float templ_sum_scale_g = templ_sum_g * weight;
float templ_sum_scale_b = templ_sum_b * weight;
float templ_sqsum_scale = templ_sqsum_r - weight * templ_sum_r * templ_sum_r
+ templ_sqsum_g - weight * templ_sum_g * templ_sum_g
+ templ_sqsum_b - weight * templ_sum_b * templ_sum_b;
matchTemplatePreparedKernel_CCOFF_NORMED_8UC3<<<grid, threads, 0, stream>>>(
w, h, weight,
templ_sum_scale_r, templ_sum_scale_g, templ_sum_scale_b,
templ_sqsum_scale,
image_sum_r, image_sqsum_r,
image_sum_g, image_sqsum_g,
image_sum_b, image_sqsum_b,
result);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
__global__ void matchTemplatePreparedKernel_CCOFF_NORMED_8UC4(
int w, int h, float weight,
float templ_sum_scale_r, float templ_sum_scale_g, float templ_sum_scale_b,
float templ_sum_scale_a, float templ_sqsum_scale,
const PtrStep<unsigned int> image_sum_r, const PtrStep<unsigned long long> image_sqsum_r,
const PtrStep<unsigned int> image_sum_g, const PtrStep<unsigned long long> image_sqsum_g,
const PtrStep<unsigned int> image_sum_b, const PtrStep<unsigned long long> image_sqsum_b,
const PtrStep<unsigned int> image_sum_a, const PtrStep<unsigned long long> image_sqsum_a,
PtrStepSzf result)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x < result.cols && y < result.rows)
{
float image_sum_r_ = (float)(
(image_sum_r.ptr(y + h)[x + w] - image_sum_r.ptr(y)[x + w]) -
(image_sum_r.ptr(y + h)[x] - image_sum_r.ptr(y)[x]));
float image_sqsum_r_ = (float)(
(image_sqsum_r.ptr(y + h)[x + w] - image_sqsum_r.ptr(y)[x + w]) -
(image_sqsum_r.ptr(y + h)[x] - image_sqsum_r.ptr(y)[x]));
float image_sum_g_ = (float)(
(image_sum_g.ptr(y + h)[x + w] - image_sum_g.ptr(y)[x + w]) -
(image_sum_g.ptr(y + h)[x] - image_sum_g.ptr(y)[x]));
float image_sqsum_g_ = (float)(
(image_sqsum_g.ptr(y + h)[x + w] - image_sqsum_g.ptr(y)[x + w]) -
(image_sqsum_g.ptr(y + h)[x] - image_sqsum_g.ptr(y)[x]));
float image_sum_b_ = (float)(
(image_sum_b.ptr(y + h)[x + w] - image_sum_b.ptr(y)[x + w]) -
(image_sum_b.ptr(y + h)[x] - image_sum_b.ptr(y)[x]));
float image_sqsum_b_ = (float)(
(image_sqsum_b.ptr(y + h)[x + w] - image_sqsum_b.ptr(y)[x + w]) -
(image_sqsum_b.ptr(y + h)[x] - image_sqsum_b.ptr(y)[x]));
float image_sum_a_ = (float)(
(image_sum_a.ptr(y + h)[x + w] - image_sum_a.ptr(y)[x + w]) -
(image_sum_a.ptr(y + h)[x] - image_sum_a.ptr(y)[x]));
float image_sqsum_a_ = (float)(
(image_sqsum_a.ptr(y + h)[x + w] - image_sqsum_a.ptr(y)[x + w]) -
(image_sqsum_a.ptr(y + h)[x] - image_sqsum_a.ptr(y)[x]));
float num = result.ptr(y)[x] - image_sum_r_ * templ_sum_scale_r - image_sum_g_ * templ_sum_scale_g
- image_sum_b_ * templ_sum_scale_b - image_sum_a_ * templ_sum_scale_a;
float denum = sqrtf(templ_sqsum_scale * (image_sqsum_r_ - weight * image_sum_r_ * image_sum_r_
+ image_sqsum_g_ - weight * image_sum_g_ * image_sum_g_
+ image_sqsum_b_ - weight * image_sum_b_ * image_sum_b_
+ image_sqsum_a_ - weight * image_sum_a_ * image_sum_a_));
result.ptr(y)[x] = normAcc(num, denum);
}
}
void matchTemplatePrepared_CCOFF_NORMED_8UC4(
int w, int h,
const PtrStepSz<unsigned int> image_sum_r, const PtrStepSz<unsigned long long> image_sqsum_r,
const PtrStepSz<unsigned int> image_sum_g, const PtrStepSz<unsigned long long> image_sqsum_g,
const PtrStepSz<unsigned int> image_sum_b, const PtrStepSz<unsigned long long> image_sqsum_b,
const PtrStepSz<unsigned int> image_sum_a, const PtrStepSz<unsigned long long> image_sqsum_a,
unsigned int templ_sum_r, unsigned long long templ_sqsum_r,
unsigned int templ_sum_g, unsigned long long templ_sqsum_g,
unsigned int templ_sum_b, unsigned long long templ_sqsum_b,
unsigned int templ_sum_a, unsigned long long templ_sqsum_a,
PtrStepSzf result, cudaStream_t stream)
{
dim3 threads(32, 8);
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
float weight = 1.f / (w * h);
float templ_sum_scale_r = templ_sum_r * weight;
float templ_sum_scale_g = templ_sum_g * weight;
float templ_sum_scale_b = templ_sum_b * weight;
float templ_sum_scale_a = templ_sum_a * weight;
float templ_sqsum_scale = templ_sqsum_r - weight * templ_sum_r * templ_sum_r
+ templ_sqsum_g - weight * templ_sum_g * templ_sum_g
+ templ_sqsum_b - weight * templ_sum_b * templ_sum_b
+ templ_sqsum_a - weight * templ_sum_a * templ_sum_a;
matchTemplatePreparedKernel_CCOFF_NORMED_8UC4<<<grid, threads, 0, stream>>>(
w, h, weight,
templ_sum_scale_r, templ_sum_scale_g, templ_sum_scale_b, templ_sum_scale_a,
templ_sqsum_scale,
image_sum_r, image_sqsum_r,
image_sum_g, image_sqsum_g,
image_sum_b, image_sqsum_b,
image_sum_a, image_sqsum_a,
result);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
//////////////////////////////////////////////////////////////////////
// normalize
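// normalize_8U turns a raw cross-correlation map into CCORR_NORMED: each value is
// divided by sqrt(window_sqsum * templ_sqsum). image_sqsum is the integral of the
// channel-interleaved 8-bit image (width * cn columns), so sampling at column
// (x + w) * cn accumulates the squared values of every channel up to pixel x + w;
// the difference below therefore yields the window's squared sum over all channels.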
template <int cn>
__global__ void normalizeKernel_8U(
int w, int h, const PtrStep<unsigned long long> image_sqsum,
unsigned long long templ_sqsum, PtrStepSzf result)
{
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x < result.cols && y < result.rows)
{
float image_sqsum_ = (float)(
(image_sqsum.ptr(y + h)[(x + w) * cn] - image_sqsum.ptr(y)[(x + w) * cn]) -
(image_sqsum.ptr(y + h)[x * cn] - image_sqsum.ptr(y)[x * cn]));
result.ptr(y)[x] = normAcc(result.ptr(y)[x], sqrtf(image_sqsum_ * templ_sqsum));
}
}
void normalize_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum,
unsigned long long templ_sqsum, PtrStepSzf result, int cn, cudaStream_t stream)
{
dim3 threads(32, 8);
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
switch (cn)
{
case 1:
normalizeKernel_8U<1><<<grid, threads, 0, stream>>>(w, h, image_sqsum, templ_sqsum, result);
break;
case 2:
normalizeKernel_8U<2><<<grid, threads, 0, stream>>>(w, h, image_sqsum, templ_sqsum, result);
break;
case 3:
normalizeKernel_8U<3><<<grid, threads, 0, stream>>>(w, h, image_sqsum, templ_sqsum, result);
break;
case 4:
normalizeKernel_8U<4><<<grid, threads, 0, stream>>>(w, h, image_sqsum, templ_sqsum, result);
break;
}
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
//////////////////////////////////////////////////////////////////////
// extractFirstChannel
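// extractFirstChannel_32F copies channel 0 of a cn-channel 32F image into a
// single-channel map: first() (defined elsewhere in this file) selects the leading
// component of the vector type, collapsing the per-channel correlation output back
// to a single response image.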
template <int cn>
__global__ void extractFirstChannel_32F(const PtrStepb image, PtrStepSzf result)
{
typedef typename TypeVec<float, cn>::vec_type Typef;
int x = blockDim.x * blockIdx.x + threadIdx.x;
int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < result.cols && y < result.rows)
{
Typef val = ((const Typef*)image.ptr(y))[x];
result.ptr(y)[x] = first(val);
}
}
void extractFirstChannel_32F(const PtrStepSzb image, PtrStepSzf result, int cn, cudaStream_t stream)
{
dim3 threads(32, 8);
dim3 grid(divUp(result.cols, threads.x), divUp(result.rows, threads.y));
switch (cn)
{
case 1:
extractFirstChannel_32F<1><<<grid, threads, 0, stream>>>(image, result);
break;
case 2:
extractFirstChannel_32F<2><<<grid, threads, 0, stream>>>(image, result);
break;
case 3:
extractFirstChannel_32F<3><<<grid, threads, 0, stream>>>(image, result);
break;
case 4:
extractFirstChannel_32F<4><<<grid, threads, 0, stream>>>(image, result);
break;
}
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
} //namespace match_template
}}} // namespace cv { namespace cuda { namespace device
#endif /* CUDA_DISABLER */


@@ -0,0 +1,182 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#if !defined CUDA_DISABLER
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/vec_traits.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"
#include "opencv2/core/cuda/border_interpolate.hpp"
namespace cv { namespace cuda { namespace device
{
namespace imgproc
{
texture<uchar4, 2> tex_meanshift;
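// do_mean_shift runs the mean-shift iteration for one pixel: starting from (x0, y0)
// it averages the positions and colors of all pixels inside the (2*sp+1)^2 spatial
// window whose squared color distance to the current value is at most sr*sr, moves
// to that average, and stops after maxIter iterations or once the combined spatial
// plus (squared) color shift drops below eps. The converged color is written to
// `out`; the converged position is returned for the meanShiftProc variant.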
__device__ short2 do_mean_shift(int x0, int y0, unsigned char* out,
size_t out_step, int cols, int rows,
int sp, int sr, int maxIter, float eps)
{
int isr2 = sr*sr;
uchar4 c = tex2D(tex_meanshift, x0, y0 );
// iterate meanshift procedure
for( int iter = 0; iter < maxIter; iter++ )
{
int count = 0;
int s0 = 0, s1 = 0, s2 = 0, sx = 0, sy = 0;
float icount;
//mean shift: process pixels in window (p-sigmaSp)x(p+sigmaSp)
int minx = x0-sp;
int miny = y0-sp;
int maxx = x0+sp;
int maxy = y0+sp;
for( int y = miny; y <= maxy; y++)
{
int rowCount = 0;
for( int x = minx; x <= maxx; x++ )
{
uchar4 t = tex2D( tex_meanshift, x, y );
int norm2 = (t.x - c.x) * (t.x - c.x) + (t.y - c.y) * (t.y - c.y) + (t.z - c.z) * (t.z - c.z);
if( norm2 <= isr2 )
{
s0 += t.x; s1 += t.y; s2 += t.z;
sx += x; rowCount++;
}
}
count += rowCount;
sy += y*rowCount;
}
if( count == 0 )
break;
icount = 1.f/count;
int x1 = __float2int_rz(sx*icount);
int y1 = __float2int_rz(sy*icount);
s0 = __float2int_rz(s0*icount);
s1 = __float2int_rz(s1*icount);
s2 = __float2int_rz(s2*icount);
int norm2 = (s0 - c.x) * (s0 - c.x) + (s1 - c.y) * (s1 - c.y) + (s2 - c.z) * (s2 - c.z);
bool stopFlag = (x0 == x1 && y0 == y1) || (::abs(x1-x0) + ::abs(y1-y0) + norm2 <= eps);
x0 = x1; y0 = y1;
c.x = s0; c.y = s1; c.z = s2;
if( stopFlag )
break;
}
int base = (blockIdx.y * blockDim.y + threadIdx.y) * out_step + (blockIdx.x * blockDim.x + threadIdx.x) * 4 * sizeof(uchar);
*(uchar4*)(out + base) = c;
return make_short2((short)x0, (short)y0);
}
__global__ void meanshift_kernel(unsigned char* out, size_t out_step, int cols, int rows, int sp, int sr, int maxIter, float eps )
{
int x0 = blockIdx.x * blockDim.x + threadIdx.x;
int y0 = blockIdx.y * blockDim.y + threadIdx.y;
if( x0 < cols && y0 < rows )
do_mean_shift(x0, y0, out, out_step, cols, rows, sp, sr, maxIter, eps);
}
void meanShiftFiltering_gpu(const PtrStepSzb& src, PtrStepSzb dst, int sp, int sr, int maxIter, float eps, cudaStream_t stream)
{
dim3 grid(1, 1, 1);
dim3 threads(32, 8, 1);
grid.x = divUp(src.cols, threads.x);
grid.y = divUp(src.rows, threads.y);
cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar4>();
cudaSafeCall( cudaBindTexture2D( 0, tex_meanshift, src.data, desc, src.cols, src.rows, src.step ) );
meanshift_kernel<<< grid, threads, 0, stream >>>( dst.data, dst.step, dst.cols, dst.rows, sp, sr, maxIter, eps );
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
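// meanShiftProc additionally records where each pixel converged: outr receives the
// filtered color (as in meanShiftFiltering) and outsp the final (x, y) position
// packed as short2, typically consumed afterwards by mean-shift segmentation.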
__global__ void meanshiftproc_kernel(unsigned char* outr, size_t outrstep,
unsigned char* outsp, size_t outspstep,
int cols, int rows,
int sp, int sr, int maxIter, float eps)
{
int x0 = blockIdx.x * blockDim.x + threadIdx.x;
int y0 = blockIdx.y * blockDim.y + threadIdx.y;
if( x0 < cols && y0 < rows )
{
int basesp = (blockIdx.y * blockDim.y + threadIdx.y) * outspstep + (blockIdx.x * blockDim.x + threadIdx.x) * 2 * sizeof(short);
*(short2*)(outsp + basesp) = do_mean_shift(x0, y0, outr, outrstep, cols, rows, sp, sr, maxIter, eps);
}
}
void meanShiftProc_gpu(const PtrStepSzb& src, PtrStepSzb dstr, PtrStepSzb dstsp, int sp, int sr, int maxIter, float eps, cudaStream_t stream)
{
dim3 grid(1, 1, 1);
dim3 threads(32, 8, 1);
grid.x = divUp(src.cols, threads.x);
grid.y = divUp(src.rows, threads.y);
cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar4>();
cudaSafeCall( cudaBindTexture2D( 0, tex_meanshift, src.data, desc, src.cols, src.rows, src.step ) );
meanshiftproc_kernel<<< grid, threads, 0, stream >>>( dstr.data, dstr.step, dstsp.data, dstsp.step, dstr.cols, dstr.rows, sp, sr, maxIter, eps );
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
}
}}}
#endif


@@ -0,0 +1,274 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __cvt_color_internal_h__
#define __cvt_color_internal_h__
namespace cv { namespace cuda { namespace device
{
#define OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name) \
void name(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
#define OPENCV_GPU_DECLARE_CVTCOLOR_ALL(name) \
OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name ## _8u) \
OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name ## _16u) \
OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name ## _32f)
#define OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(name) \
OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name ## _8u) \
OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name ## _32f)
#define OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(name) \
OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name ## _8u) \
OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name ## _32f) \
OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name ## _full_8u) \
OPENCV_GPU_DECLARE_CVTCOLOR_ONE(name ## _full_32f)
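// For example, OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgr_to_rgb) expands to three declarations:
//   void bgr_to_rgb_8u (PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//   void bgr_to_rgb_16u(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
//   void bgr_to_rgb_32f(PtrStepSzb src, PtrStepSzb dst, cudaStream_t stream);
// The _8U32F and _8U32F_FULL variants declare only the 8u/32f (plus *_full_*) versions
// used below for the Lab/Luv and HSV/HLS conversions.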
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgr_to_rgb)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgr_to_bgra)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgr_to_rgba)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgra_to_bgr)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgra_to_rgb)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgra_to_rgba)
OPENCV_GPU_DECLARE_CVTCOLOR_ONE(bgr_to_bgr555)
OPENCV_GPU_DECLARE_CVTCOLOR_ONE(bgr_to_bgr565)
OPENCV_GPU_DECLARE_CVTCOLOR_ONE(rgb_to_bgr555)
OPENCV_GPU_DECLARE_CVTCOLOR_ONE(rgb_to_bgr565)
OPENCV_GPU_DECLARE_CVTCOLOR_ONE(bgra_to_bgr555)
OPENCV_GPU_DECLARE_CVTCOLOR_ONE(bgra_to_bgr565)
OPENCV_GPU_DECLARE_CVTCOLOR_ONE(rgba_to_bgr555)
OPENCV_GPU_DECLARE_CVTCOLOR_ONE(rgba_to_bgr565)
OPENCV_GPU_DECLARE_CVTCOLOR_ONE(bgr555_to_rgb)
OPENCV_GPU_DECLARE_CVTCOLOR_ONE(bgr565_to_rgb)
OPENCV_GPU_DECLARE_CVTCOLOR_ONE(bgr555_to_bgr)
OPENCV_GPU_DECLARE_CVTCOLOR_ONE(bgr565_to_bgr)
OPENCV_GPU_DECLARE_CVTCOLOR_ONE(bgr555_to_rgba)
OPENCV_GPU_DECLARE_CVTCOLOR_ONE(bgr565_to_rgba)
OPENCV_GPU_DECLARE_CVTCOLOR_ONE(bgr555_to_bgra)
OPENCV_GPU_DECLARE_CVTCOLOR_ONE(bgr565_to_bgra)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(gray_to_bgr)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(gray_to_bgra)
OPENCV_GPU_DECLARE_CVTCOLOR_ONE(gray_to_bgr555)
OPENCV_GPU_DECLARE_CVTCOLOR_ONE(gray_to_bgr565)
OPENCV_GPU_DECLARE_CVTCOLOR_ONE(bgr555_to_gray)
OPENCV_GPU_DECLARE_CVTCOLOR_ONE(bgr565_to_gray)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(rgb_to_gray)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgr_to_gray)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(rgba_to_gray)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgra_to_gray)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(rgb_to_yuv)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(rgba_to_yuv)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(rgb_to_yuv4)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(rgba_to_yuv4)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgr_to_yuv)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgra_to_yuv)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgr_to_yuv4)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgra_to_yuv4)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(yuv_to_rgb)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(yuv_to_rgba)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(yuv4_to_rgb)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(yuv4_to_rgba)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(yuv_to_bgr)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(yuv_to_bgra)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(yuv4_to_bgr)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(yuv4_to_bgra)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(rgb_to_YCrCb)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(rgba_to_YCrCb)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(rgb_to_YCrCb4)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(rgba_to_YCrCb4)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgr_to_YCrCb)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgra_to_YCrCb)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgr_to_YCrCb4)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgra_to_YCrCb4)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(YCrCb_to_rgb)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(YCrCb_to_rgba)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(YCrCb4_to_rgb)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(YCrCb4_to_rgba)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(YCrCb_to_bgr)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(YCrCb_to_bgra)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(YCrCb4_to_bgr)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(YCrCb4_to_bgra)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(rgb_to_xyz)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(rgba_to_xyz)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(rgb_to_xyz4)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(rgba_to_xyz4)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgr_to_xyz)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgra_to_xyz)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgr_to_xyz4)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(bgra_to_xyz4)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(xyz_to_rgb)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(xyz4_to_rgb)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(xyz_to_rgba)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(xyz4_to_rgba)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(xyz_to_bgr)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(xyz4_to_bgr)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(xyz_to_bgra)
OPENCV_GPU_DECLARE_CVTCOLOR_ALL(xyz4_to_bgra)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(rgb_to_hsv)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(rgba_to_hsv)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(rgb_to_hsv4)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(rgba_to_hsv4)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(bgr_to_hsv)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(bgra_to_hsv)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(bgr_to_hsv4)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(bgra_to_hsv4)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(hsv_to_rgb)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(hsv_to_rgba)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(hsv4_to_rgb)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(hsv4_to_rgba)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(hsv_to_bgr)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(hsv_to_bgra)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(hsv4_to_bgr)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(hsv4_to_bgra)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(rgb_to_hls)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(rgba_to_hls)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(rgb_to_hls4)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(rgba_to_hls4)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(bgr_to_hls)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(bgra_to_hls)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(bgr_to_hls4)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(bgra_to_hls4)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(hls_to_rgb)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(hls_to_rgba)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(hls4_to_rgb)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(hls4_to_rgba)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(hls_to_bgr)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(hls_to_bgra)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(hls4_to_bgr)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL(hls4_to_bgra)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(rgb_to_lab)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(rgba_to_lab)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(rgb_to_lab4)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(rgba_to_lab4)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(bgr_to_lab)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(bgra_to_lab)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(bgr_to_lab4)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(bgra_to_lab4)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lrgb_to_lab)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lrgba_to_lab)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lrgb_to_lab4)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lrgba_to_lab4)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lbgr_to_lab)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lbgra_to_lab)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lbgr_to_lab4)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lbgra_to_lab4)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lab_to_rgb)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lab4_to_rgb)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lab_to_rgba)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lab4_to_rgba)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lab_to_bgr)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lab4_to_bgr)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lab_to_bgra)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lab4_to_bgra)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lab_to_lrgb)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lab4_to_lrgb)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lab_to_lrgba)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lab4_to_lrgba)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lab_to_lbgr)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lab4_to_lbgr)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lab_to_lbgra)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lab4_to_lbgra)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(rgb_to_luv)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(rgba_to_luv)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(rgb_to_luv4)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(rgba_to_luv4)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(bgr_to_luv)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(bgra_to_luv)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(bgr_to_luv4)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(bgra_to_luv4)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lrgb_to_luv)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lrgba_to_luv)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lrgb_to_luv4)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lrgba_to_luv4)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lbgr_to_luv)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lbgra_to_luv)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lbgr_to_luv4)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(lbgra_to_luv4)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(luv_to_rgb)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(luv4_to_rgb)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(luv_to_rgba)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(luv4_to_rgba)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(luv_to_bgr)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(luv4_to_bgr)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(luv_to_bgra)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(luv4_to_bgra)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(luv_to_lrgb)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(luv4_to_lrgb)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(luv_to_lrgba)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(luv4_to_lrgba)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(luv_to_lbgr)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(luv4_to_lbgr)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(luv_to_lbgra)
OPENCV_GPU_DECLARE_CVTCOLOR_8U32F(luv4_to_lbgra)
#undef OPENCV_GPU_DECLARE_CVTCOLOR_ONE
#undef OPENCV_GPU_DECLARE_CVTCOLOR_ALL
#undef OPENCV_GPU_DECLARE_CVTCOLOR_8U32F
#undef OPENCV_GPU_DECLARE_CVTCOLOR_8U32F_FULL
}}}
#endif


@@ -0,0 +1,906 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace cv;
using namespace cv::cuda;
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) || !defined(HAVE_OPENCV_CUDAARITHM)
Ptr<GeneralizedHoughBallard> cv::cuda::createGeneralizedHoughBallard() { throw_no_cuda(); return Ptr<GeneralizedHoughBallard>(); }
Ptr<GeneralizedHoughGuil> cv::cuda::createGeneralizedHoughGuil() { throw_no_cuda(); return Ptr<GeneralizedHoughGuil>(); }
#else /* !defined (HAVE_CUDA) */
namespace cv { namespace cuda { namespace device
{
namespace ght
{
template <typename T>
int buildEdgePointList_gpu(PtrStepSzb edges, PtrStepSzb dx, PtrStepSzb dy, unsigned int* coordList, float* thetaList);
void buildRTable_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
PtrStepSz<short2> r_table, int* r_sizes,
short2 templCenter, int levels);
void Ballard_Pos_calcHist_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
PtrStepSz<short2> r_table, const int* r_sizes,
PtrStepSzi hist,
float dp, int levels);
int Ballard_Pos_findPosInHist_gpu(PtrStepSzi hist, float4* out, int3* votes, int maxSize, float dp, int threshold);
void Guil_Full_setTemplFeatures(PtrStepb p1_pos, PtrStepb p1_theta, PtrStepb p2_pos, PtrStepb d12, PtrStepb r1, PtrStepb r2);
void Guil_Full_setImageFeatures(PtrStepb p1_pos, PtrStepb p1_theta, PtrStepb p2_pos, PtrStepb d12, PtrStepb r1, PtrStepb r2);
void Guil_Full_buildTemplFeatureList_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
int* sizes, int maxSize,
float xi, float angleEpsilon, int levels,
float2 center, float maxDist);
void Guil_Full_buildImageFeatureList_gpu(const unsigned int* coordList, const float* thetaList, int pointsCount,
int* sizes, int maxSize,
float xi, float angleEpsilon, int levels,
float2 center, float maxDist);
void Guil_Full_calcOHist_gpu(const int* templSizes, const int* imageSizes, int* OHist,
float minAngle, float maxAngle, float angleStep, int angleRange,
int levels, int tMaxSize);
void Guil_Full_calcSHist_gpu(const int* templSizes, const int* imageSizes, int* SHist,
float angle, float angleEpsilon,
float minScale, float maxScale, float iScaleStep, int scaleRange,
int levels, int tMaxSize);
void Guil_Full_calcPHist_gpu(const int* templSizes, const int* imageSizes, PtrStepSzi PHist,
float angle, float angleEpsilon, float scale,
float dp,
int levels, int tMaxSize);
int Guil_Full_findPosInHist_gpu(PtrStepSzi hist, float4* out, int3* votes, int curSize, int maxSize,
float angle, int angleVotes, float scale, int scaleVotes,
float dp, int threshold);
}
}}}
// common
namespace
{
class GeneralizedHoughBase
{
protected:
GeneralizedHoughBase();
virtual ~GeneralizedHoughBase() {}
void setTemplateImpl(InputArray templ, Point templCenter);
void setTemplateImpl(InputArray edges, InputArray dx, InputArray dy, Point templCenter);
void detectImpl(InputArray image, OutputArray positions, OutputArray votes);
void detectImpl(InputArray edges, InputArray dx, InputArray dy, OutputArray positions, OutputArray votes);
void buildEdgePointList(const GpuMat& edges, const GpuMat& dx, const GpuMat& dy);
virtual void processTempl() = 0;
virtual void processImage() = 0;
int cannyLowThresh_;
int cannyHighThresh_;
double minDist_;
double dp_;
int maxBufferSize_;
Size templSize_;
Point templCenter_;
GpuMat templEdges_;
GpuMat templDx_;
GpuMat templDy_;
Size imageSize_;
GpuMat imageEdges_;
GpuMat imageDx_;
GpuMat imageDy_;
GpuMat edgePointList_;
GpuMat outBuf_;
int posCount_;
private:
#ifdef HAVE_OPENCV_CUDAFILTERS
void calcEdges(InputArray src, GpuMat& edges, GpuMat& dx, GpuMat& dy);
#endif
void filterMinDist();
void convertTo(OutputArray positions, OutputArray votes);
#ifdef HAVE_OPENCV_CUDAFILTERS
Ptr<cuda::CannyEdgeDetector> canny_;
Ptr<cuda::Filter> filterDx_;
Ptr<cuda::Filter> filterDy_;
#endif
std::vector<float4> oldPosBuf_;
std::vector<int3> oldVoteBuf_;
std::vector<float4> newPosBuf_;
std::vector<int3> newVoteBuf_;
std::vector<int> indexies_;
};
GeneralizedHoughBase::GeneralizedHoughBase()
{
cannyLowThresh_ = 50;
cannyHighThresh_ = 100;
minDist_ = 1.0;
dp_ = 1.0;
maxBufferSize_ = 10000;
#ifdef HAVE_OPENCV_CUDAFILTERS
canny_ = cuda::createCannyEdgeDetector(cannyLowThresh_, cannyHighThresh_);
filterDx_ = cuda::createSobelFilter(CV_8UC1, CV_32S, 1, 0);
filterDy_ = cuda::createSobelFilter(CV_8UC1, CV_32S, 0, 1);
#endif
}
#ifdef HAVE_OPENCV_CUDAFILTERS
void GeneralizedHoughBase::calcEdges(InputArray _src, GpuMat& edges, GpuMat& dx, GpuMat& dy)
{
GpuMat src = _src.getGpuMat();
CV_Assert( src.type() == CV_8UC1 );
CV_Assert( cannyLowThresh_ > 0 && cannyLowThresh_ < cannyHighThresh_ );
ensureSizeIsEnough(src.size(), CV_32SC1, dx);
ensureSizeIsEnough(src.size(), CV_32SC1, dy);
filterDx_->apply(src, dx);
filterDy_->apply(src, dy);
ensureSizeIsEnough(src.size(), CV_8UC1, edges);
canny_->setLowThreshold(cannyLowThresh_);
canny_->setHighThreshold(cannyHighThresh_);
canny_->detect(dx, dy, edges);
}
#endif
void GeneralizedHoughBase::setTemplateImpl(InputArray templ, Point templCenter)
{
#ifndef HAVE_OPENCV_CUDAFILTERS
(void) templ;
(void) templCenter;
throw_no_cuda();
#else
calcEdges(templ, templEdges_, templDx_, templDy_);
if (templCenter == Point(-1, -1))
templCenter = Point(templEdges_.cols / 2, templEdges_.rows / 2);
templSize_ = templEdges_.size();
templCenter_ = templCenter;
processTempl();
#endif
}
void GeneralizedHoughBase::setTemplateImpl(InputArray edges, InputArray dx, InputArray dy, Point templCenter)
{
edges.getGpuMat().copyTo(templEdges_);
dx.getGpuMat().copyTo(templDx_);
dy.getGpuMat().copyTo(templDy_);
CV_Assert( templEdges_.type() == CV_8UC1 );
CV_Assert( templDx_.type() == CV_32FC1 && templDx_.size() == templEdges_.size() );
CV_Assert( templDy_.type() == templDx_.type() && templDy_.size() == templEdges_.size() );
if (templCenter == Point(-1, -1))
templCenter = Point(templEdges_.cols / 2, templEdges_.rows / 2);
templSize_ = templEdges_.size();
templCenter_ = templCenter;
processTempl();
}
void GeneralizedHoughBase::detectImpl(InputArray image, OutputArray positions, OutputArray votes)
{
#ifndef HAVE_OPENCV_CUDAFILTERS
(void) image;
(void) positions;
(void) votes;
throw_no_cuda();
#else
calcEdges(image, imageEdges_, imageDx_, imageDy_);
imageSize_ = imageEdges_.size();
posCount_ = 0;
processImage();
if (posCount_ == 0)
{
positions.release();
if (votes.needed())
votes.release();
}
else
{
if (minDist_ > 1)
filterMinDist();
convertTo(positions, votes);
}
#endif
}
void GeneralizedHoughBase::detectImpl(InputArray edges, InputArray dx, InputArray dy, OutputArray positions, OutputArray votes)
{
edges.getGpuMat().copyTo(imageEdges_);
dx.getGpuMat().copyTo(imageDx_);
dy.getGpuMat().copyTo(imageDy_);
CV_Assert( imageEdges_.type() == CV_8UC1 );
CV_Assert( imageDx_.type() == CV_32FC1 && imageDx_.size() == imageEdges_.size() );
CV_Assert( imageDy_.type() == imageDx_.type() && imageDy_.size() == imageEdges_.size() );
imageSize_ = imageEdges_.size();
posCount_ = 0;
processImage();
if (posCount_ == 0)
{
positions.release();
if (votes.needed())
votes.release();
}
else
{
if (minDist_ > 1)
filterMinDist();
convertTo(positions, votes);
}
}
void GeneralizedHoughBase::buildEdgePointList(const GpuMat& edges, const GpuMat& dx, const GpuMat& dy)
{
using namespace cv::cuda::device::ght;
typedef int (*func_t)(PtrStepSzb edges, PtrStepSzb dx, PtrStepSzb dy, unsigned int* coordList, float* thetaList);
static const func_t funcs[] =
{
0,
0,
0,
buildEdgePointList_gpu<short>,
buildEdgePointList_gpu<int>,
buildEdgePointList_gpu<float>,
0
};
CV_Assert( edges.type() == CV_8UC1 );
CV_Assert( dx.size() == edges.size() );
CV_Assert( dy.type() == dx.type() && dy.size() == edges.size() );
const func_t func = funcs[dx.depth()];
CV_Assert( func != 0 );
edgePointList_.cols = (int) (edgePointList_.step / sizeof(int));
ensureSizeIsEnough(2, edges.size().area(), CV_32SC1, edgePointList_);
edgePointList_.cols = func(edges, dx, dy, edgePointList_.ptr<unsigned int>(0), edgePointList_.ptr<float>(1));
}
struct IndexCmp
{
const int3* aux;
explicit IndexCmp(const int3* _aux) : aux(_aux) {}
bool operator ()(int l1, int l2) const
{
return aux[l1].x > aux[l2].x;
}
};
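// filterMinDist is a greedy non-maximum suppression on the host: candidates are
// downloaded, sorted by vote count (IndexCmp orders indices by descending votes.x),
// and accepted one by one only if no already-accepted candidate lies closer than
// minDist_. The coarse grid with cellSize = round(minDist_) limits the distance
// checks to the 3x3 neighbourhood of cells around each candidate.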
void GeneralizedHoughBase::filterMinDist()
{
oldPosBuf_.resize(posCount_);
oldVoteBuf_.resize(posCount_);
cudaSafeCall( cudaMemcpy(&oldPosBuf_[0], outBuf_.ptr(0), posCount_ * sizeof(float4), cudaMemcpyDeviceToHost) );
cudaSafeCall( cudaMemcpy(&oldVoteBuf_[0], outBuf_.ptr(1), posCount_ * sizeof(int3), cudaMemcpyDeviceToHost) );
indexies_.resize(posCount_);
for (int i = 0; i < posCount_; ++i)
indexies_[i] = i;
std::sort(indexies_.begin(), indexies_.end(), IndexCmp(&oldVoteBuf_[0]));
newPosBuf_.clear();
newVoteBuf_.clear();
newPosBuf_.reserve(posCount_);
newVoteBuf_.reserve(posCount_);
const int cellSize = cvRound(minDist_);
const int gridWidth = (imageSize_.width + cellSize - 1) / cellSize;
const int gridHeight = (imageSize_.height + cellSize - 1) / cellSize;
std::vector< std::vector<Point2f> > grid(gridWidth * gridHeight);
const double minDist2 = minDist_ * minDist_;
for (int i = 0; i < posCount_; ++i)
{
const int ind = indexies_[i];
Point2f p(oldPosBuf_[ind].x, oldPosBuf_[ind].y);
bool good = true;
const int xCell = static_cast<int>(p.x / cellSize);
const int yCell = static_cast<int>(p.y / cellSize);
int x1 = xCell - 1;
int y1 = yCell - 1;
int x2 = xCell + 1;
int y2 = yCell + 1;
// boundary check
x1 = std::max(0, x1);
y1 = std::max(0, y1);
x2 = std::min(gridWidth - 1, x2);
y2 = std::min(gridHeight - 1, y2);
for (int yy = y1; yy <= y2; ++yy)
{
for (int xx = x1; xx <= x2; ++xx)
{
const std::vector<Point2f>& m = grid[yy * gridWidth + xx];
for(size_t j = 0; j < m.size(); ++j)
{
const Point2f d = p - m[j];
if (d.ddot(d) < minDist2)
{
good = false;
goto break_out;
}
}
}
}
break_out:
if(good)
{
grid[yCell * gridWidth + xCell].push_back(p);
newPosBuf_.push_back(oldPosBuf_[ind]);
newVoteBuf_.push_back(oldVoteBuf_[ind]);
}
}
posCount_ = static_cast<int>(newPosBuf_.size());
cudaSafeCall( cudaMemcpy(outBuf_.ptr(0), &newPosBuf_[0], posCount_ * sizeof(float4), cudaMemcpyHostToDevice) );
cudaSafeCall( cudaMemcpy(outBuf_.ptr(1), &newVoteBuf_[0], posCount_ * sizeof(int3), cudaMemcpyHostToDevice) );
}
void GeneralizedHoughBase::convertTo(OutputArray positions, OutputArray votes)
{
ensureSizeIsEnough(1, posCount_, CV_32FC4, positions);
GpuMat(1, posCount_, CV_32FC4, outBuf_.ptr(0), outBuf_.step).copyTo(positions);
if (votes.needed())
{
ensureSizeIsEnough(1, posCount_, CV_32SC3, votes);
GpuMat(1, posCount_, CV_32SC3, outBuf_.ptr(1), outBuf_.step).copyTo(votes);
}
}
}
// GeneralizedHoughBallard
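// Ballard voting scheme: processTempl() builds an R-table with levels_ + 1 rows, one
// per quantized gradient orientation, each row holding the displacements (short2)
// from template edge points to the template centre. At detection time
// Ballard_Pos_calcHist_gpu lets every image edge point add those displacements,
// scaled by 1/dp_, into a 2D accumulator, and findPosInHist() keeps every cell whose
// vote count reaches votesThreshold_.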
namespace
{
class GeneralizedHoughBallardImpl : public GeneralizedHoughBallard, private GeneralizedHoughBase
{
public:
GeneralizedHoughBallardImpl();
void setTemplate(InputArray templ, Point templCenter) { setTemplateImpl(templ, templCenter); }
void setTemplate(InputArray edges, InputArray dx, InputArray dy, Point templCenter) { setTemplateImpl(edges, dx, dy, templCenter); }
void detect(InputArray image, OutputArray positions, OutputArray votes) { detectImpl(image, positions, votes); }
void detect(InputArray edges, InputArray dx, InputArray dy, OutputArray positions, OutputArray votes) { detectImpl(edges, dx, dy, positions, votes); }
void setCannyLowThresh(int cannyLowThresh) { cannyLowThresh_ = cannyLowThresh; }
int getCannyLowThresh() const { return cannyLowThresh_; }
void setCannyHighThresh(int cannyHighThresh) { cannyHighThresh_ = cannyHighThresh; }
int getCannyHighThresh() const { return cannyHighThresh_; }
void setMinDist(double minDist) { minDist_ = minDist; }
double getMinDist() const { return minDist_; }
void setDp(double dp) { dp_ = dp; }
double getDp() const { return dp_; }
void setMaxBufferSize(int maxBufferSize) { maxBufferSize_ = maxBufferSize; }
int getMaxBufferSize() const { return maxBufferSize_; }
void setLevels(int levels) { levels_ = levels; }
int getLevels() const { return levels_; }
void setVotesThreshold(int votesThreshold) { votesThreshold_ = votesThreshold; }
int getVotesThreshold() const { return votesThreshold_; }
private:
void processTempl();
void processImage();
void calcHist();
void findPosInHist();
int levels_;
int votesThreshold_;
GpuMat r_table_;
GpuMat r_sizes_;
GpuMat hist_;
};
GeneralizedHoughBallardImpl::GeneralizedHoughBallardImpl()
{
levels_ = 360;
votesThreshold_ = 100;
}
void GeneralizedHoughBallardImpl::processTempl()
{
using namespace cv::cuda::device::ght;
CV_Assert( levels_ > 0 );
buildEdgePointList(templEdges_, templDx_, templDy_);
ensureSizeIsEnough(levels_ + 1, maxBufferSize_, CV_16SC2, r_table_);
ensureSizeIsEnough(1, levels_ + 1, CV_32SC1, r_sizes_);
r_sizes_.setTo(Scalar::all(0));
if (edgePointList_.cols > 0)
{
buildRTable_gpu(edgePointList_.ptr<unsigned int>(0), edgePointList_.ptr<float>(1), edgePointList_.cols,
r_table_, r_sizes_.ptr<int>(), make_short2(templCenter_.x, templCenter_.y), levels_);
cuda::min(r_sizes_, maxBufferSize_, r_sizes_);
}
}
void GeneralizedHoughBallardImpl::processImage()
{
calcHist();
findPosInHist();
}
void GeneralizedHoughBallardImpl::calcHist()
{
using namespace cv::cuda::device::ght;
CV_Assert( levels_ > 0 && r_table_.rows == (levels_ + 1) && r_sizes_.cols == (levels_ + 1) );
CV_Assert( dp_ > 0.0);
const double idp = 1.0 / dp_;
buildEdgePointList(imageEdges_, imageDx_, imageDy_);
ensureSizeIsEnough(cvCeil(imageSize_.height * idp) + 2, cvCeil(imageSize_.width * idp) + 2, CV_32SC1, hist_);
hist_.setTo(Scalar::all(0));
if (edgePointList_.cols > 0)
{
Ballard_Pos_calcHist_gpu(edgePointList_.ptr<unsigned int>(0), edgePointList_.ptr<float>(1), edgePointList_.cols,
r_table_, r_sizes_.ptr<int>(),
hist_,
(float)dp_, levels_);
}
}
void GeneralizedHoughBallardImpl::findPosInHist()
{
using namespace cv::cuda::device::ght;
CV_Assert( votesThreshold_ > 0 );
ensureSizeIsEnough(2, maxBufferSize_, CV_32FC4, outBuf_);
posCount_ = Ballard_Pos_findPosInHist_gpu(hist_, outBuf_.ptr<float4>(0), outBuf_.ptr<int3>(1), maxBufferSize_, (float)dp_, votesThreshold_);
}
}
Ptr<GeneralizedHoughBallard> cv::cuda::createGeneralizedHoughBallard()
{
return new GeneralizedHoughBallardImpl;
}
// GeneralizedHoughGuil
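// Guil's variant decouples the unknowns: processImage() first histograms candidate
// rotations (calcOrientation), then for each accepted angle histograms candidate
// scales (calcScale), and finally votes for positions at each accepted
// (angle, scale) pair (calcPosition). In Guil's formulation the features are pairs
// of edge points whose gradient directions differ by roughly xi_ (90 degrees by
// default), matched between template and image within angleEpsilon_; angleThresh_,
// scaleThresh_ and posThresh_ gate the three voting stages.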
namespace
{
class GeneralizedHoughGuilImpl : public GeneralizedHoughGuil, private GeneralizedHoughBase
{
public:
GeneralizedHoughGuilImpl();
void setTemplate(InputArray templ, Point templCenter) { setTemplateImpl(templ, templCenter); }
void setTemplate(InputArray edges, InputArray dx, InputArray dy, Point templCenter) { setTemplateImpl(edges, dx, dy, templCenter); }
void detect(InputArray image, OutputArray positions, OutputArray votes) { detectImpl(image, positions, votes); }
void detect(InputArray edges, InputArray dx, InputArray dy, OutputArray positions, OutputArray votes) { detectImpl(edges, dx, dy, positions, votes); }
void setCannyLowThresh(int cannyLowThresh) { cannyLowThresh_ = cannyLowThresh; }
int getCannyLowThresh() const { return cannyLowThresh_; }
void setCannyHighThresh(int cannyHighThresh) { cannyHighThresh_ = cannyHighThresh; }
int getCannyHighThresh() const { return cannyHighThresh_; }
void setMinDist(double minDist) { minDist_ = minDist; }
double getMinDist() const { return minDist_; }
void setDp(double dp) { dp_ = dp; }
double getDp() const { return dp_; }
void setMaxBufferSize(int maxBufferSize) { maxBufferSize_ = maxBufferSize; }
int getMaxBufferSize() const { return maxBufferSize_; }
void setXi(double xi) { xi_ = xi; }
double getXi() const { return xi_; }
void setLevels(int levels) { levels_ = levels; }
int getLevels() const { return levels_; }
void setAngleEpsilon(double angleEpsilon) { angleEpsilon_ = angleEpsilon; }
double getAngleEpsilon() const { return angleEpsilon_; }
void setMinAngle(double minAngle) { minAngle_ = minAngle; }
double getMinAngle() const { return minAngle_; }
void setMaxAngle(double maxAngle) { maxAngle_ = maxAngle; }
double getMaxAngle() const { return maxAngle_; }
void setAngleStep(double angleStep) { angleStep_ = angleStep; }
double getAngleStep() const { return angleStep_; }
void setAngleThresh(int angleThresh) { angleThresh_ = angleThresh; }
int getAngleThresh() const { return angleThresh_; }
void setMinScale(double minScale) { minScale_ = minScale; }
double getMinScale() const { return minScale_; }
void setMaxScale(double maxScale) { maxScale_ = maxScale; }
double getMaxScale() const { return maxScale_; }
void setScaleStep(double scaleStep) { scaleStep_ = scaleStep; }
double getScaleStep() const { return scaleStep_; }
void setScaleThresh(int scaleThresh) { scaleThresh_ = scaleThresh; }
int getScaleThresh() const { return scaleThresh_; }
void setPosThresh(int posThresh) { posThresh_ = posThresh; }
int getPosThresh() const { return posThresh_; }
private:
void processTempl();
void processImage();
double xi_;
int levels_;
double angleEpsilon_;
double minAngle_;
double maxAngle_;
double angleStep_;
int angleThresh_;
double minScale_;
double maxScale_;
double scaleStep_;
int scaleThresh_;
int posThresh_;
struct Feature
{
GpuMat p1_pos;
GpuMat p1_theta;
GpuMat p2_pos;
GpuMat d12;
GpuMat r1;
GpuMat r2;
GpuMat sizes;
int maxSize;
void create(int levels, int maxCapacity, bool isTempl);
};
typedef void (*set_func_t)(PtrStepb p1_pos, PtrStepb p1_theta, PtrStepb p2_pos, PtrStepb d12, PtrStepb r1, PtrStepb r2);
typedef void (*build_func_t)(const unsigned int* coordList, const float* thetaList, int pointsCount,
int* sizes, int maxSize,
float xi, float angleEpsilon, int levels,
float2 center, float maxDist);
void buildFeatureList(const GpuMat& edges, const GpuMat& dx, const GpuMat& dy, Feature& features,
set_func_t set_func, build_func_t build_func, bool isTempl, Point2d center = Point2d());
void calcOrientation();
void calcScale(double angle);
void calcPosition(double angle, int angleVotes, double scale, int scaleVotes);
Feature templFeatures_;
Feature imageFeatures_;
std::vector< std::pair<double, int> > angles_;
std::vector< std::pair<double, int> > scales_;
GpuMat hist_;
std::vector<int> h_buf_;
};
double toRad(double a)
{
return a * CV_PI / 180.0;
}
double clampAngle(double a)
{
double res = a;
while (res > 360.0)
res -= 360.0;
while (res < 0)
res += 360.0;
return res;
}
bool angleEq(double a, double b, double eps = 1.0)
{
return (fabs(clampAngle(a - b)) <= eps);
}
GeneralizedHoughGuilImpl::GeneralizedHoughGuilImpl()
{
maxBufferSize_ = 1000;
xi_ = 90.0;
levels_ = 360;
angleEpsilon_ = 1.0;
minAngle_ = 0.0;
maxAngle_ = 360.0;
angleStep_ = 1.0;
angleThresh_ = 15000;
minScale_ = 0.5;
maxScale_ = 2.0;
scaleStep_ = 0.05;
scaleThresh_ = 1000;
posThresh_ = 100;
}
void GeneralizedHoughGuilImpl::processTempl()
{
using namespace cv::cuda::device::ght;
buildFeatureList(templEdges_, templDx_, templDy_, templFeatures_,
Guil_Full_setTemplFeatures, Guil_Full_buildTemplFeatureList_gpu,
true, templCenter_);
h_buf_.resize(templFeatures_.sizes.cols);
cudaSafeCall( cudaMemcpy(&h_buf_[0], templFeatures_.sizes.data, h_buf_.size() * sizeof(int), cudaMemcpyDeviceToHost) );
templFeatures_.maxSize = *std::max_element(h_buf_.begin(), h_buf_.end());
}
void GeneralizedHoughGuilImpl::processImage()
{
using namespace cv::cuda::device::ght;
CV_Assert( levels_ > 0 );
CV_Assert( templFeatures_.sizes.cols == levels_ + 1 );
CV_Assert( minAngle_ >= 0.0 && minAngle_ < maxAngle_ && maxAngle_ <= 360.0 );
CV_Assert( angleStep_ > 0.0 && angleStep_ < 360.0 );
CV_Assert( angleThresh_ > 0 );
CV_Assert( minScale_ > 0.0 && minScale_ < maxScale_ );
CV_Assert( scaleStep_ > 0.0 );
CV_Assert( scaleThresh_ > 0 );
CV_Assert( dp_ > 0.0 );
CV_Assert( posThresh_ > 0 );
const double iAngleStep = 1.0 / angleStep_;
const int angleRange = cvCeil((maxAngle_ - minAngle_) * iAngleStep);
const double iScaleStep = 1.0 / scaleStep_;
const int scaleRange = cvCeil((maxScale_ - minScale_) * iScaleStep);
const double idp = 1.0 / dp_;
const int histRows = cvCeil(imageSize_.height * idp);
const int histCols = cvCeil(imageSize_.width * idp);
// hist_ is reused for the orientation, scale and position histograms,
// so it must be large enough to hold the largest of the three.
ensureSizeIsEnough(histRows + 2, std::max(angleRange + 1, std::max(scaleRange + 1, histCols + 2)), CV_32SC1, hist_);
h_buf_.resize(std::max(angleRange + 1, scaleRange + 1));
ensureSizeIsEnough(2, maxBufferSize_, CV_32FC4, outBuf_);
buildFeatureList(imageEdges_, imageDx_, imageDy_, imageFeatures_,
Guil_Full_setImageFeatures, Guil_Full_buildImageFeatureList_gpu,
false);
calcOrientation();
for (size_t i = 0; i < angles_.size(); ++i)
{
const double angle = angles_[i].first;
const int angleVotes = angles_[i].second;
calcScale(angle);
for (size_t j = 0; j < scales_.size(); ++j)
{
const double scale = scales_[j].first;
const int scaleVotes = scales_[j].second;
calcPosition(angle, angleVotes, scale, scaleVotes);
}
}
}
void GeneralizedHoughGuilImpl::Feature::create(int levels, int maxCapacity, bool isTempl)
{
if (!isTempl)
{
ensureSizeIsEnough(levels + 1, maxCapacity, CV_32FC2, p1_pos);
ensureSizeIsEnough(levels + 1, maxCapacity, CV_32FC2, p2_pos);
}
ensureSizeIsEnough(levels + 1, maxCapacity, CV_32FC1, p1_theta);
ensureSizeIsEnough(levels + 1, maxCapacity, CV_32FC1, d12);
if (isTempl)
{
ensureSizeIsEnough(levels + 1, maxCapacity, CV_32FC2, r1);
ensureSizeIsEnough(levels + 1, maxCapacity, CV_32FC2, r2);
}
ensureSizeIsEnough(1, levels + 1, CV_32SC1, sizes);
sizes.setTo(Scalar::all(0));
maxSize = 0;
}
void GeneralizedHoughGuilImpl::buildFeatureList(const GpuMat& edges, const GpuMat& dx, const GpuMat& dy, Feature& features,
set_func_t set_func, build_func_t build_func, bool isTempl, Point2d center)
{
CV_Assert( levels_ > 0 );
const double maxDist = sqrt((double) templSize_.width * templSize_.width + templSize_.height * templSize_.height) * maxScale_;
features.create(levels_, maxBufferSize_, isTempl);
set_func(features.p1_pos, features.p1_theta, features.p2_pos, features.d12, features.r1, features.r2);
buildEdgePointList(edges, dx, dy);
if (edgePointList_.cols > 0)
{
build_func(edgePointList_.ptr<unsigned int>(0), edgePointList_.ptr<float>(1), edgePointList_.cols,
features.sizes.ptr<int>(), maxBufferSize_, (float)xi_, (float)angleEpsilon_, levels_, make_float2((float)center.x, (float)center.y), (float)maxDist);
}
}
void GeneralizedHoughGuilImpl::calcOrientation()
{
using namespace cv::cuda::device::ght;
const double iAngleStep = 1.0 / angleStep_;
const int angleRange = cvCeil((maxAngle_ - minAngle_) * iAngleStep);
hist_.setTo(Scalar::all(0));
Guil_Full_calcOHist_gpu(templFeatures_.sizes.ptr<int>(), imageFeatures_.sizes.ptr<int>(0), hist_.ptr<int>(),
(float)minAngle_, (float)maxAngle_, (float)angleStep_, angleRange, levels_, templFeatures_.maxSize);
cudaSafeCall( cudaMemcpy(&h_buf_[0], hist_.data, h_buf_.size() * sizeof(int), cudaMemcpyDeviceToHost) );
angles_.clear();
for (int n = 0; n < angleRange; ++n)
{
if (h_buf_[n] >= angleThresh_)
{
const double angle = minAngle_ + n * angleStep_;
angles_.push_back(std::make_pair(angle, h_buf_[n]));
}
}
}
void GeneralizedHoughGuilImpl::calcScale(double angle)
{
using namespace cv::cuda::device::ght;
const double iScaleStep = 1.0 / scaleStep_;
const int scaleRange = cvCeil((maxScale_ - minScale_) * iScaleStep);
hist_.setTo(Scalar::all(0));
Guil_Full_calcSHist_gpu(templFeatures_.sizes.ptr<int>(), imageFeatures_.sizes.ptr<int>(0), hist_.ptr<int>(),
(float)angle, (float)angleEpsilon_, (float)minScale_, (float)maxScale_,
(float)iScaleStep, scaleRange, levels_, templFeatures_.maxSize);
cudaSafeCall( cudaMemcpy(&h_buf_[0], hist_.data, h_buf_.size() * sizeof(int), cudaMemcpyDeviceToHost) );
scales_.clear();
for (int s = 0; s < scaleRange; ++s)
{
if (h_buf_[s] >= scaleThresh_)
{
const double scale = minScale_ + s * scaleStep_;
scales_.push_back(std::make_pair(scale, h_buf_[s]));
}
}
}
void GeneralizedHoughGuilImpl::calcPosition(double angle, int angleVotes, double scale, int scaleVotes)
{
using namespace cv::cuda::device::ght;
hist_.setTo(Scalar::all(0));
Guil_Full_calcPHist_gpu(templFeatures_.sizes.ptr<int>(), imageFeatures_.sizes.ptr<int>(0), hist_,
(float)angle, (float)angleEpsilon_, (float)scale, (float)dp_, levels_, templFeatures_.maxSize);
posCount_ = Guil_Full_findPosInHist_gpu(hist_, outBuf_.ptr<float4>(0), outBuf_.ptr<int3>(1),
posCount_, maxBufferSize_, (float)angle, angleVotes,
(float)scale, scaleVotes, (float)dp_, posThresh_);
}
}
Ptr<GeneralizedHoughGuil> cv::cuda::createGeneralizedHoughGuil()
{
return new GeneralizedHoughGuilImpl;
}
#endif /* !defined (HAVE_CUDA) */
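A minimal usage sketch of the factory defined above (illustrative only, not part of this commit; it assumes the cv::GeneralizedHoughGuil interface from the imgproc module, with setTemplate()/detect() accepting GpuMat inputs in the CUDA implementation):
// Illustrative sketch, not part of the patch.
cv::Ptr<cv::GeneralizedHoughGuil> guil = cv::cuda::createGeneralizedHoughGuil();
guil->setMinScale(0.8);
guil->setMaxScale(1.25);
guil->setScaleStep(0.05);
guil->setTemplate(d_templ);                 // d_templ: CV_8UC1 GpuMat holding the template image
cv::cuda::GpuMat d_positions;
guil->detect(d_image, d_positions);         // d_image: CV_8UC1 GpuMat; each result is (x, y, scale, angle)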

View File

@@ -0,0 +1,215 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace cv;
using namespace cv::cuda;
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) || !defined(HAVE_OPENCV_CUDAARITHM)
Ptr<cuda::CornersDetector> cv::cuda::createGoodFeaturesToTrackDetector(int, int, double, double, int, bool, double) { throw_no_cuda(); return Ptr<cuda::CornersDetector>(); }
#else /* !defined (HAVE_CUDA) */
namespace cv { namespace cuda { namespace device
{
namespace gfft
{
int findCorners_gpu(PtrStepSzf eig, float threshold, PtrStepSzb mask, float2* corners, int max_count);
void sortCorners_gpu(PtrStepSzf eig, float2* corners, int count);
}
}}}
namespace
{
class GoodFeaturesToTrackDetector : public CornersDetector
{
public:
GoodFeaturesToTrackDetector(int srcType, int maxCorners, double qualityLevel, double minDistance,
int blockSize, bool useHarrisDetector, double harrisK);
void detect(InputArray image, OutputArray corners, InputArray mask = noArray());
private:
int maxCorners_;
double qualityLevel_;
double minDistance_;
Ptr<cuda::CornernessCriteria> cornerCriteria_;
GpuMat Dx_;
GpuMat Dy_;
GpuMat buf_;
GpuMat eig_;
GpuMat minMaxbuf_;
GpuMat tmpCorners_;
};
GoodFeaturesToTrackDetector::GoodFeaturesToTrackDetector(int srcType, int maxCorners, double qualityLevel, double minDistance,
int blockSize, bool useHarrisDetector, double harrisK) :
maxCorners_(maxCorners), qualityLevel_(qualityLevel), minDistance_(minDistance)
{
CV_Assert( qualityLevel_ > 0 && minDistance_ >= 0 && maxCorners_ >= 0 );
cornerCriteria_ = useHarrisDetector ?
cuda::createHarrisCorner(srcType, blockSize, 3, harrisK) :
cuda::createMinEigenValCorner(srcType, blockSize, 3);
}
void GoodFeaturesToTrackDetector::detect(InputArray _image, OutputArray _corners, InputArray _mask)
{
using namespace cv::cuda::device::gfft;
GpuMat image = _image.getGpuMat();
GpuMat mask = _mask.getGpuMat();
CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size()) );
ensureSizeIsEnough(image.size(), CV_32FC1, eig_);
cornerCriteria_->compute(image, eig_);
double maxVal = 0;
cuda::minMax(eig_, 0, &maxVal, noArray(), minMaxbuf_);
ensureSizeIsEnough(1, std::max(1000, static_cast<int>(image.size().area() * 0.05)), CV_32FC2, tmpCorners_);
int total = findCorners_gpu(eig_, static_cast<float>(maxVal * qualityLevel_), mask, tmpCorners_.ptr<float2>(), tmpCorners_.cols);
if (total == 0)
{
_corners.release();
return;
}
sortCorners_gpu(eig_, tmpCorners_.ptr<float2>(), total);
if (minDistance_ < 1)
{
tmpCorners_.colRange(0, maxCorners_ > 0 ? std::min(maxCorners_, total) : total).copyTo(_corners);
}
else
{
std::vector<Point2f> tmp(total);
Mat tmpMat(1, total, CV_32FC2, (void*)&tmp[0]);
tmpCorners_.colRange(0, total).download(tmpMat);
std::vector<Point2f> tmp2;
tmp2.reserve(total);
// Enforce the minimum-distance constraint on the host: accepted corners are
// binned into a uniform grid, and a candidate is kept only if no previously
// kept corner in the neighbouring cells lies closer than minDistance_.
const int cell_size = cvRound(minDistance_);
const int grid_width = (image.cols + cell_size - 1) / cell_size;
const int grid_height = (image.rows + cell_size - 1) / cell_size;
std::vector< std::vector<Point2f> > grid(grid_width * grid_height);
for (int i = 0; i < total; ++i)
{
Point2f p = tmp[i];
bool good = true;
int x_cell = static_cast<int>(p.x / cell_size);
int y_cell = static_cast<int>(p.y / cell_size);
int x1 = x_cell - 1;
int y1 = y_cell - 1;
int x2 = x_cell + 1;
int y2 = y_cell + 1;
// boundary check
x1 = std::max(0, x1);
y1 = std::max(0, y1);
x2 = std::min(grid_width - 1, x2);
y2 = std::min(grid_height - 1, y2);
for (int yy = y1; yy <= y2; yy++)
{
for (int xx = x1; xx <= x2; xx++)
{
std::vector<Point2f>& m = grid[yy * grid_width + xx];
if (!m.empty())
{
for(size_t j = 0; j < m.size(); j++)
{
float dx = p.x - m[j].x;
float dy = p.y - m[j].y;
if (dx * dx + dy * dy < minDistance_ * minDistance_)
{
good = false;
goto break_out;
}
}
}
}
}
break_out:
if(good)
{
grid[y_cell * grid_width + x_cell].push_back(p);
tmp2.push_back(p);
if (maxCorners_ > 0 && tmp2.size() == static_cast<size_t>(maxCorners_))
break;
}
}
_corners.create(1, static_cast<int>(tmp2.size()), CV_32FC2);
GpuMat corners = _corners.getGpuMat();
corners.upload(Mat(1, static_cast<int>(tmp2.size()), CV_32FC2, &tmp2[0]));
}
}
}
Ptr<cuda::CornersDetector> cv::cuda::createGoodFeaturesToTrackDetector(int srcType, int maxCorners, double qualityLevel, double minDistance,
int blockSize, bool useHarrisDetector, double harrisK)
{
return new GoodFeaturesToTrackDetector(srcType, maxCorners, qualityLevel, minDistance, blockSize, useHarrisDetector, harrisK);
}
#endif /* !defined (HAVE_CUDA) */
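A short usage sketch for the detector above (illustrative only, not part of this commit):
// Illustrative sketch, not part of the patch.
cv::Ptr<cv::cuda::CornersDetector> detector =
    cv::cuda::createGoodFeaturesToTrackDetector(CV_8UC1, 1000, 0.01, 10.0, 3, false, 0.04);
cv::cuda::GpuMat d_corners;
detector->detect(d_gray, d_corners);        // d_gray: CV_8UC1 GpuMat already on the device
cv::Mat h_corners;
if (!d_corners.empty())
    d_corners.download(h_corners);          // 1xN CV_32FC2 row of corner positions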

View File

@@ -0,0 +1,579 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace cv;
using namespace cv::cuda;
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
void cv::cuda::calcHist(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::equalizeHist(InputArray, OutputArray, InputOutputArray, Stream&) { throw_no_cuda(); }
cv::Ptr<cv::cuda::CLAHE> cv::cuda::createCLAHE(double, cv::Size) { throw_no_cuda(); return cv::Ptr<cv::cuda::CLAHE>(); }
void cv::cuda::evenLevels(OutputArray, int, int, int) { throw_no_cuda(); }
void cv::cuda::histEven(InputArray, OutputArray, InputOutputArray, int, int, int, Stream&) { throw_no_cuda(); }
void cv::cuda::histEven(InputArray, GpuMat*, InputOutputArray, int*, int*, int*, Stream&) { throw_no_cuda(); }
void cv::cuda::histRange(InputArray, OutputArray, InputArray, InputOutputArray, Stream&) { throw_no_cuda(); }
void cv::cuda::histRange(InputArray, GpuMat*, const GpuMat*, InputOutputArray, Stream&) { throw_no_cuda(); }
#else /* !defined (HAVE_CUDA) */
////////////////////////////////////////////////////////////////////////
// calcHist
namespace hist
{
void histogram256(PtrStepSzb src, int* hist, cudaStream_t stream);
}
void cv::cuda::calcHist(InputArray _src, OutputArray _hist, Stream& stream)
{
GpuMat src = _src.getGpuMat();
CV_Assert( src.type() == CV_8UC1 );
_hist.create(1, 256, CV_32SC1);
GpuMat hist = _hist.getGpuMat();
hist.setTo(Scalar::all(0), stream);
hist::histogram256(src, hist.ptr<int>(), StreamAccessor::getStream(stream));
}
////////////////////////////////////////////////////////////////////////
// equalizeHist
namespace hist
{
void equalizeHist(PtrStepSzb src, PtrStepSzb dst, const int* lut, cudaStream_t stream);
}
void cv::cuda::equalizeHist(InputArray _src, OutputArray _dst, InputOutputArray _buf, Stream& _stream)
{
GpuMat src = _src.getGpuMat();
CV_Assert( src.type() == CV_8UC1 );
_dst.create(src.size(), src.type());
GpuMat dst = _dst.getGpuMat();
int intBufSize;
nppSafeCall( nppsIntegralGetBufferSize_32s(256, &intBufSize) );
size_t bufSize = intBufSize + 2 * 256 * sizeof(int);
ensureSizeIsEnough(1, static_cast<int>(bufSize), CV_8UC1, _buf);
GpuMat buf = _buf.getGpuMat();
GpuMat hist(1, 256, CV_32SC1, buf.data);
GpuMat lut(1, 256, CV_32SC1, buf.data + 256 * sizeof(int));
GpuMat intBuf(1, intBufSize, CV_8UC1, buf.data + 2 * 256 * sizeof(int));
cuda::calcHist(src, hist, _stream);
cudaStream_t stream = StreamAccessor::getStream(_stream);
NppStreamHandler h(stream);
nppSafeCall( nppsIntegral_32s(hist.ptr<Npp32s>(), lut.ptr<Npp32s>(), 256, intBuf.ptr<Npp8u>()) );
hist::equalizeHist(src, dst, lut.ptr<int>(), stream);
}
////////////////////////////////////////////////////////////////////////
// CLAHE
namespace clahe
{
void calcLut(PtrStepSzb src, PtrStepb lut, int tilesX, int tilesY, int2 tileSize, int clipLimit, float lutScale, cudaStream_t stream);
void transform(PtrStepSzb src, PtrStepSzb dst, PtrStepb lut, int tilesX, int tilesY, int2 tileSize, cudaStream_t stream);
}
namespace
{
class CLAHE_Impl : public cv::cuda::CLAHE
{
public:
CLAHE_Impl(double clipLimit = 40.0, int tilesX = 8, int tilesY = 8);
cv::AlgorithmInfo* info() const;
void apply(cv::InputArray src, cv::OutputArray dst);
void apply(InputArray src, OutputArray dst, Stream& stream);
void setClipLimit(double clipLimit);
double getClipLimit() const;
void setTilesGridSize(cv::Size tileGridSize);
cv::Size getTilesGridSize() const;
void collectGarbage();
private:
double clipLimit_;
int tilesX_;
int tilesY_;
GpuMat srcExt_;
GpuMat lut_;
};
CLAHE_Impl::CLAHE_Impl(double clipLimit, int tilesX, int tilesY) :
clipLimit_(clipLimit), tilesX_(tilesX), tilesY_(tilesY)
{
}
CV_INIT_ALGORITHM(CLAHE_Impl, "CLAHE_GPU",
obj.info()->addParam(obj, "clipLimit", obj.clipLimit_);
obj.info()->addParam(obj, "tilesX", obj.tilesX_);
obj.info()->addParam(obj, "tilesY", obj.tilesY_))
void CLAHE_Impl::apply(cv::InputArray _src, cv::OutputArray _dst)
{
apply(_src, _dst, Stream::Null());
}
void CLAHE_Impl::apply(InputArray _src, OutputArray _dst, Stream& s)
{
GpuMat src = _src.getGpuMat();
CV_Assert( src.type() == CV_8UC1 );
_dst.create( src.size(), src.type() );
GpuMat dst = _dst.getGpuMat();
const int histSize = 256;
ensureSizeIsEnough(tilesX_ * tilesY_, histSize, CV_8UC1, lut_);
cudaStream_t stream = StreamAccessor::getStream(s);
cv::Size tileSize;
GpuMat srcForLut;
if (src.cols % tilesX_ == 0 && src.rows % tilesY_ == 0)
{
tileSize = cv::Size(src.cols / tilesX_, src.rows / tilesY_);
srcForLut = src;
}
else
{
#ifndef HAVE_OPENCV_CUDAARITHM
throw_no_cuda();
#else
cv::cuda::copyMakeBorder(src, srcExt_, 0, tilesY_ - (src.rows % tilesY_), 0, tilesX_ - (src.cols % tilesX_), cv::BORDER_REFLECT_101, cv::Scalar(), s);
#endif
tileSize = cv::Size(srcExt_.cols / tilesX_, srcExt_.rows / tilesY_);
srcForLut = srcExt_;
}
const int tileSizeTotal = tileSize.area();
const float lutScale = static_cast<float>(histSize - 1) / tileSizeTotal;
int clipLimit = 0;
if (clipLimit_ > 0.0)
{
clipLimit = static_cast<int>(clipLimit_ * tileSizeTotal / histSize);
clipLimit = std::max(clipLimit, 1);
}
clahe::calcLut(srcForLut, lut_, tilesX_, tilesY_, make_int2(tileSize.width, tileSize.height), clipLimit, lutScale, stream);
clahe::transform(src, dst, lut_, tilesX_, tilesY_, make_int2(tileSize.width, tileSize.height), stream);
}
void CLAHE_Impl::setClipLimit(double clipLimit)
{
clipLimit_ = clipLimit;
}
double CLAHE_Impl::getClipLimit() const
{
return clipLimit_;
}
void CLAHE_Impl::setTilesGridSize(cv::Size tileGridSize)
{
tilesX_ = tileGridSize.width;
tilesY_ = tileGridSize.height;
}
cv::Size CLAHE_Impl::getTilesGridSize() const
{
return cv::Size(tilesX_, tilesY_);
}
void CLAHE_Impl::collectGarbage()
{
srcExt_.release();
lut_.release();
}
}
cv::Ptr<cv::cuda::CLAHE> cv::cuda::createCLAHE(double clipLimit, cv::Size tileGridSize)
{
return new CLAHE_Impl(clipLimit, tileGridSize.width, tileGridSize.height);
}
////////////////////////////////////////////////////////////////////////
// NPP Histogram
namespace
{
typedef NppStatus (*get_buf_size_c1_t)(NppiSize oSizeROI, int nLevels, int* hpBufferSize);
typedef NppStatus (*get_buf_size_c4_t)(NppiSize oSizeROI, int nLevels[], int* hpBufferSize);
template<int SDEPTH> struct NppHistogramEvenFuncC1
{
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s * pHist,
int nLevels, Npp32s nLowerLevel, Npp32s nUpperLevel, Npp8u * pBuffer);
};
template<int SDEPTH> struct NppHistogramEvenFuncC4
{
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI,
Npp32s * pHist[4], int nLevels[4], Npp32s nLowerLevel[4], Npp32s nUpperLevel[4], Npp8u * pBuffer);
};
template<int SDEPTH, typename NppHistogramEvenFuncC1<SDEPTH>::func_ptr func, get_buf_size_c1_t get_buf_size>
struct NppHistogramEvenC1
{
typedef typename NppHistogramEvenFuncC1<SDEPTH>::src_t src_t;
static void hist(const GpuMat& src, OutputArray _hist, InputOutputArray _buf, int histSize, int lowerLevel, int upperLevel, cudaStream_t stream)
{
const int levels = histSize + 1;
_hist.create(1, histSize, CV_32S);
GpuMat hist = _hist.getGpuMat();
NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;
int buf_size;
get_buf_size(sz, levels, &buf_size);
ensureSizeIsEnough(1, buf_size, CV_8UC1, _buf);
GpuMat buf = _buf.getGpuMat();
NppStreamHandler h(stream);
nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, hist.ptr<Npp32s>(), levels,
lowerLevel, upperLevel, buf.ptr<Npp8u>()) );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
};
template<int SDEPTH, typename NppHistogramEvenFuncC4<SDEPTH>::func_ptr func, get_buf_size_c4_t get_buf_size>
struct NppHistogramEvenC4
{
typedef typename NppHistogramEvenFuncC4<SDEPTH>::src_t src_t;
static void hist(const GpuMat& src, GpuMat hist[4],InputOutputArray _buf, int histSize[4], int lowerLevel[4], int upperLevel[4], cudaStream_t stream)
{
int levels[] = {histSize[0] + 1, histSize[1] + 1, histSize[2] + 1, histSize[3] + 1};
hist[0].create(1, histSize[0], CV_32S);
hist[1].create(1, histSize[1], CV_32S);
hist[2].create(1, histSize[2], CV_32S);
hist[3].create(1, histSize[3], CV_32S);
NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;
Npp32s* pHist[] = {hist[0].ptr<Npp32s>(), hist[1].ptr<Npp32s>(), hist[2].ptr<Npp32s>(), hist[3].ptr<Npp32s>()};
int buf_size;
get_buf_size(sz, levels, &buf_size);
ensureSizeIsEnough(1, buf_size, CV_8U, _buf);
GpuMat buf = _buf.getGpuMat();
NppStreamHandler h(stream);
nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, pHist, levels, lowerLevel, upperLevel, buf.ptr<Npp8u>()) );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
};
template<int SDEPTH> struct NppHistogramRangeFuncC1
{
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
typedef Npp32s level_t;
enum {LEVEL_TYPE_CODE=CV_32SC1};
typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist,
const Npp32s* pLevels, int nLevels, Npp8u* pBuffer);
};
template<> struct NppHistogramRangeFuncC1<CV_32F>
{
typedef Npp32f src_t;
typedef Npp32f level_t;
enum {LEVEL_TYPE_CODE=CV_32FC1};
typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist,
const Npp32f* pLevels, int nLevels, Npp8u* pBuffer);
};
template<int SDEPTH> struct NppHistogramRangeFuncC4
{
typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
typedef Npp32s level_t;
enum {LEVEL_TYPE_CODE=CV_32SC1};
typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist[4],
const Npp32s* pLevels[4], int nLevels[4], Npp8u* pBuffer);
};
template<> struct NppHistogramRangeFuncC4<CV_32F>
{
typedef Npp32f src_t;
typedef Npp32f level_t;
enum {LEVEL_TYPE_CODE=CV_32FC1};
typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, NppiSize oSizeROI, Npp32s* pHist[4],
const Npp32f* pLevels[4], int nLevels[4], Npp8u* pBuffer);
};
template<int SDEPTH, typename NppHistogramRangeFuncC1<SDEPTH>::func_ptr func, get_buf_size_c1_t get_buf_size>
struct NppHistogramRangeC1
{
typedef typename NppHistogramRangeFuncC1<SDEPTH>::src_t src_t;
typedef typename NppHistogramRangeFuncC1<SDEPTH>::level_t level_t;
enum {LEVEL_TYPE_CODE=NppHistogramRangeFuncC1<SDEPTH>::LEVEL_TYPE_CODE};
static void hist(const GpuMat& src, OutputArray _hist, const GpuMat& levels, InputOutputArray _buf, cudaStream_t stream)
{
CV_Assert( levels.type() == LEVEL_TYPE_CODE && levels.rows == 1 );
_hist.create(1, levels.cols - 1, CV_32S);
GpuMat hist = _hist.getGpuMat();
NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;
int buf_size;
get_buf_size(sz, levels.cols, &buf_size);
ensureSizeIsEnough(1, buf_size, CV_8U, _buf);
GpuMat buf = _buf.getGpuMat();
NppStreamHandler h(stream);
nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, hist.ptr<Npp32s>(), levels.ptr<level_t>(), levels.cols, buf.ptr<Npp8u>()) );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
};
template<int SDEPTH, typename NppHistogramRangeFuncC4<SDEPTH>::func_ptr func, get_buf_size_c4_t get_buf_size>
struct NppHistogramRangeC4
{
typedef typename NppHistogramRangeFuncC4<SDEPTH>::src_t src_t;
typedef typename NppHistogramRangeFuncC1<SDEPTH>::level_t level_t;
enum {LEVEL_TYPE_CODE=NppHistogramRangeFuncC1<SDEPTH>::LEVEL_TYPE_CODE};
static void hist(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4],InputOutputArray _buf, cudaStream_t stream)
{
CV_Assert( levels[0].type() == LEVEL_TYPE_CODE && levels[0].rows == 1 );
CV_Assert( levels[1].type() == LEVEL_TYPE_CODE && levels[1].rows == 1 );
CV_Assert( levels[2].type() == LEVEL_TYPE_CODE && levels[2].rows == 1 );
CV_Assert( levels[3].type() == LEVEL_TYPE_CODE && levels[3].rows == 1 );
hist[0].create(1, levels[0].cols - 1, CV_32S);
hist[1].create(1, levels[1].cols - 1, CV_32S);
hist[2].create(1, levels[2].cols - 1, CV_32S);
hist[3].create(1, levels[3].cols - 1, CV_32S);
Npp32s* pHist[] = {hist[0].ptr<Npp32s>(), hist[1].ptr<Npp32s>(), hist[2].ptr<Npp32s>(), hist[3].ptr<Npp32s>()};
int nLevels[] = {levels[0].cols, levels[1].cols, levels[2].cols, levels[3].cols};
const level_t* pLevels[] = {levels[0].ptr<level_t>(), levels[1].ptr<level_t>(), levels[2].ptr<level_t>(), levels[3].ptr<level_t>()};
NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;
int buf_size;
get_buf_size(sz, nLevels, &buf_size);
ensureSizeIsEnough(1, buf_size, CV_8U, _buf);
GpuMat buf = _buf.getGpuMat();
NppStreamHandler h(stream);
nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), sz, pHist, pLevels, nLevels, buf.ptr<Npp8u>()) );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
};
}
void cv::cuda::evenLevels(OutputArray _levels, int nLevels, int lowerLevel, int upperLevel)
{
const int kind = _levels.kind();
_levels.create(1, nLevels, CV_32SC1);
Mat host_levels;
if (kind == _InputArray::GPU_MAT)
host_levels.create(1, nLevels, CV_32SC1);
else
host_levels = _levels.getMat();
nppSafeCall( nppiEvenLevelsHost_32s(host_levels.ptr<Npp32s>(), nLevels, lowerLevel, upperLevel) );
if (kind == _InputArray::GPU_MAT)
_levels.getGpuMatRef().upload(host_levels);
}
namespace hist
{
void histEven8u(PtrStepSzb src, int* hist, int binCount, int lowerLevel, int upperLevel, cudaStream_t stream);
}
namespace
{
void histEven8u(const GpuMat& src, GpuMat& hist, int histSize, int lowerLevel, int upperLevel, cudaStream_t stream)
{
hist.create(1, histSize, CV_32S);
cudaSafeCall( cudaMemsetAsync(hist.data, 0, histSize * sizeof(int), stream) );
hist::histEven8u(src, hist.ptr<int>(), histSize, lowerLevel, upperLevel, stream);
}
}
void cv::cuda::histEven(InputArray _src, OutputArray hist, InputOutputArray buf, int histSize, int lowerLevel, int upperLevel, Stream& stream)
{
typedef void (*hist_t)(const GpuMat& src, OutputArray hist, InputOutputArray buf, int levels, int lowerLevel, int upperLevel, cudaStream_t stream);
static const hist_t hist_callers[] =
{
NppHistogramEvenC1<CV_8U , nppiHistogramEven_8u_C1R , nppiHistogramEvenGetBufferSize_8u_C1R >::hist,
0,
NppHistogramEvenC1<CV_16U, nppiHistogramEven_16u_C1R, nppiHistogramEvenGetBufferSize_16u_C1R>::hist,
NppHistogramEvenC1<CV_16S, nppiHistogramEven_16s_C1R, nppiHistogramEvenGetBufferSize_16s_C1R>::hist
};
GpuMat src = _src.getGpuMat();
if (src.depth() == CV_8U && deviceSupports(FEATURE_SET_COMPUTE_30))
{
histEven8u(src, hist.getGpuMatRef(), histSize, lowerLevel, upperLevel, StreamAccessor::getStream(stream));
return;
}
CV_Assert( src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1 );
hist_callers[src.depth()](src, hist, buf, histSize, lowerLevel, upperLevel, StreamAccessor::getStream(stream));
}
void cv::cuda::histEven(InputArray _src, GpuMat hist[4], InputOutputArray buf, int histSize[4], int lowerLevel[4], int upperLevel[4], Stream& stream)
{
typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], InputOutputArray buf, int levels[4], int lowerLevel[4], int upperLevel[4], cudaStream_t stream);
static const hist_t hist_callers[] =
{
NppHistogramEvenC4<CV_8U , nppiHistogramEven_8u_C4R , nppiHistogramEvenGetBufferSize_8u_C4R >::hist,
0,
NppHistogramEvenC4<CV_16U, nppiHistogramEven_16u_C4R, nppiHistogramEvenGetBufferSize_16u_C4R>::hist,
NppHistogramEvenC4<CV_16S, nppiHistogramEven_16s_C4R, nppiHistogramEvenGetBufferSize_16s_C4R>::hist
};
GpuMat src = _src.getGpuMat();
CV_Assert( src.type() == CV_8UC4 || src.type() == CV_16UC4 || src.type() == CV_16SC4 );
hist_callers[src.depth()](src, hist, buf, histSize, lowerLevel, upperLevel, StreamAccessor::getStream(stream));
}
void cv::cuda::histRange(InputArray _src, OutputArray hist, InputArray _levels, InputOutputArray buf, Stream& stream)
{
typedef void (*hist_t)(const GpuMat& src, OutputArray hist, const GpuMat& levels, InputOutputArray buf, cudaStream_t stream);
static const hist_t hist_callers[] =
{
NppHistogramRangeC1<CV_8U , nppiHistogramRange_8u_C1R , nppiHistogramRangeGetBufferSize_8u_C1R >::hist,
0,
NppHistogramRangeC1<CV_16U, nppiHistogramRange_16u_C1R, nppiHistogramRangeGetBufferSize_16u_C1R>::hist,
NppHistogramRangeC1<CV_16S, nppiHistogramRange_16s_C1R, nppiHistogramRangeGetBufferSize_16s_C1R>::hist,
0,
NppHistogramRangeC1<CV_32F, nppiHistogramRange_32f_C1R, nppiHistogramRangeGetBufferSize_32f_C1R>::hist
};
GpuMat src = _src.getGpuMat();
GpuMat levels = _levels.getGpuMat();
CV_Assert( src.type() == CV_8UC1 || src.type() == CV_16UC1 || src.type() == CV_16SC1 || src.type() == CV_32FC1 );
hist_callers[src.depth()](src, hist, levels, buf, StreamAccessor::getStream(stream));
}
void cv::cuda::histRange(InputArray _src, GpuMat hist[4], const GpuMat levels[4], InputOutputArray buf, Stream& stream)
{
typedef void (*hist_t)(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4], InputOutputArray buf, cudaStream_t stream);
static const hist_t hist_callers[] =
{
NppHistogramRangeC4<CV_8U , nppiHistogramRange_8u_C4R , nppiHistogramRangeGetBufferSize_8u_C4R >::hist,
0,
NppHistogramRangeC4<CV_16U, nppiHistogramRange_16u_C4R, nppiHistogramRangeGetBufferSize_16u_C4R>::hist,
NppHistogramRangeC4<CV_16S, nppiHistogramRange_16s_C4R, nppiHistogramRangeGetBufferSize_16s_C4R>::hist,
0,
NppHistogramRangeC4<CV_32F, nppiHistogramRange_32f_C4R, nppiHistogramRangeGetBufferSize_32f_C4R>::hist
};
GpuMat src = _src.getGpuMat();
CV_Assert( src.type() == CV_8UC4 || src.type() == CV_16UC4 || src.type() == CV_16SC4 || src.type() == CV_32FC4 );
hist_callers[src.depth()](src, hist, levels, buf, StreamAccessor::getStream(stream));
}
#endif /* !defined (HAVE_CUDA) */
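A short usage sketch for the histogram functions above (illustrative only, not part of this commit; it assumes the Stream arguments default to Stream::Null() in the public header):
// Illustrative sketch, not part of the patch.
cv::cuda::GpuMat d_src;                      // CV_8UC1 image on the device
cv::cuda::GpuMat d_hist, d_dst, d_buf;
cv::cuda::calcHist(d_src, d_hist);           // d_hist becomes 1x256 CV_32SC1
cv::cuda::equalizeHist(d_src, d_dst, d_buf); // d_buf: reusable scratch buffer
cv::Ptr<cv::cuda::CLAHE> clahe = cv::cuda::createCLAHE(40.0, cv::Size(8, 8));
clahe->apply(d_src, d_dst);                  // contrast-limited adaptive equalization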

View File

@@ -0,0 +1,297 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace cv;
using namespace cv::cuda;
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER) || !defined(HAVE_OPENCV_CUDAFILTERS)
Ptr<cuda::HoughCirclesDetector> cv::cuda::createHoughCirclesDetector(float, float, int, int, int, int, int) { throw_no_cuda(); return Ptr<HoughCirclesDetector>(); }
#else /* !defined (HAVE_CUDA) */
namespace cv { namespace cuda { namespace device
{
namespace hough
{
int buildPointList_gpu(PtrStepSzb src, unsigned int* list);
}
namespace hough_circles
{
void circlesAccumCenters_gpu(const unsigned int* list, int count, PtrStepi dx, PtrStepi dy, PtrStepSzi accum, int minRadius, int maxRadius, float idp);
int buildCentersList_gpu(PtrStepSzi accum, unsigned int* centers, int threshold);
int circlesAccumRadius_gpu(const unsigned int* centers, int centersCount, const unsigned int* list, int count,
float3* circles, int maxCircles, float dp, int minRadius, int maxRadius, int threshold, bool has20);
}
}}}
namespace
{
class HoughCirclesDetectorImpl : public HoughCirclesDetector
{
public:
HoughCirclesDetectorImpl(float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles);
void detect(InputArray src, OutputArray circles);
void setDp(float dp) { dp_ = dp; }
float getDp() const { return dp_; }
void setMinDist(float minDist) { minDist_ = minDist; }
float getMinDist() const { return minDist_; }
void setCannyThreshold(int cannyThreshold) { cannyThreshold_ = cannyThreshold; }
int getCannyThreshold() const { return cannyThreshold_; }
void setVotesThreshold(int votesThreshold) { votesThreshold_ = votesThreshold; }
int getVotesThreshold() const { return votesThreshold_; }
void setMinRadius(int minRadius) { minRadius_ = minRadius; }
int getMinRadius() const { return minRadius_; }
void setMaxRadius(int maxRadius) { maxRadius_ = maxRadius; }
int getMaxRadius() const { return maxRadius_; }
void setMaxCircles(int maxCircles) { maxCircles_ = maxCircles; }
int getMaxCircles() const { return maxCircles_; }
void write(FileStorage& fs) const
{
fs << "name" << "HoughCirclesDetector_GPU"
<< "dp" << dp_
<< "minDist" << minDist_
<< "cannyThreshold" << cannyThreshold_
<< "votesThreshold" << votesThreshold_
<< "minRadius" << minRadius_
<< "maxRadius" << maxRadius_
<< "maxCircles" << maxCircles_;
}
void read(const FileNode& fn)
{
CV_Assert( String(fn["name"]) == "HoughCirclesDetector_GPU" );
dp_ = (float)fn["dp"];
minDist_ = (float)fn["minDist"];
cannyThreshold_ = (int)fn["cannyThreshold"];
votesThreshold_ = (int)fn["votesThreshold"];
minRadius_ = (int)fn["minRadius"];
maxRadius_ = (int)fn["maxRadius"];
maxCircles_ = (int)fn["maxCircles"];
}
private:
float dp_;
float minDist_;
int cannyThreshold_;
int votesThreshold_;
int minRadius_;
int maxRadius_;
int maxCircles_;
GpuMat dx_, dy_;
GpuMat edges_;
GpuMat accum_;
GpuMat list_;
GpuMat result_;
Ptr<cuda::Filter> filterDx_;
Ptr<cuda::Filter> filterDy_;
Ptr<cuda::CannyEdgeDetector> canny_;
};
HoughCirclesDetectorImpl::HoughCirclesDetectorImpl(float dp, float minDist, int cannyThreshold, int votesThreshold,
int minRadius, int maxRadius, int maxCircles) :
dp_(dp), minDist_(minDist), cannyThreshold_(cannyThreshold), votesThreshold_(votesThreshold),
minRadius_(minRadius), maxRadius_(maxRadius), maxCircles_(maxCircles)
{
canny_ = cuda::createCannyEdgeDetector(std::max(cannyThreshold_ / 2, 1), cannyThreshold_);
filterDx_ = cuda::createSobelFilter(CV_8UC1, CV_32S, 1, 0);
filterDy_ = cuda::createSobelFilter(CV_8UC1, CV_32S, 0, 1);
}
void HoughCirclesDetectorImpl::detect(InputArray _src, OutputArray circles)
{
using namespace cv::cuda::device::hough;
using namespace cv::cuda::device::hough_circles;
GpuMat src = _src.getGpuMat();
CV_Assert( src.type() == CV_8UC1 );
CV_Assert( src.cols < std::numeric_limits<unsigned short>::max() );
CV_Assert( src.rows < std::numeric_limits<unsigned short>::max() );
CV_Assert( dp_ > 0 );
CV_Assert( minRadius_ > 0 && maxRadius_ > minRadius_ );
CV_Assert( cannyThreshold_ > 0 );
CV_Assert( votesThreshold_ > 0 );
CV_Assert( maxCircles_ > 0 );
const float idp = 1.0f / dp_;
filterDx_->apply(src, dx_);
filterDy_->apply(src, dy_);
canny_->setLowThreshold(std::max(cannyThreshold_ / 2, 1));
canny_->setHighThreshold(cannyThreshold_);
canny_->detect(dx_, dy_, edges_);
ensureSizeIsEnough(2, src.size().area(), CV_32SC1, list_);
unsigned int* srcPoints = list_.ptr<unsigned int>(0);
unsigned int* centers = list_.ptr<unsigned int>(1);
const int pointsCount = buildPointList_gpu(edges_, srcPoints);
if (pointsCount == 0)
{
circles.release();
return;
}
ensureSizeIsEnough(cvCeil(src.rows * idp) + 2, cvCeil(src.cols * idp) + 2, CV_32SC1, accum_);
accum_.setTo(Scalar::all(0));
circlesAccumCenters_gpu(srcPoints, pointsCount, dx_, dy_, accum_, minRadius_, maxRadius_, idp);
int centersCount = buildCentersList_gpu(accum_, centers, votesThreshold_);
if (centersCount == 0)
{
circles.release();
return;
}
if (minDist_ > 1)
{
AutoBuffer<ushort2> oldBuf_(centersCount);
AutoBuffer<ushort2> newBuf_(centersCount);
int newCount = 0;
ushort2* oldBuf = oldBuf_;
ushort2* newBuf = newBuf_;
cudaSafeCall( cudaMemcpy(oldBuf, centers, centersCount * sizeof(ushort2), cudaMemcpyDeviceToHost) );
// Filter candidate centers on the host using a uniform grid so that no two
// kept centers lie closer than minDist_ to each other.
const int cellSize = cvRound(minDist_);
const int gridWidth = (src.cols + cellSize - 1) / cellSize;
const int gridHeight = (src.rows + cellSize - 1) / cellSize;
std::vector< std::vector<ushort2> > grid(gridWidth * gridHeight);
const float minDist2 = minDist_ * minDist_;
for (int i = 0; i < centersCount; ++i)
{
ushort2 p = oldBuf[i];
bool good = true;
int xCell = static_cast<int>(p.x / cellSize);
int yCell = static_cast<int>(p.y / cellSize);
int x1 = xCell - 1;
int y1 = yCell - 1;
int x2 = xCell + 1;
int y2 = yCell + 1;
// boundary check
x1 = std::max(0, x1);
y1 = std::max(0, y1);
x2 = std::min(gridWidth - 1, x2);
y2 = std::min(gridHeight - 1, y2);
for (int yy = y1; yy <= y2; ++yy)
{
for (int xx = x1; xx <= x2; ++xx)
{
std::vector<ushort2>& m = grid[yy * gridWidth + xx];
for(size_t j = 0; j < m.size(); ++j)
{
float dx = (float)(p.x - m[j].x);
float dy = (float)(p.y - m[j].y);
if (dx * dx + dy * dy < minDist2)
{
good = false;
goto break_out;
}
}
}
}
break_out:
if(good)
{
grid[yCell * gridWidth + xCell].push_back(p);
newBuf[newCount++] = p;
}
}
cudaSafeCall( cudaMemcpy(centers, newBuf, newCount * sizeof(unsigned int), cudaMemcpyHostToDevice) );
centersCount = newCount;
}
ensureSizeIsEnough(1, maxCircles_, CV_32FC3, result_);
int circlesCount = circlesAccumRadius_gpu(centers, centersCount, srcPoints, pointsCount, result_.ptr<float3>(), maxCircles_,
dp_, minRadius_, maxRadius_, votesThreshold_, deviceSupports(FEATURE_SET_COMPUTE_20));
if (circlesCount == 0)
{
circles.release();
return;
}
result_.cols = circlesCount;
result_.copyTo(circles);
}
}
Ptr<HoughCirclesDetector> cv::cuda::createHoughCirclesDetector(float dp, float minDist, int cannyThreshold, int votesThreshold, int minRadius, int maxRadius, int maxCircles)
{
return new HoughCirclesDetectorImpl(dp, minDist, cannyThreshold, votesThreshold, minRadius, maxRadius, maxCircles);
}
#endif /* !defined (HAVE_CUDA) */
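A short usage sketch for the circle detector above (illustrative only, not part of this commit):
// Illustrative sketch, not part of the patch.
cv::Ptr<cv::cuda::HoughCirclesDetector> hough =
    cv::cuda::createHoughCirclesDetector(1.0f, 20.0f, 100, 18, 10, 50, 300);
cv::cuda::GpuMat d_circles;
hough->detect(d_gray, d_circles);           // d_gray: CV_8UC1 GpuMat; d_circles: 1xN CV_32FC3 (x, y, radius)
cv::Mat h_circles;
if (!d_circles.empty())
    d_circles.download(h_circles);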

View File

@@ -0,0 +1,202 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace cv;
using namespace cv::cuda;
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
Ptr<cuda::HoughLinesDetector> cv::cuda::createHoughLinesDetector(float, float, int, bool, int) { throw_no_cuda(); return Ptr<HoughLinesDetector>(); }
#else /* !defined (HAVE_CUDA) */
namespace cv { namespace cuda { namespace device
{
namespace hough
{
int buildPointList_gpu(PtrStepSzb src, unsigned int* list);
}
namespace hough_lines
{
void linesAccum_gpu(const unsigned int* list, int count, PtrStepSzi accum, float rho, float theta, size_t sharedMemPerBlock, bool has20);
int linesGetResult_gpu(PtrStepSzi accum, float2* out, int* votes, int maxSize, float rho, float theta, int threshold, bool doSort);
}
}}}
namespace
{
class HoughLinesDetectorImpl : public HoughLinesDetector
{
public:
HoughLinesDetectorImpl(float rho, float theta, int threshold, bool doSort, int maxLines) :
rho_(rho), theta_(theta), threshold_(threshold), doSort_(doSort), maxLines_(maxLines)
{
}
void detect(InputArray src, OutputArray lines);
void downloadResults(InputArray d_lines, OutputArray h_lines, OutputArray h_votes = noArray());
void setRho(float rho) { rho_ = rho; }
float getRho() const { return rho_; }
void setTheta(float theta) { theta_ = theta; }
float getTheta() const { return theta_; }
void setThreshold(int threshold) { threshold_ = threshold; }
int getThreshold() const { return threshold_; }
void setDoSort(bool doSort) { doSort_ = doSort; }
bool getDoSort() const { return doSort_; }
void setMaxLines(int maxLines) { maxLines_ = maxLines; }
int getMaxLines() const { return maxLines_; }
void write(FileStorage& fs) const
{
fs << "name" << "HoughLinesDetector_GPU"
<< "rho" << rho_
<< "theta" << theta_
<< "threshold" << threshold_
<< "doSort" << doSort_
<< "maxLines" << maxLines_;
}
void read(const FileNode& fn)
{
CV_Assert( String(fn["name"]) == "HoughLinesDetector_GPU" );
rho_ = (float)fn["rho"];
theta_ = (float)fn["theta"];
threshold_ = (int)fn["threshold"];
doSort_ = (int)fn["doSort"] != 0;
maxLines_ = (int)fn["maxLines"];
}
private:
float rho_;
float theta_;
int threshold_;
bool doSort_;
int maxLines_;
GpuMat accum_;
GpuMat list_;
GpuMat result_;
};
void HoughLinesDetectorImpl::detect(InputArray _src, OutputArray lines)
{
using namespace cv::cuda::device::hough;
using namespace cv::cuda::device::hough_lines;
GpuMat src = _src.getGpuMat();
CV_Assert( src.type() == CV_8UC1 );
CV_Assert( src.cols < std::numeric_limits<unsigned short>::max() );
CV_Assert( src.rows < std::numeric_limits<unsigned short>::max() );
ensureSizeIsEnough(1, src.size().area(), CV_32SC1, list_);
unsigned int* srcPoints = list_.ptr<unsigned int>();
const int pointsCount = buildPointList_gpu(src, srcPoints);
if (pointsCount == 0)
{
lines.release();
return;
}
const int numangle = cvRound(CV_PI / theta_);
const int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho_);
CV_Assert( numangle > 0 && numrho > 0 );
ensureSizeIsEnough(numangle + 2, numrho + 2, CV_32SC1, accum_);
accum_.setTo(Scalar::all(0));
DeviceInfo devInfo;
linesAccum_gpu(srcPoints, pointsCount, accum_, rho_, theta_, devInfo.sharedMemPerBlock(), devInfo.supports(FEATURE_SET_COMPUTE_20));
ensureSizeIsEnough(2, maxLines_, CV_32FC2, result_);
int linesCount = linesGetResult_gpu(accum_, result_.ptr<float2>(0), result_.ptr<int>(1), maxLines_, rho_, theta_, threshold_, doSort_);
if (linesCount == 0)
{
lines.release();
return;
}
result_.cols = linesCount;
result_.copyTo(lines);
}
void HoughLinesDetectorImpl::downloadResults(InputArray _d_lines, OutputArray h_lines, OutputArray h_votes)
{
GpuMat d_lines = _d_lines.getGpuMat();
if (d_lines.empty())
{
h_lines.release();
if (h_votes.needed())
h_votes.release();
return;
}
CV_Assert( d_lines.rows == 2 && d_lines.type() == CV_32FC2 );
d_lines.row(0).download(h_lines);
if (h_votes.needed())
{
GpuMat d_votes(1, d_lines.cols, CV_32SC1, d_lines.ptr<int>(1));
d_votes.download(h_votes);
}
}
}
Ptr<HoughLinesDetector> cv::cuda::createHoughLinesDetector(float rho, float theta, int threshold, bool doSort, int maxLines)
{
return new HoughLinesDetectorImpl(rho, theta, threshold, doSort, maxLines);
}
#endif /* !defined (HAVE_CUDA) */
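A short usage sketch for the line detector above (illustrative only, not part of this commit):
// Illustrative sketch, not part of the patch.
cv::Ptr<cv::cuda::HoughLinesDetector> hough =
    cv::cuda::createHoughLinesDetector(1.0f, (float)(CV_PI / 180.0), 100, true, 4096);
cv::cuda::GpuMat d_lines;
hough->detect(d_edges, d_lines);            // d_edges: CV_8UC1 binary edge map on the device
cv::Mat h_lines, h_votes;
hough->downloadResults(d_lines, h_lines, h_votes);   // h_lines: 1xN CV_32FC2 (rho, theta) pairs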

View File

@@ -0,0 +1,183 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace cv;
using namespace cv::cuda;
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
Ptr<cuda::HoughSegmentDetector> cv::cuda::createHoughSegmentDetector(float, float, int, int, int) { throw_no_cuda(); return Ptr<HoughSegmentDetector>(); }
#else /* !defined (HAVE_CUDA) */
namespace cv { namespace cuda { namespace device
{
namespace hough
{
int buildPointList_gpu(PtrStepSzb src, unsigned int* list);
}
namespace hough_lines
{
void linesAccum_gpu(const unsigned int* list, int count, PtrStepSzi accum, float rho, float theta, size_t sharedMemPerBlock, bool has20);
}
namespace hough_segments
{
int houghLinesProbabilistic_gpu(PtrStepSzb mask, PtrStepSzi accum, int4* out, int maxSize, float rho, float theta, int lineGap, int lineLength);
}
}}}
namespace
{
class HoughSegmentDetectorImpl : public HoughSegmentDetector
{
public:
HoughSegmentDetectorImpl(float rho, float theta, int minLineLength, int maxLineGap, int maxLines) :
rho_(rho), theta_(theta), minLineLength_(minLineLength), maxLineGap_(maxLineGap), maxLines_(maxLines)
{
}
void detect(InputArray src, OutputArray lines);
void setRho(float rho) { rho_ = rho; }
float getRho() const { return rho_; }
void setTheta(float theta) { theta_ = theta; }
float getTheta() const { return theta_; }
void setMinLineLength(int minLineLength) { minLineLength_ = minLineLength; }
int getMinLineLength() const { return minLineLength_; }
void setMaxLineGap(int maxLineGap) { maxLineGap_ = maxLineGap; }
int getMaxLineGap() const { return maxLineGap_; }
void setMaxLines(int maxLines) { maxLines_ = maxLines; }
int getMaxLines() const { return maxLines_; }
void write(FileStorage& fs) const
{
fs << "name" << "PHoughLinesDetector_GPU"
<< "rho" << rho_
<< "theta" << theta_
<< "minLineLength" << minLineLength_
<< "maxLineGap" << maxLineGap_
<< "maxLines" << maxLines_;
}
void read(const FileNode& fn)
{
CV_Assert( String(fn["name"]) == "PHoughLinesDetector_GPU" );
rho_ = (float)fn["rho"];
theta_ = (float)fn["theta"];
minLineLength_ = (int)fn["minLineLength"];
maxLineGap_ = (int)fn["maxLineGap"];
maxLines_ = (int)fn["maxLines"];
}
private:
float rho_;
float theta_;
int minLineLength_;
int maxLineGap_;
int maxLines_;
GpuMat accum_;
GpuMat list_;
GpuMat result_;
};
void HoughSegmentDetectorImpl::detect(InputArray _src, OutputArray lines)
{
using namespace cv::cuda::device::hough;
using namespace cv::cuda::device::hough_lines;
using namespace cv::cuda::device::hough_segments;
GpuMat src = _src.getGpuMat();
CV_Assert( src.type() == CV_8UC1 );
CV_Assert( src.cols < std::numeric_limits<unsigned short>::max() );
CV_Assert( src.rows < std::numeric_limits<unsigned short>::max() );
ensureSizeIsEnough(1, src.size().area(), CV_32SC1, list_);
unsigned int* srcPoints = list_.ptr<unsigned int>();
const int pointsCount = buildPointList_gpu(src, srcPoints);
if (pointsCount == 0)
{
lines.release();
return;
}
const int numangle = cvRound(CV_PI / theta_);
const int numrho = cvRound(((src.cols + src.rows) * 2 + 1) / rho_);
CV_Assert( numangle > 0 && numrho > 0 );
ensureSizeIsEnough(numangle + 2, numrho + 2, CV_32SC1, accum_);
accum_.setTo(Scalar::all(0));
DeviceInfo devInfo;
linesAccum_gpu(srcPoints, pointsCount, accum_, rho_, theta_, devInfo.sharedMemPerBlock(), devInfo.supports(FEATURE_SET_COMPUTE_20));
ensureSizeIsEnough(1, maxLines_, CV_32SC4, result_);
int linesCount = houghLinesProbabilistic_gpu(src, accum_, result_.ptr<int4>(), maxLines_, rho_, theta_, maxLineGap_, minLineLength_);
if (linesCount == 0)
{
lines.release();
return;
}
result_.cols = linesCount;
result_.copyTo(lines);
}
}
Ptr<HoughSegmentDetector> cv::cuda::createHoughSegmentDetector(float rho, float theta, int minLineLength, int maxLineGap, int maxLines)
{
return new HoughSegmentDetectorImpl(rho, theta, minLineLength, maxLineGap, maxLines);
}
#endif /* !defined (HAVE_CUDA) */
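A short usage sketch for the segment detector above (illustrative only, not part of this commit):
// Illustrative sketch, not part of the patch.
cv::Ptr<cv::cuda::HoughSegmentDetector> hough =
    cv::cuda::createHoughSegmentDetector(1.0f, (float)(CV_PI / 180.0), 50, 5, 4096);
cv::cuda::GpuMat d_lines;
hough->detect(d_edges, d_lines);            // d_edges: CV_8UC1 edge map; d_lines: 1xN CV_32SC4 (x1, y1, x2, y2)
cv::Mat h_lines;
if (!d_lines.empty())
    d_lines.download(h_lines);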

View File

@@ -0,0 +1,649 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace cv;
using namespace cv::cuda;
#if !defined (HAVE_CUDA) || !defined (HAVE_OPENCV_CUDAARITHM) || defined (CUDA_DISABLER)
Ptr<cuda::TemplateMatching> cv::cuda::createTemplateMatching(int, int, Size) { throw_no_cuda(); return Ptr<cuda::TemplateMatching>(); }
#else
namespace cv { namespace cuda { namespace device
{
namespace match_template
{
void matchTemplateNaive_CCORR_8U(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream);
void matchTemplateNaive_CCORR_32F(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream);
void matchTemplateNaive_SQDIFF_8U(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream);
void matchTemplateNaive_SQDIFF_32F(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream);
void matchTemplatePrepared_SQDIFF_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result,
int cn, cudaStream_t stream);
void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum, unsigned long long templ_sqsum, PtrStepSzf result,
int cn, cudaStream_t stream);
void matchTemplatePrepared_CCOFF_8U(int w, int h, const PtrStepSz<unsigned int> image_sum, unsigned int templ_sum, PtrStepSzf result, cudaStream_t stream);
void matchTemplatePrepared_CCOFF_8UC2(
int w, int h,
const PtrStepSz<unsigned int> image_sum_r,
const PtrStepSz<unsigned int> image_sum_g,
unsigned int templ_sum_r,
unsigned int templ_sum_g,
PtrStepSzf result, cudaStream_t stream);
void matchTemplatePrepared_CCOFF_8UC3(
int w, int h,
const PtrStepSz<unsigned int> image_sum_r,
const PtrStepSz<unsigned int> image_sum_g,
const PtrStepSz<unsigned int> image_sum_b,
unsigned int templ_sum_r,
unsigned int templ_sum_g,
unsigned int templ_sum_b,
PtrStepSzf result, cudaStream_t stream);
void matchTemplatePrepared_CCOFF_8UC4(
int w, int h,
const PtrStepSz<unsigned int> image_sum_r,
const PtrStepSz<unsigned int> image_sum_g,
const PtrStepSz<unsigned int> image_sum_b,
const PtrStepSz<unsigned int> image_sum_a,
unsigned int templ_sum_r,
unsigned int templ_sum_g,
unsigned int templ_sum_b,
unsigned int templ_sum_a,
PtrStepSzf result, cudaStream_t stream);
void matchTemplatePrepared_CCOFF_NORMED_8U(
int w, int h, const PtrStepSz<unsigned int> image_sum,
const PtrStepSz<unsigned long long> image_sqsum,
unsigned int templ_sum, unsigned long long templ_sqsum,
PtrStepSzf result, cudaStream_t stream);
void matchTemplatePrepared_CCOFF_NORMED_8UC2(
int w, int h,
const PtrStepSz<unsigned int> image_sum_r, const PtrStepSz<unsigned long long> image_sqsum_r,
const PtrStepSz<unsigned int> image_sum_g, const PtrStepSz<unsigned long long> image_sqsum_g,
unsigned int templ_sum_r, unsigned long long templ_sqsum_r,
unsigned int templ_sum_g, unsigned long long templ_sqsum_g,
PtrStepSzf result, cudaStream_t stream);
void matchTemplatePrepared_CCOFF_NORMED_8UC3(
int w, int h,
const PtrStepSz<unsigned int> image_sum_r, const PtrStepSz<unsigned long long> image_sqsum_r,
const PtrStepSz<unsigned int> image_sum_g, const PtrStepSz<unsigned long long> image_sqsum_g,
const PtrStepSz<unsigned int> image_sum_b, const PtrStepSz<unsigned long long> image_sqsum_b,
unsigned int templ_sum_r, unsigned long long templ_sqsum_r,
unsigned int templ_sum_g, unsigned long long templ_sqsum_g,
unsigned int templ_sum_b, unsigned long long templ_sqsum_b,
PtrStepSzf result, cudaStream_t stream);
void matchTemplatePrepared_CCOFF_NORMED_8UC4(
int w, int h,
const PtrStepSz<unsigned int> image_sum_r, const PtrStepSz<unsigned long long> image_sqsum_r,
const PtrStepSz<unsigned int> image_sum_g, const PtrStepSz<unsigned long long> image_sqsum_g,
const PtrStepSz<unsigned int> image_sum_b, const PtrStepSz<unsigned long long> image_sqsum_b,
const PtrStepSz<unsigned int> image_sum_a, const PtrStepSz<unsigned long long> image_sqsum_a,
unsigned int templ_sum_r, unsigned long long templ_sqsum_r,
unsigned int templ_sum_g, unsigned long long templ_sqsum_g,
unsigned int templ_sum_b, unsigned long long templ_sqsum_b,
unsigned int templ_sum_a, unsigned long long templ_sqsum_a,
PtrStepSzf result, cudaStream_t stream);
void normalize_8U(int w, int h, const PtrStepSz<unsigned long long> image_sqsum,
unsigned long long templ_sqsum, PtrStepSzf result, int cn, cudaStream_t stream);
void extractFirstChannel_32F(const PtrStepSzb image, PtrStepSzf result, int cn, cudaStream_t stream);
}
}}}
namespace
{
// Returns the template-area threshold for the given method and depth: if the
// template's area is below this value, the naive matching kernel is used;
// otherwise the FFT-based path is taken (if available)
int getTemplateThreshold(int method, int depth)
{
switch (method)
{
case TM_CCORR:
if (depth == CV_32F) return 250;
if (depth == CV_8U) return 300;
break;
case TM_SQDIFF:
if (depth == CV_8U) return 300;
break;
}
CV_Error(Error::StsBadArg, "unsupported match template mode");
return 0;
}
///////////////////////////////////////////////////////////////
// CCORR_32F
class Match_CCORR_32F : public TemplateMatching
{
public:
explicit Match_CCORR_32F(Size user_block_size);
void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());
private:
Ptr<cuda::Convolution> conv_;
GpuMat result_;
};
Match_CCORR_32F::Match_CCORR_32F(Size user_block_size)
{
conv_ = cuda::createConvolution(user_block_size);
}
void Match_CCORR_32F::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& _stream)
{
using namespace cv::cuda::device::match_template;
GpuMat image = _image.getGpuMat();
GpuMat templ = _templ.getGpuMat();
CV_Assert( image.depth() == CV_32F );
CV_Assert( image.type() == templ.type() );
CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );
cudaStream_t stream = StreamAccessor::getStream(_stream);
_result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32FC1);
GpuMat result = _result.getGpuMat();
if (templ.size().area() < getTemplateThreshold(TM_CCORR, CV_32F))
{
matchTemplateNaive_CCORR_32F(image, templ, result, image.channels(), stream);
return;
}
if (image.channels() == 1)
{
conv_->convolve(image.reshape(1), templ.reshape(1), result, true, _stream);
}
else
{
conv_->convolve(image.reshape(1), templ.reshape(1), result_, true, _stream);
extractFirstChannel_32F(result_, result, image.channels(), stream);
}
}
///////////////////////////////////////////////////////////////
// CCORR_8U
class Match_CCORR_8U : public TemplateMatching
{
public:
explicit Match_CCORR_8U(Size user_block_size) : match32F_(user_block_size)
{
}
void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());
private:
GpuMat imagef_, templf_;
Match_CCORR_32F match32F_;
};
void Match_CCORR_8U::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
{
using namespace cv::cuda::device::match_template;
GpuMat image = _image.getGpuMat();
GpuMat templ = _templ.getGpuMat();
CV_Assert( image.depth() == CV_8U );
CV_Assert( image.type() == templ.type() );
CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );
if (templ.size().area() < getTemplateThreshold(TM_CCORR, CV_8U))
{
_result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32FC1);
GpuMat result = _result.getGpuMat();
matchTemplateNaive_CCORR_8U(image, templ, result, image.channels(), StreamAccessor::getStream(stream));
return;
}
image.convertTo(imagef_, CV_32F, stream);
templ.convertTo(templf_, CV_32F, stream);
match32F_.match(imagef_, templf_, _result, stream);
}
///////////////////////////////////////////////////////////////
// CCORR_NORMED_8U
class Match_CCORR_NORMED_8U : public TemplateMatching
{
public:
explicit Match_CCORR_NORMED_8U(Size user_block_size) : match_CCORR_(user_block_size)
{
}
void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());
private:
Match_CCORR_8U match_CCORR_;
GpuMat image_sqsums_;
GpuMat intBuffer_;
};
void Match_CCORR_NORMED_8U::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
{
using namespace cv::cuda::device::match_template;
GpuMat image = _image.getGpuMat();
GpuMat templ = _templ.getGpuMat();
CV_Assert( image.depth() == CV_8U );
CV_Assert( image.type() == templ.type() );
CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );
match_CCORR_.match(image, templ, _result, stream);
GpuMat result = _result.getGpuMat();
cuda::sqrIntegral(image.reshape(1), image_sqsums_, intBuffer_, stream);
unsigned long long templ_sqsum = (unsigned long long) cuda::sqrSum(templ.reshape(1))[0];
normalize_8U(templ.cols, templ.rows, image_sqsums_, templ_sqsum, result, image.channels(), StreamAccessor::getStream(stream));
}
///////////////////////////////////////////////////////////////
// SQDIFF_32F
class Match_SQDIFF_32F : public TemplateMatching
{
public:
void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());
};
void Match_SQDIFF_32F::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
{
using namespace cv::cuda::device::match_template;
GpuMat image = _image.getGpuMat();
GpuMat templ = _templ.getGpuMat();
CV_Assert( image.depth() == CV_32F );
CV_Assert( image.type() == templ.type() );
CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );
_result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32FC1);
GpuMat result = _result.getGpuMat();
matchTemplateNaive_SQDIFF_32F(image, templ, result, image.channels(), StreamAccessor::getStream(stream));
}
///////////////////////////////////////////////////////////////
// SQDIFF_8U
class Match_SQDIFF_8U : public TemplateMatching
{
public:
explicit Match_SQDIFF_8U(Size user_block_size) : match_CCORR_(user_block_size)
{
}
void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());
private:
GpuMat image_sqsums_;
GpuMat intBuffer_;
Match_CCORR_8U match_CCORR_;
};
void Match_SQDIFF_8U::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
{
using namespace cv::cuda::device::match_template;
GpuMat image = _image.getGpuMat();
GpuMat templ = _templ.getGpuMat();
CV_Assert( image.depth() == CV_8U );
CV_Assert( image.type() == templ.type() );
CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );
if (templ.size().area() < getTemplateThreshold(TM_SQDIFF, CV_8U))
{
_result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32FC1);
GpuMat result = _result.getGpuMat();
matchTemplateNaive_SQDIFF_8U(image, templ, result, image.channels(), StreamAccessor::getStream(stream));
return;
}
cuda::sqrIntegral(image.reshape(1), image_sqsums_, intBuffer_, stream);
unsigned long long templ_sqsum = (unsigned long long) cuda::sqrSum(templ.reshape(1))[0];
match_CCORR_.match(image, templ, _result, stream);
GpuMat result = _result.getGpuMat();
matchTemplatePrepared_SQDIFF_8U(templ.cols, templ.rows, image_sqsums_, templ_sqsum, result, image.channels(), StreamAccessor::getStream(stream));
}
///////////////////////////////////////////////////////////////
// SQDIFF_NORMED_8U
class Match_SQDIFF_NORMED_8U : public TemplateMatching
{
public:
explicit Match_SQDIFF_NORMED_8U(Size user_block_size) : match_CCORR_(user_block_size)
{
}
void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());
private:
GpuMat image_sqsums_;
GpuMat intBuffer_;
Match_CCORR_8U match_CCORR_;
};
void Match_SQDIFF_NORMED_8U::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
{
using namespace cv::cuda::device::match_template;
GpuMat image = _image.getGpuMat();
GpuMat templ = _templ.getGpuMat();
CV_Assert( image.depth() == CV_8U );
CV_Assert( image.type() == templ.type() );
CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );
cuda::sqrIntegral(image.reshape(1), image_sqsums_, intBuffer_, stream);
unsigned long long templ_sqsum = (unsigned long long) cuda::sqrSum(templ.reshape(1))[0];
match_CCORR_.match(image, templ, _result, stream);
GpuMat result = _result.getGpuMat();
matchTemplatePrepared_SQDIFF_NORMED_8U(templ.cols, templ.rows, image_sqsums_, templ_sqsum, result, image.channels(), StreamAccessor::getStream(stream));
}
///////////////////////////////////////////////////////////////
// CCOEFF_8U
class Match_CCOEFF_8U : public TemplateMatching
{
public:
explicit Match_CCOEFF_8U(Size user_block_size) : match_CCORR_(user_block_size)
{
}
void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());
private:
GpuMat intBuffer_;
std::vector<GpuMat> images_;
std::vector<GpuMat> image_sums_;
Match_CCORR_8U match_CCORR_;
};
void Match_CCOEFF_8U::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
{
using namespace cv::cuda::device::match_template;
GpuMat image = _image.getGpuMat();
GpuMat templ = _templ.getGpuMat();
CV_Assert( image.depth() == CV_8U );
CV_Assert( image.type() == templ.type() );
CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );
match_CCORR_.match(image, templ, _result, stream);
GpuMat result = _result.getGpuMat();
if (image.channels() == 1)
{
image_sums_.resize(1);
cuda::integral(image, image_sums_[0], intBuffer_, stream);
unsigned int templ_sum = (unsigned int) cuda::sum(templ)[0];
matchTemplatePrepared_CCOFF_8U(templ.cols, templ.rows, image_sums_[0], templ_sum, result, StreamAccessor::getStream(stream));
}
else
{
cuda::split(image, images_);
image_sums_.resize(images_.size());
for (int i = 0; i < image.channels(); ++i)
cuda::integral(images_[i], image_sums_[i], intBuffer_, stream);
Scalar templ_sum = cuda::sum(templ);
switch (image.channels())
{
case 2:
matchTemplatePrepared_CCOFF_8UC2(
templ.cols, templ.rows, image_sums_[0], image_sums_[1],
(unsigned int) templ_sum[0], (unsigned int) templ_sum[1],
result, StreamAccessor::getStream(stream));
break;
case 3:
matchTemplatePrepared_CCOFF_8UC3(
templ.cols, templ.rows, image_sums_[0], image_sums_[1], image_sums_[2],
(unsigned int) templ_sum[0], (unsigned int) templ_sum[1], (unsigned int) templ_sum[2],
result, StreamAccessor::getStream(stream));
break;
case 4:
matchTemplatePrepared_CCOFF_8UC4(
templ.cols, templ.rows, image_sums_[0], image_sums_[1], image_sums_[2], image_sums_[3],
(unsigned int) templ_sum[0], (unsigned int) templ_sum[1], (unsigned int) templ_sum[2], (unsigned int) templ_sum[3],
result, StreamAccessor::getStream(stream));
break;
default:
CV_Error(Error::StsBadArg, "unsupported number of channels");
}
}
}
///////////////////////////////////////////////////////////////
// CCOEFF_NORMED_8U
class Match_CCOEFF_NORMED_8U : public TemplateMatching
{
public:
explicit Match_CCOEFF_NORMED_8U(Size user_block_size) : match_CCORR_32F_(user_block_size)
{
}
void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());
private:
GpuMat imagef_, templf_;
Match_CCORR_32F match_CCORR_32F_;
GpuMat intBuffer_;
std::vector<GpuMat> images_;
std::vector<GpuMat> image_sums_;
std::vector<GpuMat> image_sqsums_;
};
void Match_CCOEFF_NORMED_8U::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
{
using namespace cv::cuda::device::match_template;
GpuMat image = _image.getGpuMat();
GpuMat templ = _templ.getGpuMat();
CV_Assert( image.depth() == CV_8U );
CV_Assert( image.type() == templ.type() );
CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );
image.convertTo(imagef_, CV_32F, stream);
templ.convertTo(templf_, CV_32F, stream);
match_CCORR_32F_.match(imagef_, templf_, _result, stream);
GpuMat result = _result.getGpuMat();
if (image.channels() == 1)
{
image_sums_.resize(1);
cuda::integral(image, image_sums_[0], intBuffer_, stream);
image_sqsums_.resize(1);
cuda::sqrIntegral(image, image_sqsums_[0], intBuffer_, stream);
unsigned int templ_sum = (unsigned int) cuda::sum(templ)[0];
unsigned long long templ_sqsum = (unsigned long long) cuda::sqrSum(templ)[0];
matchTemplatePrepared_CCOFF_NORMED_8U(
templ.cols, templ.rows, image_sums_[0], image_sqsums_[0],
templ_sum, templ_sqsum, result, StreamAccessor::getStream(stream));
}
else
{
cuda::split(image, images_);
image_sums_.resize(images_.size());
image_sqsums_.resize(images_.size());
for (int i = 0; i < image.channels(); ++i)
{
cuda::integral(images_[i], image_sums_[i], intBuffer_, stream);
cuda::sqrIntegral(images_[i], image_sqsums_[i], intBuffer_, stream);
}
Scalar templ_sum = cuda::sum(templ);
Scalar templ_sqsum = cuda::sqrSum(templ);
switch (image.channels())
{
case 2:
matchTemplatePrepared_CCOFF_NORMED_8UC2(
templ.cols, templ.rows,
image_sums_[0], image_sqsums_[0],
image_sums_[1], image_sqsums_[1],
(unsigned int)templ_sum[0], (unsigned long long)templ_sqsum[0],
(unsigned int)templ_sum[1], (unsigned long long)templ_sqsum[1],
result, StreamAccessor::getStream(stream));
break;
case 3:
matchTemplatePrepared_CCOFF_NORMED_8UC3(
templ.cols, templ.rows,
image_sums_[0], image_sqsums_[0],
image_sums_[1], image_sqsums_[1],
image_sums_[2], image_sqsums_[2],
(unsigned int)templ_sum[0], (unsigned long long)templ_sqsum[0],
(unsigned int)templ_sum[1], (unsigned long long)templ_sqsum[1],
(unsigned int)templ_sum[2], (unsigned long long)templ_sqsum[2],
result, StreamAccessor::getStream(stream));
break;
case 4:
matchTemplatePrepared_CCOFF_NORMED_8UC4(
templ.cols, templ.rows,
image_sums_[0], image_sqsums_[0],
image_sums_[1], image_sqsums_[1],
image_sums_[2], image_sqsums_[2],
image_sums_[3], image_sqsums_[3],
(unsigned int)templ_sum[0], (unsigned long long)templ_sqsum[0],
(unsigned int)templ_sum[1], (unsigned long long)templ_sqsum[1],
(unsigned int)templ_sum[2], (unsigned long long)templ_sqsum[2],
(unsigned int)templ_sum[3], (unsigned long long)templ_sqsum[3],
result, StreamAccessor::getStream(stream));
break;
default:
CV_Error(Error::StsBadArg, "unsupported number of channels");
}
}
}
}
Ptr<cuda::TemplateMatching> cv::cuda::createTemplateMatching(int srcType, int method, Size user_block_size)
{
const int sdepth = CV_MAT_DEPTH(srcType);
CV_Assert( sdepth == CV_8U || sdepth == CV_32F );
if (sdepth == CV_32F)
{
switch (method)
{
case TM_SQDIFF:
return new Match_SQDIFF_32F;
case TM_CCORR:
return new Match_CCORR_32F(user_block_size);
default:
CV_Error( Error::StsBadFlag, "Unsupported method" );
return Ptr<cuda::TemplateMatching>();
}
}
else
{
switch (method)
{
case TM_SQDIFF:
return new Match_SQDIFF_8U(user_block_size);
case TM_SQDIFF_NORMED:
return new Match_SQDIFF_NORMED_8U(user_block_size);
case TM_CCORR:
return new Match_CCORR_8U(user_block_size);
case TM_CCORR_NORMED:
return new Match_CCORR_NORMED_8U(user_block_size);
case TM_CCOEFF:
return new Match_CCOEFF_8U(user_block_size);
case TM_CCOEFF_NORMED:
return new Match_CCOEFF_NORMED_8U(user_block_size);
default:
CV_Error( Error::StsBadFlag, "Unsupported method" );
return Ptr<cuda::TemplateMatching>();
}
}
}
#endif
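For orientation, a minimal usage sketch of the factory defined above — assuming a CUDA-enabled build with cudaarithm available; the synthetic input and variable names are illustrative only and are not part of this commit:
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/cudaimgproc.hpp>
int main()
{
    // Synthetic host data: a dark scene containing a bright patch to search for.
    cv::Mat h_image(480, 640, CV_8UC1, cv::Scalar(0));
    cv::rectangle(h_image, cv::Rect(100, 80, 32, 32), cv::Scalar(255), -1);
    cv::Mat h_templ = h_image(cv::Rect(100, 80, 32, 32)).clone();
    // Upload to the device and run normalized cross-correlation matching.
    cv::cuda::GpuMat d_image(h_image), d_templ(h_templ), d_result;
    cv::Ptr<cv::cuda::TemplateMatching> alg =
        cv::cuda::createTemplateMatching(CV_8U, cv::TM_CCORR_NORMED);
    alg->match(d_image, d_templ, d_result);
    // Download the response map and locate the best match on the host.
    cv::Mat h_result(d_result);
    double maxVal; cv::Point maxLoc;
    cv::minMaxLoc(h_result, 0, &maxVal, 0, &maxLoc);
    return 0;
}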

View File

@@ -0,0 +1,128 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace cv;
using namespace cv::cuda;
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
void cv::cuda::meanShiftFiltering(InputArray, OutputArray, int, int, TermCriteria, Stream&) { throw_no_cuda(); }
void cv::cuda::meanShiftProc(InputArray, OutputArray, OutputArray, int, int, TermCriteria, Stream&) { throw_no_cuda(); }
#else /* !defined (HAVE_CUDA) */
////////////////////////////////////////////////////////////////////////
// meanShiftFiltering
namespace cv { namespace cuda { namespace device
{
namespace imgproc
{
void meanShiftFiltering_gpu(const PtrStepSzb& src, PtrStepSzb dst, int sp, int sr, int maxIter, float eps, cudaStream_t stream);
}
}}}
void cv::cuda::meanShiftFiltering(InputArray _src, OutputArray _dst, int sp, int sr, TermCriteria criteria, Stream& stream)
{
using namespace ::cv::cuda::device::imgproc;
GpuMat src = _src.getGpuMat();
CV_Assert( src.type() == CV_8UC4 );
_dst.create(src.size(), CV_8UC4);
GpuMat dst = _dst.getGpuMat();
if (!(criteria.type & TermCriteria::MAX_ITER))
criteria.maxCount = 5;
int maxIter = std::min(std::max(criteria.maxCount, 1), 100);
if (!(criteria.type & TermCriteria::EPS))
criteria.epsilon = 1.f;
float eps = (float) std::max(criteria.epsilon, 0.0);
meanShiftFiltering_gpu(src, dst, sp, sr, maxIter, eps, StreamAccessor::getStream(stream));
}
////////////////////////////////////////////////////////////////////////
// meanShiftProc
namespace cv { namespace cuda { namespace device
{
namespace imgproc
{
void meanShiftProc_gpu(const PtrStepSzb& src, PtrStepSzb dstr, PtrStepSzb dstsp, int sp, int sr, int maxIter, float eps, cudaStream_t stream);
}
}}}
void cv::cuda::meanShiftProc(InputArray _src, OutputArray _dstr, OutputArray _dstsp, int sp, int sr, TermCriteria criteria, Stream& stream)
{
using namespace ::cv::cuda::device::imgproc;
GpuMat src = _src.getGpuMat();
CV_Assert( src.type() == CV_8UC4 );
_dstr.create(src.size(), CV_8UC4);
_dstsp.create(src.size(), CV_16SC2);
GpuMat dstr = _dstr.getGpuMat();
GpuMat dstsp = _dstsp.getGpuMat();
if (!(criteria.type & TermCriteria::MAX_ITER))
criteria.maxCount = 5;
int maxIter = std::min(std::max(criteria.maxCount, 1), 100);
if (!(criteria.type & TermCriteria::EPS))
criteria.epsilon = 1.f;
float eps = (float) std::max(criteria.epsilon, 0.0);
meanShiftProc_gpu(src, dstr, dstsp, sp, sr, maxIter, eps, StreamAccessor::getStream(stream));
}
#endif /* !defined (HAVE_CUDA) */
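A minimal usage sketch for the two routines above — assuming a CUDA-enabled build; the synthetic input and variable names are illustrative only and are not part of this commit:
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/cudaimgproc.hpp>
int main()
{
    // Both functions require a CV_8UC4 input, so convert from BGR first.
    cv::Mat h_bgr(480, 640, CV_8UC3, cv::Scalar(40, 80, 120));
    cv::Mat h_bgra;
    cv::cvtColor(h_bgr, h_bgra, cv::COLOR_BGR2BGRA);
    cv::cuda::GpuMat d_src(h_bgra), d_filtered;
    cv::TermCriteria crit(cv::TermCriteria::MAX_ITER + cv::TermCriteria::EPS, 5, 1);
    cv::cuda::meanShiftFiltering(d_src, d_filtered, 10 /*sp*/, 10 /*sr*/, crit);
    // meanShiftProc additionally returns the CV_16SC2 map of converged positions.
    cv::cuda::GpuMat d_colors, d_positions;
    cv::cuda::meanShiftProc(d_src, d_colors, d_positions, 10, 10, crit);
    cv::Mat h_filtered(d_filtered);   // download the filtered image
    return 0;
}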

View File

@@ -0,0 +1,391 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
#if !defined HAVE_CUDA || defined(CUDA_DISABLER)
void cv::cuda::meanShiftSegmentation(InputArray, OutputArray, int, int, int, TermCriteria) { throw_no_cuda(); }
#else
// Auxiliary stuff
namespace
{
//
// Declarations
//
class DjSets
{
public:
DjSets(int n);
int find(int elem);
int merge(int set1, int set2);
std::vector<int> parent;
std::vector<int> rank;
std::vector<int> size;
private:
DjSets(const DjSets&);
void operator =(const DjSets&);
};
template <typename T>
struct GraphEdge
{
GraphEdge() {}
GraphEdge(int to_, int next_, const T& val_) : to(to_), next(next_), val(val_) {}
int to;
int next;
T val;
};
template <typename T>
class Graph
{
public:
typedef GraphEdge<T> Edge;
Graph(int numv, int nume_max);
void addEdge(int from, int to, const T& val=T());
std::vector<int> start;
std::vector<Edge> edges;
int numv;
int nume_max;
int nume;
private:
Graph(const Graph&);
void operator =(const Graph&);
};
struct SegmLinkVal
{
SegmLinkVal() {}
SegmLinkVal(int dr_, int dsp_) : dr(dr_), dsp(dsp_) {}
bool operator <(const SegmLinkVal& other) const
{
return dr + dsp < other.dr + other.dsp;
}
int dr;
int dsp;
};
struct SegmLink
{
SegmLink() {}
SegmLink(int from_, int to_, const SegmLinkVal& val_)
: from(from_), to(to_), val(val_) {}
bool operator <(const SegmLink& other) const
{
return val < other.val;
}
int from;
int to;
SegmLinkVal val;
};
//
// Implementation
//
DjSets::DjSets(int n) : parent(n), rank(n, 0), size(n, 1)
{
for (int i = 0; i < n; ++i)
parent[i] = i;
}
inline int DjSets::find(int elem)
{
int set = elem;
while (set != parent[set])
set = parent[set];
while (elem != parent[elem])
{
int next = parent[elem];
parent[elem] = set;
elem = next;
}
return set;
}
inline int DjSets::merge(int set1, int set2)
{
if (rank[set1] < rank[set2])
{
parent[set1] = set2;
size[set2] += size[set1];
return set2;
}
if (rank[set2] < rank[set1])
{
parent[set2] = set1;
size[set1] += size[set2];
return set1;
}
parent[set1] = set2;
rank[set2]++;
size[set2] += size[set1];
return set2;
}
template <typename T>
Graph<T>::Graph(int numv_, int nume_max_) : start(numv_, -1), edges(nume_max_)
{
this->numv = numv_;
this->nume_max = nume_max_;
nume = 0;
}
template <typename T>
inline void Graph<T>::addEdge(int from, int to, const T& val)
{
edges[nume] = Edge(to, start[from], val);
start[from] = nume;
nume++;
}
inline int pix(int y, int x, int ncols)
{
return y * ncols + x;
}
inline int sqr(int x)
{
return x * x;
}
inline int dist2(const cv::Vec4b& lhs, const cv::Vec4b& rhs)
{
return sqr(lhs[0] - rhs[0]) + sqr(lhs[1] - rhs[1]) + sqr(lhs[2] - rhs[2]);
}
inline int dist2(const cv::Vec2s& lhs, const cv::Vec2s& rhs)
{
return sqr(lhs[0] - rhs[0]) + sqr(lhs[1] - rhs[1]);
}
} // anonymous namespace
void cv::cuda::meanShiftSegmentation(InputArray _src, OutputArray _dst, int sp, int sr, int minsize, TermCriteria criteria)
{
GpuMat src = _src.getGpuMat();
CV_Assert( src.type() == CV_8UC4 );
const int nrows = src.rows;
const int ncols = src.cols;
const int hr = sr;
const int hsp = sp;
// Perform mean shift procedure and obtain region and spatial maps
GpuMat d_rmap, d_spmap;
cuda::meanShiftProc(src, d_rmap, d_spmap, sp, sr, criteria);
Mat rmap(d_rmap);
Mat spmap(d_spmap);
Graph<SegmLinkVal> g(nrows * ncols, 4 * (nrows - 1) * (ncols - 1)
+ (nrows - 1) + (ncols - 1));
// Build the region adjacency graph from the image
Vec4b r1;
Vec4b r2[4];
Vec2s sp1;
Vec2s sp2[4];
int dr[4];
int dsp[4];
for (int y = 0; y < nrows - 1; ++y)
{
Vec4b* ry = rmap.ptr<Vec4b>(y);
Vec4b* ryp = rmap.ptr<Vec4b>(y + 1);
Vec2s* spy = spmap.ptr<Vec2s>(y);
Vec2s* spyp = spmap.ptr<Vec2s>(y + 1);
for (int x = 0; x < ncols - 1; ++x)
{
r1 = ry[x];
sp1 = spy[x];
r2[0] = ry[x + 1];
r2[1] = ryp[x];
r2[2] = ryp[x + 1];
r2[3] = ryp[x];
sp2[0] = spy[x + 1];
sp2[1] = spyp[x];
sp2[2] = spyp[x + 1];
sp2[3] = spyp[x];
dr[0] = dist2(r1, r2[0]);
dr[1] = dist2(r1, r2[1]);
dr[2] = dist2(r1, r2[2]);
dsp[0] = dist2(sp1, sp2[0]);
dsp[1] = dist2(sp1, sp2[1]);
dsp[2] = dist2(sp1, sp2[2]);
r1 = ry[x + 1];
sp1 = spy[x + 1];
dr[3] = dist2(r1, r2[3]);
dsp[3] = dist2(sp1, sp2[3]);
g.addEdge(pix(y, x, ncols), pix(y, x + 1, ncols), SegmLinkVal(dr[0], dsp[0]));
g.addEdge(pix(y, x, ncols), pix(y + 1, x, ncols), SegmLinkVal(dr[1], dsp[1]));
g.addEdge(pix(y, x, ncols), pix(y + 1, x + 1, ncols), SegmLinkVal(dr[2], dsp[2]));
g.addEdge(pix(y, x + 1, ncols), pix(y + 1, x, ncols), SegmLinkVal(dr[3], dsp[3]));
}
}
for (int y = 0; y < nrows - 1; ++y)
{
r1 = rmap.at<Vec4b>(y, ncols - 1);
r2[0] = rmap.at<Vec4b>(y + 1, ncols - 1);
sp1 = spmap.at<Vec2s>(y, ncols - 1);
sp2[0] = spmap.at<Vec2s>(y + 1, ncols - 1);
dr[0] = dist2(r1, r2[0]);
dsp[0] = dist2(sp1, sp2[0]);
g.addEdge(pix(y, ncols - 1, ncols), pix(y + 1, ncols - 1, ncols), SegmLinkVal(dr[0], dsp[0]));
}
for (int x = 0; x < ncols - 1; ++x)
{
r1 = rmap.at<Vec4b>(nrows - 1, x);
r2[0] = rmap.at<Vec4b>(nrows - 1, x + 1);
sp1 = spmap.at<Vec2s>(nrows - 1, x);
sp2[0] = spmap.at<Vec2s>(nrows - 1, x + 1);
dr[0] = dist2(r1, r2[0]);
dsp[0] = dist2(sp1, sp2[0]);
g.addEdge(pix(nrows - 1, x, ncols), pix(nrows - 1, x + 1, ncols), SegmLinkVal(dr[0], dsp[0]));
}
DjSets comps(g.numv);
// Merge adjacent components whose color and spatial distances are below the thresholds
for (int v = 0; v < g.numv; ++v)
{
for (int e_it = g.start[v]; e_it != -1; e_it = g.edges[e_it].next)
{
int c1 = comps.find(v);
int c2 = comps.find(g.edges[e_it].to);
if (c1 != c2 && g.edges[e_it].val.dr < hr && g.edges[e_it].val.dsp < hsp)
comps.merge(c1, c2);
}
}
std::vector<SegmLink> edges;
edges.reserve(g.numv);
// Collect edges that connect different components
for (int v = 0; v < g.numv; ++v)
{
int c1 = comps.find(v);
for (int e_it = g.start[v]; e_it != -1; e_it = g.edges[e_it].next)
{
int c2 = comps.find(g.edges[e_it].to);
if (c1 != c2)
edges.push_back(SegmLink(c1, c2, g.edges[e_it].val));
}
}
// Sort all edges connecting different components (in ascending order of weight)
std::sort(edges.begin(), edges.end());
// Merge away components smaller than minsize (starting from the closest pair)
for (size_t i = 0; i < edges.size(); ++i)
{
int c1 = comps.find(edges[i].from);
int c2 = comps.find(edges[i].to);
if (c1 != c2 && (comps.size[c1] < minsize || comps.size[c2] < minsize))
comps.merge(c1, c2);
}
// Accumulate, for each segment, the sum of its pixels' colors
Mat h_src(src);
std::vector<Vec4i> sumcols(nrows * ncols, Vec4i(0, 0, 0, 0));
for (int y = 0; y < nrows; ++y)
{
Vec4b* h_srcy = h_src.ptr<Vec4b>(y);
for (int x = 0; x < ncols; ++x)
{
int parent = comps.find(pix(y, x, ncols));
Vec4b col = h_srcy[x];
Vec4i& sumcol = sumcols[parent];
sumcol[0] += col[0];
sumcol[1] += col[1];
sumcol[2] += col[2];
}
}
// Create the final image; each segment gets the average color of its pixels
_dst.create(src.size(), src.type());
Mat dst = _dst.getMat();
for (int y = 0; y < nrows; ++y)
{
Vec4b* dsty = dst.ptr<Vec4b>(y);
for (int x = 0; x < ncols; ++x)
{
int parent = comps.find(pix(y, x, ncols));
const Vec4i& sumcol = sumcols[parent];
Vec4b& dstcol = dsty[x];
dstcol[0] = static_cast<uchar>(sumcol[0] / comps.size[parent]);
dstcol[1] = static_cast<uchar>(sumcol[1] / comps.size[parent]);
dstcol[2] = static_cast<uchar>(sumcol[2] / comps.size[parent]);
dstcol[3] = 255;
}
}
}
#endif // #if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
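A minimal usage sketch for meanShiftSegmentation as implemented above — assuming a CUDA-enabled build; the synthetic input and variable names are illustrative only and are not part of this commit:
#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/cudaimgproc.hpp>
int main()
{
    // The function expects a CV_8UC4 device image and writes its result to a host array.
    cv::Mat h_bgr(480, 640, CV_8UC3, cv::Scalar(40, 80, 120));
    cv::Mat h_bgra;
    cv::cvtColor(h_bgr, h_bgra, cv::COLOR_BGR2BGRA);
    cv::cuda::GpuMat d_src(h_bgra);
    cv::Mat h_segmented;              // output is created on the host (see getMat above)
    cv::cuda::meanShiftSegmentation(d_src, h_segmented,
                                    10 /*sp*/, 10 /*sr*/, 20 /*minsize*/,
                                    cv::TermCriteria(cv::TermCriteria::MAX_ITER + cv::TermCriteria::EPS, 5, 1));
    return 0;
}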

View File

@@ -0,0 +1,43 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"

View File

@@ -0,0 +1,62 @@
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_PRECOMP_H__
#define __OPENCV_PRECOMP_H__
#include "opencv2/cudaimgproc.hpp"
#include "opencv2/core/utility.hpp"
#include "opencv2/core/private.hpp"
#include "opencv2/core/private.cuda.hpp"
#include "opencv2/opencv_modules.hpp"
#ifdef HAVE_OPENCV_CUDAARITHM
# include "opencv2/cudaarithm.hpp"
#endif
#ifdef HAVE_OPENCV_CUDAFILTERS
# include "opencv2/cudafilters.hpp"
#endif
#endif /* __OPENCV_PRECOMP_H__ */