refactored Linear Filter
This commit is contained in:
parent
fcfcd4cbce
commit
1eedc6c42a
@ -96,6 +96,34 @@ inline void blur(InputArray src, OutputArray dst, Size ksize, Point anchor, Stre
|
|||||||
f->apply(src, dst, stream);
|
f->apply(src, dst, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Linear Filter
|
||||||
|
|
||||||
|
//! non-separable linear 2D filter
|
||||||
|
CV_EXPORTS Ptr<Filter> createLinearFilter(int srcType, int dstType, InputArray kernel, Point anchor = Point(-1,-1),
|
||||||
|
int borderMode = BORDER_DEFAULT, Scalar borderVal = Scalar::all(0));
|
||||||
|
|
||||||
|
__OPENCV_GPUFILTERS_DEPR_BEFORE__ void filter2D(InputArray src, OutputArray dst, int ddepth, InputArray kernel,
|
||||||
|
Point anchor = Point(-1,-1), int borderType = BORDER_DEFAULT,
|
||||||
|
Stream& stream = Stream::Null()) __OPENCV_GPUFILTERS_DEPR_AFTER__;
|
||||||
|
|
||||||
|
inline void filter2D(InputArray src, OutputArray dst, int ddepth, InputArray kernel, Point anchor, int borderType, Stream& stream)
|
||||||
|
{
|
||||||
|
Ptr<gpu::Filter> f = gpu::createLinearFilter(src.type(), ddepth, kernel, anchor, borderType);
|
||||||
|
f->apply(src, dst, stream);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
//! applies Laplacian operator to the image
|
||||||
|
//! supports only ksize = 1 and ksize = 3
|
||||||
|
CV_EXPORTS void Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize = 1, double scale = 1, int borderType = BORDER_DEFAULT, Stream& stream = Stream::Null());
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -194,13 +222,7 @@ CV_EXPORTS Ptr<FilterEngine_GPU> createMorphologyFilter_GPU(int op, int type, co
|
|||||||
CV_EXPORTS Ptr<FilterEngine_GPU> createMorphologyFilter_GPU(int op, int type, const Mat& kernel, GpuMat& buf,
|
CV_EXPORTS Ptr<FilterEngine_GPU> createMorphologyFilter_GPU(int op, int type, const Mat& kernel, GpuMat& buf,
|
||||||
const Point& anchor = Point(-1,-1), int iterations = 1);
|
const Point& anchor = Point(-1,-1), int iterations = 1);
|
||||||
|
|
||||||
//! returns 2D filter with the specified kernel
|
|
||||||
//! supports CV_8U, CV_16U and CV_32F one and four channel image
|
|
||||||
CV_EXPORTS Ptr<BaseFilter_GPU> getLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, Point anchor = Point(-1, -1), int borderType = BORDER_DEFAULT);
|
|
||||||
|
|
||||||
//! returns the non-separable linear filter engine
|
|
||||||
CV_EXPORTS Ptr<FilterEngine_GPU> createLinearFilter_GPU(int srcType, int dstType, const Mat& kernel,
|
|
||||||
Point anchor = Point(-1,-1), int borderType = BORDER_DEFAULT);
|
|
||||||
|
|
||||||
//! returns the primitive row filter with the specified kernel.
|
//! returns the primitive row filter with the specified kernel.
|
||||||
//! supports only CV_8UC1, CV_8UC4, CV_16SC1, CV_16SC2, CV_32SC1, CV_32FC1 source type.
|
//! supports only CV_8UC1, CV_8UC4, CV_16SC1, CV_16SC2, CV_32SC1, CV_32FC1 source type.
|
||||||
@ -269,9 +291,6 @@ CV_EXPORTS void morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat&
|
|||||||
CV_EXPORTS void morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, GpuMat& buf1, GpuMat& buf2,
|
CV_EXPORTS void morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat& kernel, GpuMat& buf1, GpuMat& buf2,
|
||||||
Point anchor = Point(-1, -1), int iterations = 1, Stream& stream = Stream::Null());
|
Point anchor = Point(-1, -1), int iterations = 1, Stream& stream = Stream::Null());
|
||||||
|
|
||||||
//! applies non-separable 2D linear filter to the image
|
|
||||||
CV_EXPORTS void filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernel, Point anchor=Point(-1,-1), int borderType = BORDER_DEFAULT, Stream& stream = Stream::Null());
|
|
||||||
|
|
||||||
//! applies separable 2D linear filter to the image
|
//! applies separable 2D linear filter to the image
|
||||||
CV_EXPORTS void sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY,
|
CV_EXPORTS void sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY,
|
||||||
Point anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
|
Point anchor = Point(-1,-1), int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1);
|
||||||
@ -297,10 +316,6 @@ CV_EXPORTS void GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, double
|
|||||||
CV_EXPORTS void GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, GpuMat& buf, double sigma1, double sigma2 = 0,
|
CV_EXPORTS void GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, GpuMat& buf, double sigma1, double sigma2 = 0,
|
||||||
int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, Stream& stream = Stream::Null());
|
int rowBorderType = BORDER_DEFAULT, int columnBorderType = -1, Stream& stream = Stream::Null());
|
||||||
|
|
||||||
//! applies Laplacian operator to the image
|
|
||||||
//! supports only ksize = 1 and ksize = 3
|
|
||||||
CV_EXPORTS void Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize = 1, double scale = 1, int borderType = BORDER_DEFAULT, Stream& stream = Stream::Null());
|
|
||||||
|
|
||||||
}} // namespace cv { namespace gpu {
|
}} // namespace cv { namespace gpu {
|
||||||
|
|
||||||
#undef __OPENCV_GPUFILTERS_DEPR_BEFORE__
|
#undef __OPENCV_GPUFILTERS_DEPR_BEFORE__
|
||||||
|
@ -86,6 +86,51 @@ PERF_TEST_P(Sz_Type_KernelSz, Blur,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////
|
||||||
|
// Filter2D
|
||||||
|
|
||||||
|
PERF_TEST_P(Sz_Type_KernelSz, Filter2D, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(3, 5, 7, 9, 11, 13, 15)))
|
||||||
|
{
|
||||||
|
declare.time(20.0);
|
||||||
|
|
||||||
|
const cv::Size size = GET_PARAM(0);
|
||||||
|
const int type = GET_PARAM(1);
|
||||||
|
const int ksize = GET_PARAM(2);
|
||||||
|
|
||||||
|
cv::Mat src(size, type);
|
||||||
|
declare.in(src, WARMUP_RNG);
|
||||||
|
|
||||||
|
cv::Mat kernel(ksize, ksize, CV_32FC1);
|
||||||
|
declare.in(kernel, WARMUP_RNG);
|
||||||
|
|
||||||
|
if (PERF_RUN_GPU())
|
||||||
|
{
|
||||||
|
const cv::gpu::GpuMat d_src(src);
|
||||||
|
cv::gpu::GpuMat dst;
|
||||||
|
|
||||||
|
cv::Ptr<cv::gpu::Filter> filter2D = cv::gpu::createLinearFilter(d_src.type(), -1, kernel);
|
||||||
|
|
||||||
|
TEST_CYCLE() filter2D->apply(d_src, dst);
|
||||||
|
|
||||||
|
GPU_SANITY_CHECK(dst);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
cv::Mat dst;
|
||||||
|
|
||||||
|
TEST_CYCLE() cv::filter2D(src, dst, -1, kernel);
|
||||||
|
|
||||||
|
CPU_SANITY_CHECK(dst);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////
|
||||||
// Sobel
|
// Sobel
|
||||||
|
|
||||||
@ -330,39 +375,3 @@ PERF_TEST_P(Sz_Type_Op, MorphologyEx, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8
|
|||||||
CPU_SANITY_CHECK(dst);
|
CPU_SANITY_CHECK(dst);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////
|
|
||||||
// Filter2D
|
|
||||||
|
|
||||||
PERF_TEST_P(Sz_Type_KernelSz, Filter2D, Combine(GPU_TYPICAL_MAT_SIZES, Values(CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4), Values(3, 5, 7, 9, 11, 13, 15)))
|
|
||||||
{
|
|
||||||
declare.time(20.0);
|
|
||||||
|
|
||||||
const cv::Size size = GET_PARAM(0);
|
|
||||||
const int type = GET_PARAM(1);
|
|
||||||
const int ksize = GET_PARAM(2);
|
|
||||||
|
|
||||||
cv::Mat src(size, type);
|
|
||||||
declare.in(src, WARMUP_RNG);
|
|
||||||
|
|
||||||
cv::Mat kernel(ksize, ksize, CV_32FC1);
|
|
||||||
declare.in(kernel, WARMUP_RNG);
|
|
||||||
|
|
||||||
if (PERF_RUN_GPU())
|
|
||||||
{
|
|
||||||
const cv::gpu::GpuMat d_src(src);
|
|
||||||
cv::gpu::GpuMat dst;
|
|
||||||
|
|
||||||
TEST_CYCLE() cv::gpu::filter2D(d_src, dst, -1, kernel);
|
|
||||||
|
|
||||||
GPU_SANITY_CHECK(dst);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
cv::Mat dst;
|
|
||||||
|
|
||||||
TEST_CYCLE() cv::filter2D(src, dst, -1, kernel);
|
|
||||||
|
|
||||||
CPU_SANITY_CHECK(dst);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
@ -48,111 +48,104 @@
|
|||||||
|
|
||||||
namespace cv { namespace gpu { namespace cudev
|
namespace cv { namespace gpu { namespace cudev
|
||||||
{
|
{
|
||||||
namespace imgproc
|
template <class SrcPtr, typename D>
|
||||||
|
__global__ void filter2D(const SrcPtr src, PtrStepSz<D> dst,
|
||||||
|
const float* __restrict__ kernel,
|
||||||
|
const int kWidth, const int kHeight,
|
||||||
|
const int anchorX, const int anchorY)
|
||||||
{
|
{
|
||||||
#define FILTER2D_MAX_KERNEL_SIZE 16
|
typedef typename TypeVec<float, VecTraits<D>::cn>::vec_type sum_t;
|
||||||
|
|
||||||
__constant__ float c_filter2DKernel[FILTER2D_MAX_KERNEL_SIZE * FILTER2D_MAX_KERNEL_SIZE];
|
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
||||||
|
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
||||||
|
|
||||||
template <class SrcT, typename D>
|
if (x >= dst.cols || y >= dst.rows)
|
||||||
__global__ void filter2D(const SrcT src, PtrStepSz<D> dst, const int kWidth, const int kHeight, const int anchorX, const int anchorY)
|
return;
|
||||||
|
|
||||||
|
sum_t res = VecTraits<sum_t>::all(0);
|
||||||
|
int kInd = 0;
|
||||||
|
|
||||||
|
for (int i = 0; i < kHeight; ++i)
|
||||||
{
|
{
|
||||||
typedef typename TypeVec<float, VecTraits<D>::cn>::vec_type sum_t;
|
for (int j = 0; j < kWidth; ++j)
|
||||||
|
res = res + src(y - anchorY + i, x - anchorX + j) * kernel[kInd++];
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
|
||||||
|
|
||||||
if (x >= dst.cols || y >= dst.rows)
|
|
||||||
return;
|
|
||||||
|
|
||||||
sum_t res = VecTraits<sum_t>::all(0);
|
|
||||||
int kInd = 0;
|
|
||||||
|
|
||||||
for (int i = 0; i < kHeight; ++i)
|
|
||||||
{
|
|
||||||
for (int j = 0; j < kWidth; ++j)
|
|
||||||
res = res + src(y - anchorY + i, x - anchorX + j) * c_filter2DKernel[kInd++];
|
|
||||||
}
|
|
||||||
|
|
||||||
dst(y, x) = saturate_cast<D>(res);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, typename D, template <typename> class Brd> struct Filter2DCaller;
|
dst(y, x) = saturate_cast<D>(res);
|
||||||
|
|
||||||
#define IMPLEMENT_FILTER2D_TEX_READER(type) \
|
|
||||||
texture< type , cudaTextureType2D, cudaReadModeElementType> tex_filter2D_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \
|
|
||||||
struct tex_filter2D_ ## type ## _reader \
|
|
||||||
{ \
|
|
||||||
typedef type elem_type; \
|
|
||||||
typedef int index_type; \
|
|
||||||
const int xoff; \
|
|
||||||
const int yoff; \
|
|
||||||
tex_filter2D_ ## type ## _reader (int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \
|
|
||||||
__device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
|
|
||||||
{ \
|
|
||||||
return tex2D(tex_filter2D_ ## type , x + xoff, y + yoff); \
|
|
||||||
} \
|
|
||||||
}; \
|
|
||||||
template <typename D, template <typename> class Brd> struct Filter2DCaller< type , D, Brd> \
|
|
||||||
{ \
|
|
||||||
static void call(const PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz<D> dst, \
|
|
||||||
int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream) \
|
|
||||||
{ \
|
|
||||||
typedef typename TypeVec<float, VecTraits< type >::cn>::vec_type work_type; \
|
|
||||||
dim3 block(16, 16); \
|
|
||||||
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
|
|
||||||
bindTexture(&tex_filter2D_ ## type , srcWhole); \
|
|
||||||
tex_filter2D_ ## type ##_reader texSrc(xoff, yoff); \
|
|
||||||
Brd<work_type> brd(dst.rows, dst.cols, VecTraits<work_type>::make(borderValue)); \
|
|
||||||
BorderReader< tex_filter2D_ ## type ##_reader, Brd<work_type> > brdSrc(texSrc, brd); \
|
|
||||||
filter2D<<<grid, block, 0, stream>>>(brdSrc, dst, kWidth, kHeight, anchorX, anchorY); \
|
|
||||||
cudaSafeCall( cudaGetLastError() ); \
|
|
||||||
if (stream == 0) \
|
|
||||||
cudaSafeCall( cudaDeviceSynchronize() ); \
|
|
||||||
} \
|
|
||||||
};
|
|
||||||
|
|
||||||
IMPLEMENT_FILTER2D_TEX_READER(uchar);
|
|
||||||
IMPLEMENT_FILTER2D_TEX_READER(uchar4);
|
|
||||||
|
|
||||||
IMPLEMENT_FILTER2D_TEX_READER(ushort);
|
|
||||||
IMPLEMENT_FILTER2D_TEX_READER(ushort4);
|
|
||||||
|
|
||||||
IMPLEMENT_FILTER2D_TEX_READER(float);
|
|
||||||
IMPLEMENT_FILTER2D_TEX_READER(float4);
|
|
||||||
|
|
||||||
#undef IMPLEMENT_FILTER2D_TEX_READER
|
|
||||||
|
|
||||||
template <typename T, typename D>
|
|
||||||
void filter2D_gpu(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst,
|
|
||||||
int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel,
|
|
||||||
int borderMode, const float* borderValue, cudaStream_t stream)
|
|
||||||
{
|
|
||||||
typedef void (*func_t)(const PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<D> dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream);
|
|
||||||
static const func_t funcs[] =
|
|
||||||
{
|
|
||||||
Filter2DCaller<T, D, BrdConstant>::call,
|
|
||||||
Filter2DCaller<T, D, BrdReplicate>::call,
|
|
||||||
Filter2DCaller<T, D, BrdReflect>::call,
|
|
||||||
Filter2DCaller<T, D, BrdWrap>::call,
|
|
||||||
Filter2DCaller<T, D, BrdReflect101>::call
|
|
||||||
};
|
|
||||||
|
|
||||||
if (stream == 0)
|
|
||||||
cudaSafeCall( cudaMemcpyToSymbol(c_filter2DKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice) );
|
|
||||||
else
|
|
||||||
cudaSafeCall( cudaMemcpyToSymbolAsync(c_filter2DKernel, kernel, kWidth * kHeight * sizeof(float), 0, cudaMemcpyDeviceToDevice, stream) );
|
|
||||||
|
|
||||||
funcs[borderMode](static_cast< PtrStepSz<T> >(srcWhole), ofsX, ofsY, static_cast< PtrStepSz<D> >(dst), kWidth, kHeight, anchorX, anchorY, borderValue, stream);
|
|
||||||
}
|
|
||||||
|
|
||||||
template void filter2D_gpu<uchar, uchar>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
|
|
||||||
template void filter2D_gpu<uchar4, uchar4>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
|
|
||||||
template void filter2D_gpu<ushort, ushort>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
|
|
||||||
template void filter2D_gpu<ushort4, ushort4>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
|
|
||||||
template void filter2D_gpu<float, float>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
|
|
||||||
template void filter2D_gpu<float4, float4>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel, int borderMode, const float* borderValue, cudaStream_t stream);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename T, typename D, template <typename> class Brd> struct Filter2DCaller;
|
||||||
|
|
||||||
|
#define IMPLEMENT_FILTER2D_TEX_READER(type) \
|
||||||
|
texture< type , cudaTextureType2D, cudaReadModeElementType> tex_filter2D_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \
|
||||||
|
struct tex_filter2D_ ## type ## _reader \
|
||||||
|
{ \
|
||||||
|
typedef type elem_type; \
|
||||||
|
typedef int index_type; \
|
||||||
|
const int xoff; \
|
||||||
|
const int yoff; \
|
||||||
|
tex_filter2D_ ## type ## _reader (int xoff_, int yoff_) : xoff(xoff_), yoff(yoff_) {} \
|
||||||
|
__device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
|
||||||
|
{ \
|
||||||
|
return tex2D(tex_filter2D_ ## type , x + xoff, y + yoff); \
|
||||||
|
} \
|
||||||
|
}; \
|
||||||
|
template <typename D, template <typename> class Brd> struct Filter2DCaller< type , D, Brd> \
|
||||||
|
{ \
|
||||||
|
static void call(const PtrStepSz< type > srcWhole, int xoff, int yoff, PtrStepSz<D> dst, const float* kernel, \
|
||||||
|
int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream) \
|
||||||
|
{ \
|
||||||
|
typedef typename TypeVec<float, VecTraits< type >::cn>::vec_type work_type; \
|
||||||
|
dim3 block(16, 16); \
|
||||||
|
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
|
||||||
|
bindTexture(&tex_filter2D_ ## type , srcWhole); \
|
||||||
|
tex_filter2D_ ## type ##_reader texSrc(xoff, yoff); \
|
||||||
|
Brd<work_type> brd(dst.rows, dst.cols, VecTraits<work_type>::make(borderValue)); \
|
||||||
|
BorderReader< tex_filter2D_ ## type ##_reader, Brd<work_type> > brdSrc(texSrc, brd); \
|
||||||
|
filter2D<<<grid, block, 0, stream>>>(brdSrc, dst, kernel, kWidth, kHeight, anchorX, anchorY); \
|
||||||
|
cudaSafeCall( cudaGetLastError() ); \
|
||||||
|
if (stream == 0) \
|
||||||
|
cudaSafeCall( cudaDeviceSynchronize() ); \
|
||||||
|
} \
|
||||||
|
};
|
||||||
|
|
||||||
|
IMPLEMENT_FILTER2D_TEX_READER(uchar);
|
||||||
|
IMPLEMENT_FILTER2D_TEX_READER(uchar4);
|
||||||
|
|
||||||
|
IMPLEMENT_FILTER2D_TEX_READER(ushort);
|
||||||
|
IMPLEMENT_FILTER2D_TEX_READER(ushort4);
|
||||||
|
|
||||||
|
IMPLEMENT_FILTER2D_TEX_READER(float);
|
||||||
|
IMPLEMENT_FILTER2D_TEX_READER(float4);
|
||||||
|
|
||||||
|
#undef IMPLEMENT_FILTER2D_TEX_READER
|
||||||
|
|
||||||
|
template <typename T, typename D>
|
||||||
|
void filter2D(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel,
|
||||||
|
int kWidth, int kHeight, int anchorX, int anchorY,
|
||||||
|
int borderMode, const float* borderValue, cudaStream_t stream)
|
||||||
|
{
|
||||||
|
typedef void (*func_t)(const PtrStepSz<T> srcWhole, int xoff, int yoff, PtrStepSz<D> dst, const float* kernel,
|
||||||
|
int kWidth, int kHeight, int anchorX, int anchorY, const float* borderValue, cudaStream_t stream);
|
||||||
|
static const func_t funcs[] =
|
||||||
|
{
|
||||||
|
Filter2DCaller<T, D, BrdConstant>::call,
|
||||||
|
Filter2DCaller<T, D, BrdReplicate>::call,
|
||||||
|
Filter2DCaller<T, D, BrdReflect>::call,
|
||||||
|
Filter2DCaller<T, D, BrdWrap>::call,
|
||||||
|
Filter2DCaller<T, D, BrdReflect101>::call
|
||||||
|
};
|
||||||
|
|
||||||
|
funcs[borderMode]((PtrStepSz<T>) srcWhole, ofsX, ofsY, (PtrStepSz<D>) dst, kernel,
|
||||||
|
kWidth, kHeight, anchorX, anchorY, borderValue, stream);
|
||||||
|
}
|
||||||
|
|
||||||
|
template void filter2D<uchar , uchar >(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, int kWidth, int kHeight, int anchorX, int anchorY, int borderMode, const float* borderValue, cudaStream_t stream);
|
||||||
|
template void filter2D<uchar4 , uchar4 >(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, int kWidth, int kHeight, int anchorX, int anchorY, int borderMode, const float* borderValue, cudaStream_t stream);
|
||||||
|
template void filter2D<ushort , ushort >(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, int kWidth, int kHeight, int anchorX, int anchorY, int borderMode, const float* borderValue, cudaStream_t stream);
|
||||||
|
template void filter2D<ushort4, ushort4>(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, int kWidth, int kHeight, int anchorX, int anchorY, int borderMode, const float* borderValue, cudaStream_t stream);
|
||||||
|
template void filter2D<float , float >(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, int kWidth, int kHeight, int anchorX, int anchorY, int borderMode, const float* borderValue, cudaStream_t stream);
|
||||||
|
template void filter2D<float4 , float4 >(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel, int kWidth, int kHeight, int anchorX, int anchorY, int borderMode, const float* borderValue, cudaStream_t stream);
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
#endif // CUDA_DISABLER
|
#endif // CUDA_DISABLER
|
||||||
|
@ -49,6 +49,8 @@ using namespace cv::gpu;
|
|||||||
|
|
||||||
Ptr<Filter> cv::gpu::createBoxFilter(int, int, Size, Point, int, Scalar) { throw_no_cuda(); return Ptr<Filter>(); }
|
Ptr<Filter> cv::gpu::createBoxFilter(int, int, Size, Point, int, Scalar) { throw_no_cuda(); return Ptr<Filter>(); }
|
||||||
|
|
||||||
|
Ptr<Filter> cv::gpu::createLinearFilter(int, int, InputArray, Point, int, Scalar) { throw_no_cuda(); return Ptr<Filter>(); }
|
||||||
|
|
||||||
Ptr<FilterEngine_GPU> cv::gpu::createFilter2D_GPU(const Ptr<BaseFilter_GPU>&, int, int) { throw_no_cuda(); return Ptr<FilterEngine_GPU>(0); }
|
Ptr<FilterEngine_GPU> cv::gpu::createFilter2D_GPU(const Ptr<BaseFilter_GPU>&, int, int) { throw_no_cuda(); return Ptr<FilterEngine_GPU>(0); }
|
||||||
Ptr<FilterEngine_GPU> cv::gpu::createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>&, const Ptr<BaseColumnFilter_GPU>&, int, int, int) { throw_no_cuda(); return Ptr<FilterEngine_GPU>(0); }
|
Ptr<FilterEngine_GPU> cv::gpu::createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>&, const Ptr<BaseColumnFilter_GPU>&, int, int, int) { throw_no_cuda(); return Ptr<FilterEngine_GPU>(0); }
|
||||||
Ptr<FilterEngine_GPU> cv::gpu::createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>&, const Ptr<BaseColumnFilter_GPU>&, int, int, int, GpuMat&) { throw_no_cuda(); return Ptr<FilterEngine_GPU>(0); }
|
Ptr<FilterEngine_GPU> cv::gpu::createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>&, const Ptr<BaseColumnFilter_GPU>&, int, int, int, GpuMat&) { throw_no_cuda(); return Ptr<FilterEngine_GPU>(0); }
|
||||||
@ -57,8 +59,6 @@ Ptr<BaseColumnFilter_GPU> cv::gpu::getColumnSumFilter_GPU(int, int, int, int) {
|
|||||||
Ptr<BaseFilter_GPU> cv::gpu::getMorphologyFilter_GPU(int, int, const Mat&, const Size&, Point) { throw_no_cuda(); return Ptr<BaseFilter_GPU>(0); }
|
Ptr<BaseFilter_GPU> cv::gpu::getMorphologyFilter_GPU(int, int, const Mat&, const Size&, Point) { throw_no_cuda(); return Ptr<BaseFilter_GPU>(0); }
|
||||||
Ptr<FilterEngine_GPU> cv::gpu::createMorphologyFilter_GPU(int, int, const Mat&, const Point&, int) { throw_no_cuda(); return Ptr<FilterEngine_GPU>(0); }
|
Ptr<FilterEngine_GPU> cv::gpu::createMorphologyFilter_GPU(int, int, const Mat&, const Point&, int) { throw_no_cuda(); return Ptr<FilterEngine_GPU>(0); }
|
||||||
Ptr<FilterEngine_GPU> cv::gpu::createMorphologyFilter_GPU(int, int, const Mat&, GpuMat&, const Point&, int) { throw_no_cuda(); return Ptr<FilterEngine_GPU>(0); }
|
Ptr<FilterEngine_GPU> cv::gpu::createMorphologyFilter_GPU(int, int, const Mat&, GpuMat&, const Point&, int) { throw_no_cuda(); return Ptr<FilterEngine_GPU>(0); }
|
||||||
Ptr<BaseFilter_GPU> cv::gpu::getLinearFilter_GPU(int, int, const Mat&, Point, int) { throw_no_cuda(); return Ptr<BaseFilter_GPU>(0); }
|
|
||||||
Ptr<FilterEngine_GPU> cv::gpu::createLinearFilter_GPU(int, int, const Mat&, Point, int) { throw_no_cuda(); return Ptr<FilterEngine_GPU>(0); }
|
|
||||||
Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int, int, const Mat&, int, int) { throw_no_cuda(); return Ptr<BaseRowFilter_GPU>(0); }
|
Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int, int, const Mat&, int, int) { throw_no_cuda(); return Ptr<BaseRowFilter_GPU>(0); }
|
||||||
Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int, int, const Mat&, int, int) { throw_no_cuda(); return Ptr<BaseColumnFilter_GPU>(0); }
|
Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int, int, const Mat&, int, int) { throw_no_cuda(); return Ptr<BaseColumnFilter_GPU>(0); }
|
||||||
Ptr<FilterEngine_GPU> cv::gpu::createSeparableLinearFilter_GPU(int, int, const Mat&, const Mat&, const Point&, int, int) { throw_no_cuda(); return Ptr<FilterEngine_GPU>(0); }
|
Ptr<FilterEngine_GPU> cv::gpu::createSeparableLinearFilter_GPU(int, int, const Mat&, const Mat&, const Point&, int, int) { throw_no_cuda(); return Ptr<FilterEngine_GPU>(0); }
|
||||||
@ -76,7 +76,6 @@ void cv::gpu::dilate(const GpuMat&, GpuMat&, const Mat&, Point, int) { throw_no_
|
|||||||
void cv::gpu::dilate(const GpuMat&, GpuMat&, const Mat&, GpuMat&, Point, int, Stream&) { throw_no_cuda(); }
|
void cv::gpu::dilate(const GpuMat&, GpuMat&, const Mat&, GpuMat&, Point, int, Stream&) { throw_no_cuda(); }
|
||||||
void cv::gpu::morphologyEx(const GpuMat&, GpuMat&, int, const Mat&, Point, int) { throw_no_cuda(); }
|
void cv::gpu::morphologyEx(const GpuMat&, GpuMat&, int, const Mat&, Point, int) { throw_no_cuda(); }
|
||||||
void cv::gpu::morphologyEx(const GpuMat&, GpuMat&, int, const Mat&, GpuMat&, GpuMat&, Point, int, Stream&) { throw_no_cuda(); }
|
void cv::gpu::morphologyEx(const GpuMat&, GpuMat&, int, const Mat&, GpuMat&, GpuMat&, Point, int, Stream&) { throw_no_cuda(); }
|
||||||
void cv::gpu::filter2D(const GpuMat&, GpuMat&, int, const Mat&, Point, int, Stream&) { throw_no_cuda(); }
|
|
||||||
void cv::gpu::sepFilter2D(const GpuMat&, GpuMat&, int, const Mat&, const Mat&, Point, int, int) { throw_no_cuda(); }
|
void cv::gpu::sepFilter2D(const GpuMat&, GpuMat&, int, const Mat&, const Mat&, Point, int, int) { throw_no_cuda(); }
|
||||||
void cv::gpu::sepFilter2D(const GpuMat&, GpuMat&, int, const Mat&, const Mat&, GpuMat&, Point, int, int, Stream&) { throw_no_cuda(); }
|
void cv::gpu::sepFilter2D(const GpuMat&, GpuMat&, int, const Mat&, const Mat&, GpuMat&, Point, int, int, Stream&) { throw_no_cuda(); }
|
||||||
void cv::gpu::Sobel(const GpuMat&, GpuMat&, int, int, int, int, double, int, int) { throw_no_cuda(); }
|
void cv::gpu::Sobel(const GpuMat&, GpuMat&, int, int, int, int, double, int, int) { throw_no_cuda(); }
|
||||||
@ -188,6 +187,138 @@ Ptr<Filter> cv::gpu::createBoxFilter(int srcType, int dstType, Size ksize, Point
|
|||||||
return new NPPBoxFilter(srcType, dstType, ksize, anchor, borderMode, borderVal);
|
return new NPPBoxFilter(srcType, dstType, ksize, anchor, borderMode, borderVal);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Linear Filter
|
||||||
|
|
||||||
|
namespace cv { namespace gpu { namespace cudev
|
||||||
|
{
|
||||||
|
template <typename T, typename D>
|
||||||
|
void filter2D(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel,
|
||||||
|
int kWidth, int kHeight, int anchorX, int anchorY,
|
||||||
|
int borderMode, const float* borderValue, cudaStream_t stream);
|
||||||
|
}}}
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
class LinearFilter : public Filter
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
LinearFilter(int srcType, int dstType, InputArray kernel, Point anchor, int borderMode, Scalar borderVal);
|
||||||
|
|
||||||
|
void apply(InputArray src, OutputArray dst, Stream& stream = Stream::Null());
|
||||||
|
|
||||||
|
private:
|
||||||
|
typedef void (*filter2D_t)(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst, const float* kernel,
|
||||||
|
int kWidth, int kHeight, int anchorX, int anchorY,
|
||||||
|
int borderMode, const float* borderValue, cudaStream_t stream);
|
||||||
|
|
||||||
|
GpuMat kernel_;
|
||||||
|
Point anchor_;
|
||||||
|
int type_;
|
||||||
|
filter2D_t func_;
|
||||||
|
int borderMode_;
|
||||||
|
Scalar_<float> borderVal_;
|
||||||
|
};
|
||||||
|
|
||||||
|
LinearFilter::LinearFilter(int srcType, int dstType, InputArray _kernel, Point anchor, int borderMode, Scalar borderVal) :
|
||||||
|
anchor_(anchor), type_(srcType), borderMode_(borderMode), borderVal_(borderVal)
|
||||||
|
{
|
||||||
|
const int sdepth = CV_MAT_DEPTH(srcType);
|
||||||
|
const int scn = CV_MAT_CN(srcType);
|
||||||
|
|
||||||
|
Mat kernel = _kernel.getMat();
|
||||||
|
|
||||||
|
CV_Assert( sdepth == CV_8U || sdepth == CV_16U || sdepth == CV_32F );
|
||||||
|
CV_Assert( scn == 1 || scn == 4 );
|
||||||
|
CV_Assert( dstType == srcType );
|
||||||
|
CV_Assert( kernel.channels() == 1 );
|
||||||
|
CV_Assert( borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT || borderMode == BORDER_REFLECT || borderMode == BORDER_WRAP );
|
||||||
|
|
||||||
|
Mat kernel32F;
|
||||||
|
kernel.convertTo(kernel32F, CV_32F);
|
||||||
|
|
||||||
|
kernel_ = gpu::createContinuous(kernel.size(), CV_32FC1);
|
||||||
|
kernel_.upload(kernel32F);
|
||||||
|
|
||||||
|
normalizeAnchor(anchor_, kernel.size());
|
||||||
|
|
||||||
|
switch (srcType)
|
||||||
|
{
|
||||||
|
case CV_8UC1:
|
||||||
|
func_ = cudev::filter2D<uchar, uchar>;
|
||||||
|
break;
|
||||||
|
case CV_8UC4:
|
||||||
|
func_ = cudev::filter2D<uchar4, uchar4>;
|
||||||
|
break;
|
||||||
|
case CV_16UC1:
|
||||||
|
func_ = cudev::filter2D<ushort, ushort>;
|
||||||
|
break;
|
||||||
|
case CV_16UC4:
|
||||||
|
func_ = cudev::filter2D<ushort4, ushort4>;
|
||||||
|
break;
|
||||||
|
case CV_32FC1:
|
||||||
|
func_ = cudev::filter2D<float, float>;
|
||||||
|
break;
|
||||||
|
case CV_32FC4:
|
||||||
|
func_ = cudev::filter2D<float4, float4>;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void LinearFilter::apply(InputArray _src, OutputArray _dst, Stream& _stream)
|
||||||
|
{
|
||||||
|
GpuMat src = _src.getGpuMat();
|
||||||
|
CV_Assert( src.type() == type_ );
|
||||||
|
|
||||||
|
_dst.create(src.size(), src.type());
|
||||||
|
GpuMat dst = _dst.getGpuMat();
|
||||||
|
|
||||||
|
Point ofs;
|
||||||
|
Size wholeSize;
|
||||||
|
src.locateROI(wholeSize, ofs);
|
||||||
|
|
||||||
|
GpuMat srcWhole(wholeSize, src.type(), src.datastart);
|
||||||
|
|
||||||
|
func_(srcWhole, ofs.x, ofs.y, dst, kernel_.ptr<float>(),
|
||||||
|
kernel_.cols, kernel_.rows, anchor_.x, anchor_.y,
|
||||||
|
borderMode_, borderVal_.val, StreamAccessor::getStream(_stream));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ptr<Filter> cv::gpu::createLinearFilter(int srcType, int dstType, InputArray kernel, Point anchor, int borderMode, Scalar borderVal)
|
||||||
|
{
|
||||||
|
if (dstType < 0)
|
||||||
|
dstType = srcType;
|
||||||
|
|
||||||
|
return new LinearFilter(srcType, dstType, kernel, anchor, borderMode, borderVal);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void cv::gpu::Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize, double scale, int borderType, Stream& stream)
|
||||||
|
{
|
||||||
|
CV_Assert(ksize == 1 || ksize == 3);
|
||||||
|
|
||||||
|
static const int K[2][9] =
|
||||||
|
{
|
||||||
|
{0, 1, 0, 1, -4, 1, 0, 1, 0},
|
||||||
|
{2, 0, 2, 0, -8, 0, 2, 0, 2}
|
||||||
|
};
|
||||||
|
Mat kernel(3, 3, CV_32S, (void*)K[ksize == 3]);
|
||||||
|
if (scale != 1)
|
||||||
|
kernel *= scale;
|
||||||
|
|
||||||
|
Ptr<gpu::Filter> f = gpu::createLinearFilter(src.type(), ddepth, kernel, Point(-1,-1), borderType);
|
||||||
|
f->apply(src, dst, stream);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@ -702,172 +833,6 @@ void cv::gpu::morphologyEx(const GpuMat& src, GpuMat& dst, int op, const Mat& ke
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Linear Filter
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace cudev
|
|
||||||
{
|
|
||||||
namespace imgproc
|
|
||||||
{
|
|
||||||
template <typename T, typename D>
|
|
||||||
void filter2D_gpu(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst,
|
|
||||||
int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel,
|
|
||||||
int borderMode, const float* borderValue, cudaStream_t stream);
|
|
||||||
}
|
|
||||||
}}}
|
|
||||||
|
|
||||||
namespace
|
|
||||||
{
|
|
||||||
typedef NppStatus (*nppFilter2D_t)(const Npp8u * pSrc, Npp32s nSrcStep, Npp8u * pDst, Npp32s nDstStep, NppiSize oSizeROI,
|
|
||||||
const Npp32s * pKernel, NppiSize oKernelSize, NppiPoint oAnchor, Npp32s nDivisor);
|
|
||||||
|
|
||||||
struct NPPLinearFilter : public BaseFilter_GPU
|
|
||||||
{
|
|
||||||
NPPLinearFilter(const Size& ksize_, const Point& anchor_, const GpuMat& kernel_, Npp32s nDivisor_, nppFilter2D_t func_) :
|
|
||||||
BaseFilter_GPU(ksize_, anchor_), kernel(kernel_), nDivisor(nDivisor_), func(func_) {}
|
|
||||||
|
|
||||||
virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null())
|
|
||||||
{
|
|
||||||
NppiSize sz;
|
|
||||||
sz.width = src.cols;
|
|
||||||
sz.height = src.rows;
|
|
||||||
NppiSize oKernelSize;
|
|
||||||
oKernelSize.height = ksize.height;
|
|
||||||
oKernelSize.width = ksize.width;
|
|
||||||
NppiPoint oAnchor;
|
|
||||||
oAnchor.x = anchor.x;
|
|
||||||
oAnchor.y = anchor.y;
|
|
||||||
|
|
||||||
cudaStream_t stream = StreamAccessor::getStream(s);
|
|
||||||
|
|
||||||
NppStreamHandler h(stream);
|
|
||||||
|
|
||||||
nppSafeCall( func(src.ptr<Npp8u>(), static_cast<int>(src.step), dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz,
|
|
||||||
kernel.ptr<Npp32s>(), oKernelSize, oAnchor, nDivisor) );
|
|
||||||
|
|
||||||
if (stream == 0)
|
|
||||||
cudaSafeCall( cudaDeviceSynchronize() );
|
|
||||||
}
|
|
||||||
|
|
||||||
GpuMat kernel;
|
|
||||||
Npp32s nDivisor;
|
|
||||||
nppFilter2D_t func;
|
|
||||||
};
|
|
||||||
|
|
||||||
typedef void (*gpuFilter2D_t)(PtrStepSzb srcWhole, int ofsX, int ofsY, PtrStepSzb dst,
|
|
||||||
int kWidth, int kHeight, int anchorX, int anchorY, const float* kernel,
|
|
||||||
int borderMode, const float* borderValue, cudaStream_t stream);
|
|
||||||
|
|
||||||
struct GpuFilter2D : public BaseFilter_GPU
|
|
||||||
{
|
|
||||||
GpuFilter2D(Size ksize_, Point anchor_, gpuFilter2D_t func_, const GpuMat& kernel_, int brd_type_) :
|
|
||||||
BaseFilter_GPU(ksize_, anchor_), func(func_), kernel(kernel_), brd_type(brd_type_)
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null())
|
|
||||||
{
|
|
||||||
using namespace cv::gpu::cudev::imgproc;
|
|
||||||
|
|
||||||
Point ofs;
|
|
||||||
Size wholeSize;
|
|
||||||
src.locateROI(wholeSize, ofs);
|
|
||||||
GpuMat srcWhole(wholeSize, src.type(), src.datastart);
|
|
||||||
|
|
||||||
static const Scalar_<float> zero = Scalar_<float>::all(0.0f);
|
|
||||||
func(srcWhole, ofs.x, ofs.y, dst, ksize.width, ksize.height, anchor.x, anchor.y, kernel.ptr<float>(), brd_type, zero.val, StreamAccessor::getStream(stream));
|
|
||||||
}
|
|
||||||
|
|
||||||
gpuFilter2D_t func;
|
|
||||||
GpuMat kernel;
|
|
||||||
int brd_type;
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
Ptr<BaseFilter_GPU> cv::gpu::getLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, Point anchor, int brd_type)
|
|
||||||
{
|
|
||||||
using namespace cv::gpu::cudev::imgproc;
|
|
||||||
|
|
||||||
int sdepth = CV_MAT_DEPTH(srcType);
|
|
||||||
int scn = CV_MAT_CN(srcType);
|
|
||||||
|
|
||||||
CV_Assert(sdepth == CV_8U || sdepth == CV_16U || sdepth == CV_32F);
|
|
||||||
CV_Assert(scn == 1 || scn == 4);
|
|
||||||
CV_Assert(dstType == srcType);
|
|
||||||
CV_Assert(brd_type == BORDER_REFLECT101 || brd_type == BORDER_REPLICATE || brd_type == BORDER_CONSTANT || brd_type == BORDER_REFLECT || brd_type == BORDER_WRAP);
|
|
||||||
|
|
||||||
Size ksize = kernel.size();
|
|
||||||
|
|
||||||
#if 0
|
|
||||||
if ((srcType == CV_8UC1 || srcType == CV_8UC4) && brd_type == BORDER_CONSTANT)
|
|
||||||
{
|
|
||||||
static const nppFilter2D_t cppFilter2D_callers[] = {0, nppiFilter_8u_C1R, 0, 0, nppiFilter_8u_C4R};
|
|
||||||
|
|
||||||
GpuMat gpu_krnl;
|
|
||||||
int nDivisor;
|
|
||||||
normalizeKernel(kernel, gpu_krnl, CV_32S, &nDivisor, true);
|
|
||||||
|
|
||||||
normalizeAnchor(anchor, ksize);
|
|
||||||
|
|
||||||
return Ptr<BaseFilter_GPU>(new NPPLinearFilter(ksize, anchor, gpu_krnl, nDivisor, cppFilter2D_callers[CV_MAT_CN(srcType)]));
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
CV_Assert(ksize.width * ksize.height <= 16 * 16);
|
|
||||||
|
|
||||||
GpuMat gpu_krnl;
|
|
||||||
normalizeKernel(kernel, gpu_krnl, CV_32F);
|
|
||||||
|
|
||||||
normalizeAnchor(anchor, ksize);
|
|
||||||
|
|
||||||
gpuFilter2D_t func = 0;
|
|
||||||
|
|
||||||
switch (srcType)
|
|
||||||
{
|
|
||||||
case CV_8UC1:
|
|
||||||
func = filter2D_gpu<uchar, uchar>;
|
|
||||||
break;
|
|
||||||
case CV_8UC4:
|
|
||||||
func = filter2D_gpu<uchar4, uchar4>;
|
|
||||||
break;
|
|
||||||
case CV_16UC1:
|
|
||||||
func = filter2D_gpu<ushort, ushort>;
|
|
||||||
break;
|
|
||||||
case CV_16UC4:
|
|
||||||
func = filter2D_gpu<ushort4, ushort4>;
|
|
||||||
break;
|
|
||||||
case CV_32FC1:
|
|
||||||
func = filter2D_gpu<float, float>;
|
|
||||||
break;
|
|
||||||
case CV_32FC4:
|
|
||||||
func = filter2D_gpu<float4, float4>;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
return Ptr<BaseFilter_GPU>(new GpuFilter2D(ksize, anchor, func, gpu_krnl, brd_type));
|
|
||||||
}
|
|
||||||
|
|
||||||
Ptr<FilterEngine_GPU> cv::gpu::createLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, Point anchor, int borderType)
|
|
||||||
{
|
|
||||||
Ptr<BaseFilter_GPU> linearFilter = getLinearFilter_GPU(srcType, dstType, kernel, anchor, borderType);
|
|
||||||
|
|
||||||
return createFilter2D_GPU(linearFilter, srcType, dstType);
|
|
||||||
}
|
|
||||||
|
|
||||||
void cv::gpu::filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernel, Point anchor, int borderType, Stream& stream)
|
|
||||||
{
|
|
||||||
if (ddepth < 0)
|
|
||||||
ddepth = src.depth();
|
|
||||||
|
|
||||||
int dst_type = CV_MAKE_TYPE(ddepth, src.channels());
|
|
||||||
|
|
||||||
Ptr<FilterEngine_GPU> f = createLinearFilter_GPU(src.type(), dst_type, kernel, anchor, borderType);
|
|
||||||
|
|
||||||
dst.create(src.size(), dst_type);
|
|
||||||
|
|
||||||
f->apply(src, dst, Rect(0, 0, src.cols, src.rows), stream);
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Separable Linear Filter
|
// Separable Linear Filter
|
||||||
|
|
||||||
@ -1208,22 +1173,6 @@ void cv::gpu::Scharr(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy,
|
|||||||
sepFilter2D(src, dst, ddepth, kx, ky, buf, Point(-1,-1), rowBorderType, columnBorderType, stream);
|
sepFilter2D(src, dst, ddepth, kx, ky, buf, Point(-1,-1), rowBorderType, columnBorderType, stream);
|
||||||
}
|
}
|
||||||
|
|
||||||
void cv::gpu::Laplacian(const GpuMat& src, GpuMat& dst, int ddepth, int ksize, double scale, int borderType, Stream& stream)
|
|
||||||
{
|
|
||||||
CV_Assert(ksize == 1 || ksize == 3);
|
|
||||||
|
|
||||||
static const int K[2][9] =
|
|
||||||
{
|
|
||||||
{0, 1, 0, 1, -4, 1, 0, 1, 0},
|
|
||||||
{2, 0, 2, 0, -8, 0, 2, 0, 2}
|
|
||||||
};
|
|
||||||
Mat kernel(3, 3, CV_32S, (void*)K[ksize == 3]);
|
|
||||||
if (scale != 1)
|
|
||||||
kernel *= scale;
|
|
||||||
|
|
||||||
filter2D(src, dst, ddepth, kernel, Point(-1,-1), borderType, stream);
|
|
||||||
}
|
|
||||||
|
|
||||||
////////////////////////////////////////////////////////////////////////////////////////////////////
|
////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Gaussian Filter
|
// Gaussian Filter
|
||||||
|
|
||||||
|
@ -118,6 +118,121 @@ INSTANTIATE_TEST_CASE_P(GPU_Filters, Blur, testing::Combine(
|
|||||||
testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT)),
|
testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT)),
|
||||||
WHOLE_SUBMAT));
|
WHOLE_SUBMAT));
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Filter2D
|
||||||
|
|
||||||
|
PARAM_TEST_CASE(Filter2D, cv::gpu::DeviceInfo, cv::Size, MatType, KSize, Anchor, BorderType, UseRoi)
|
||||||
|
{
|
||||||
|
cv::gpu::DeviceInfo devInfo;
|
||||||
|
cv::Size size;
|
||||||
|
int type;
|
||||||
|
cv::Size ksize;
|
||||||
|
cv::Point anchor;
|
||||||
|
int borderType;
|
||||||
|
bool useRoi;
|
||||||
|
|
||||||
|
virtual void SetUp()
|
||||||
|
{
|
||||||
|
devInfo = GET_PARAM(0);
|
||||||
|
size = GET_PARAM(1);
|
||||||
|
type = GET_PARAM(2);
|
||||||
|
ksize = GET_PARAM(3);
|
||||||
|
anchor = GET_PARAM(4);
|
||||||
|
borderType = GET_PARAM(5);
|
||||||
|
useRoi = GET_PARAM(6);
|
||||||
|
|
||||||
|
cv::gpu::setDevice(devInfo.deviceID());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
GPU_TEST_P(Filter2D, Accuracy)
|
||||||
|
{
|
||||||
|
cv::Mat src = randomMat(size, type);
|
||||||
|
cv::Mat kernel = randomMat(cv::Size(ksize.width, ksize.height), CV_32FC1, 0.0, 1.0);
|
||||||
|
|
||||||
|
cv::Ptr<cv::gpu::Filter> filter2D = cv::gpu::createLinearFilter(src.type(), -1, kernel, anchor, borderType);
|
||||||
|
|
||||||
|
cv::gpu::GpuMat dst = createMat(size, type, useRoi);
|
||||||
|
filter2D->apply(loadMat(src, useRoi), dst);
|
||||||
|
|
||||||
|
cv::Mat dst_gold;
|
||||||
|
cv::filter2D(src, dst_gold, -1, kernel, anchor, 0, borderType);
|
||||||
|
|
||||||
|
EXPECT_MAT_NEAR(dst_gold, dst, CV_MAT_DEPTH(type) == CV_32F ? 1e-1 : 1.0);
|
||||||
|
}
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(GPU_Filters, Filter2D, testing::Combine(
|
||||||
|
ALL_DEVICES,
|
||||||
|
DIFFERENT_SIZES,
|
||||||
|
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC4)),
|
||||||
|
testing::Values(KSize(cv::Size(3, 3)), KSize(cv::Size(5, 5)), KSize(cv::Size(7, 7)), KSize(cv::Size(11, 11)), KSize(cv::Size(13, 13)), KSize(cv::Size(15, 15))),
|
||||||
|
testing::Values(Anchor(cv::Point(-1, -1)), Anchor(cv::Point(0, 0)), Anchor(cv::Point(2, 2))),
|
||||||
|
testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT)),
|
||||||
|
WHOLE_SUBMAT));
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Laplacian
|
||||||
|
|
||||||
|
PARAM_TEST_CASE(Laplacian, cv::gpu::DeviceInfo, cv::Size, MatType, KSize, UseRoi)
|
||||||
|
{
|
||||||
|
cv::gpu::DeviceInfo devInfo;
|
||||||
|
cv::Size size;
|
||||||
|
int type;
|
||||||
|
cv::Size ksize;
|
||||||
|
bool useRoi;
|
||||||
|
|
||||||
|
virtual void SetUp()
|
||||||
|
{
|
||||||
|
devInfo = GET_PARAM(0);
|
||||||
|
size = GET_PARAM(1);
|
||||||
|
type = GET_PARAM(2);
|
||||||
|
ksize = GET_PARAM(3);
|
||||||
|
useRoi = GET_PARAM(4);
|
||||||
|
|
||||||
|
cv::gpu::setDevice(devInfo.deviceID());
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
GPU_TEST_P(Laplacian, Accuracy)
|
||||||
|
{
|
||||||
|
cv::Mat src = randomMat(size, type);
|
||||||
|
|
||||||
|
cv::gpu::GpuMat dst = createMat(size, type, useRoi);
|
||||||
|
cv::gpu::Laplacian(loadMat(src, useRoi), dst, -1, ksize.width);
|
||||||
|
|
||||||
|
cv::Mat dst_gold;
|
||||||
|
cv::Laplacian(src, dst_gold, -1, ksize.width);
|
||||||
|
|
||||||
|
EXPECT_MAT_NEAR(dst_gold, dst, src.depth() < CV_32F ? 0.0 : 1e-3);
|
||||||
|
}
|
||||||
|
|
||||||
|
INSTANTIATE_TEST_CASE_P(GPU_Filters, Laplacian, testing::Combine(
|
||||||
|
ALL_DEVICES,
|
||||||
|
DIFFERENT_SIZES,
|
||||||
|
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1)),
|
||||||
|
testing::Values(KSize(cv::Size(1, 1)), KSize(cv::Size(3, 3))),
|
||||||
|
WHOLE_SUBMAT));
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Sobel
|
// Sobel
|
||||||
|
|
||||||
@ -332,49 +447,6 @@ INSTANTIATE_TEST_CASE_P(GPU_Filters, GaussianBlur, testing::Combine(
|
|||||||
BorderType(cv::BORDER_REFLECT)),
|
BorderType(cv::BORDER_REFLECT)),
|
||||||
WHOLE_SUBMAT));
|
WHOLE_SUBMAT));
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Laplacian
|
|
||||||
|
|
||||||
PARAM_TEST_CASE(Laplacian, cv::gpu::DeviceInfo, cv::Size, MatType, KSize, UseRoi)
|
|
||||||
{
|
|
||||||
cv::gpu::DeviceInfo devInfo;
|
|
||||||
cv::Size size;
|
|
||||||
int type;
|
|
||||||
cv::Size ksize;
|
|
||||||
bool useRoi;
|
|
||||||
|
|
||||||
virtual void SetUp()
|
|
||||||
{
|
|
||||||
devInfo = GET_PARAM(0);
|
|
||||||
size = GET_PARAM(1);
|
|
||||||
type = GET_PARAM(2);
|
|
||||||
ksize = GET_PARAM(3);
|
|
||||||
useRoi = GET_PARAM(4);
|
|
||||||
|
|
||||||
cv::gpu::setDevice(devInfo.deviceID());
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
GPU_TEST_P(Laplacian, Accuracy)
|
|
||||||
{
|
|
||||||
cv::Mat src = randomMat(size, type);
|
|
||||||
|
|
||||||
cv::gpu::GpuMat dst = createMat(size, type, useRoi);
|
|
||||||
cv::gpu::Laplacian(loadMat(src, useRoi), dst, -1, ksize.width);
|
|
||||||
|
|
||||||
cv::Mat dst_gold;
|
|
||||||
cv::Laplacian(src, dst_gold, -1, ksize.width);
|
|
||||||
|
|
||||||
EXPECT_MAT_NEAR(dst_gold, dst, src.depth() < CV_32F ? 0.0 : 1e-3);
|
|
||||||
}
|
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(GPU_Filters, Laplacian, testing::Combine(
|
|
||||||
ALL_DEVICES,
|
|
||||||
DIFFERENT_SIZES,
|
|
||||||
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_32FC1)),
|
|
||||||
testing::Values(KSize(cv::Size(1, 1)), KSize(cv::Size(3, 3))),
|
|
||||||
WHOLE_SUBMAT));
|
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Erode
|
// Erode
|
||||||
|
|
||||||
@ -527,56 +599,4 @@ INSTANTIATE_TEST_CASE_P(GPU_Filters, MorphEx, testing::Combine(
|
|||||||
testing::Values(Iterations(1), Iterations(2), Iterations(3)),
|
testing::Values(Iterations(1), Iterations(2), Iterations(3)),
|
||||||
WHOLE_SUBMAT));
|
WHOLE_SUBMAT));
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
// Filter2D
|
|
||||||
|
|
||||||
PARAM_TEST_CASE(Filter2D, cv::gpu::DeviceInfo, cv::Size, MatType, KSize, Anchor, BorderType, UseRoi)
|
|
||||||
{
|
|
||||||
cv::gpu::DeviceInfo devInfo;
|
|
||||||
cv::Size size;
|
|
||||||
int type;
|
|
||||||
cv::Size ksize;
|
|
||||||
cv::Point anchor;
|
|
||||||
int borderType;
|
|
||||||
bool useRoi;
|
|
||||||
|
|
||||||
cv::Mat img;
|
|
||||||
|
|
||||||
virtual void SetUp()
|
|
||||||
{
|
|
||||||
devInfo = GET_PARAM(0);
|
|
||||||
size = GET_PARAM(1);
|
|
||||||
type = GET_PARAM(2);
|
|
||||||
ksize = GET_PARAM(3);
|
|
||||||
anchor = GET_PARAM(4);
|
|
||||||
borderType = GET_PARAM(5);
|
|
||||||
useRoi = GET_PARAM(6);
|
|
||||||
|
|
||||||
cv::gpu::setDevice(devInfo.deviceID());
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
GPU_TEST_P(Filter2D, Accuracy)
|
|
||||||
{
|
|
||||||
cv::Mat src = randomMat(size, type);
|
|
||||||
cv::Mat kernel = randomMat(cv::Size(ksize.width, ksize.height), CV_32FC1, 0.0, 1.0);
|
|
||||||
|
|
||||||
cv::gpu::GpuMat dst = createMat(size, type, useRoi);
|
|
||||||
cv::gpu::filter2D(loadMat(src, useRoi), dst, -1, kernel, anchor, borderType);
|
|
||||||
|
|
||||||
cv::Mat dst_gold;
|
|
||||||
cv::filter2D(src, dst_gold, -1, kernel, anchor, 0, borderType);
|
|
||||||
|
|
||||||
EXPECT_MAT_NEAR(dst_gold, dst, CV_MAT_DEPTH(type) == CV_32F ? 1e-1 : 1.0);
|
|
||||||
}
|
|
||||||
|
|
||||||
INSTANTIATE_TEST_CASE_P(GPU_Filters, Filter2D, testing::Combine(
|
|
||||||
ALL_DEVICES,
|
|
||||||
DIFFERENT_SIZES,
|
|
||||||
testing::Values(MatType(CV_8UC1), MatType(CV_8UC4), MatType(CV_16UC1), MatType(CV_16UC4), MatType(CV_32FC1), MatType(CV_32FC4)),
|
|
||||||
testing::Values(KSize(cv::Size(3, 3)), KSize(cv::Size(5, 5)), KSize(cv::Size(7, 7)), KSize(cv::Size(11, 11)), KSize(cv::Size(13, 13)), KSize(cv::Size(15, 15))),
|
|
||||||
testing::Values(Anchor(cv::Point(-1, -1)), Anchor(cv::Point(0, 0)), Anchor(cv::Point(2, 2))),
|
|
||||||
testing::Values(BorderType(cv::BORDER_REFLECT101), BorderType(cv::BORDER_REPLICATE), BorderType(cv::BORDER_CONSTANT), BorderType(cv::BORDER_REFLECT)),
|
|
||||||
WHOLE_SUBMAT));
|
|
||||||
|
|
||||||
#endif // HAVE_CUDA
|
#endif // HAVE_CUDA
|
||||||
|
@ -961,10 +961,11 @@ TEST(filter2D)
|
|||||||
gpu::GpuMat d_src(src);
|
gpu::GpuMat d_src(src);
|
||||||
gpu::GpuMat d_dst;
|
gpu::GpuMat d_dst;
|
||||||
|
|
||||||
gpu::filter2D(d_src, d_dst, -1, kernel);
|
Ptr<gpu::Filter> filter2D = gpu::createLinearFilter(d_src.type(), -1, kernel);
|
||||||
|
filter2D->apply(d_src, d_dst);
|
||||||
|
|
||||||
GPU_ON;
|
GPU_ON;
|
||||||
gpu::filter2D(d_src, d_dst, -1, kernel);
|
filter2D->apply(d_src, d_dst);
|
||||||
GPU_OFF;
|
GPU_OFF;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user