added support of BORDER_REFLECT101 to gpu::cornerHarris and gpu::cornerMinEigenVal, added support of border extrapolation in linear filters
parent 4789306499
commit ab04a03621
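
The extended signatures make the extrapolation mode an explicit parameter (defaulting to BORDER_REFLECT101, the only mode wired up so far). A minimal usage sketch (not part of the commit; the names img/harris/minEig are hypothetical, the module header is assumed to be opencv2/gpu/gpu.hpp, and the image is assumed to be already uploaded to the GPU):

    #include <opencv2/gpu/gpu.hpp>

    void detectCorners(const cv::gpu::GpuMat& img)   // CV_8U or CV_32F
    {
        cv::gpu::GpuMat harris, minEig;

        // blockSize = 5, Sobel aperture = 3, Harris k = 0.04; the border
        // argument could be omitted since BORDER_REFLECT101 is the default.
        cv::gpu::cornerHarris(img, harris, 5, 3, 0.04, cv::BORDER_REFLECT101);
        cv::gpu::cornerMinEigenVal(img, minEig, 5, 3, cv::BORDER_REFLECT101);
    }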
@@ -628,13 +628,11 @@ namespace cv
     CV_EXPORTS void Canny(const GpuMat& image, GpuMat& edges, double threshold1, double threshold2, int apertureSize = 3);

     //! computes Harris cornerness criteria at each image pixel
-    // (does BORDER_CONSTANT interpolation with 0 as the fill value)
-    CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, double k);
+    CV_EXPORTS void cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, double k, int borderType=BORDER_REFLECT101);


     //! computes minimum eigen value of 2x2 derivative covariation matrix at each pixel - the cornerness criteria
-    // (does BORDER_CONSTANT interpolation with 0 as the fill value)
-    CV_EXPORTS void cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, int ksize=3);
+    CV_EXPORTS void cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, int borderType=BORDER_REFLECT101);


 //////////////////////////////// Filter Engine ////////////////////////////////
modules/gpu/src/cuda/border_interpolate.hpp (new file, 108 lines)
@@ -0,0 +1,108 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+namespace cv { namespace gpu {
+
+    struct BrdReflect101
+    {
+        BrdReflect101(int len) : last(len - 1) {}
+
+        __device__ int idx_low(int i) const
+        {
+            return abs(i);
+        }
+
+        __device__ int idx_high(int i) const
+        {
+            return last - abs(i - last);
+        }
+
+        __device__ int idx(int i) const
+        {
+            return i <= last ? idx_low(i) : idx_high(i);
+        }
+
+        bool is_range_safe(int mini, int maxi) const
+        {
+            return -last <= mini && maxi <= 2 * last;
+        }
+
+        int last;
+    };
+
+
+    template <typename T>
+    struct BrdRowReflect101: BrdReflect101
+    {
+        BrdRowReflect101(int len) : BrdReflect101(len) {}
+
+        __device__ float at_low(int i, const T* data) const
+        {
+            return data[idx_low(i)];
+        }
+
+        __device__ float at_high(int i, const T* data) const
+        {
+            return data[idx_high(i)];
+        }
+    };
+
+
+    template <typename T>
+    struct BrdColReflect101: BrdReflect101
+    {
+        BrdColReflect101(int len, int step) : BrdReflect101(len), step(step) {}
+
+        __device__ float at_low(int i, const T* data) const
+        {
+            return data[idx_low(i) * step];
+        }
+
+        __device__ float at_high(int i, const T* data) const
+        {
+            return data[idx_high(i) * step];
+        }
+
+        int step;
+    };
+
+}}
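
For reference: reflect-101 mirrors about the edge pixel without duplicating it (gfedcb|abcdefgh|gfedcba in the CPU documentation's notation), and is_range_safe() checks that the requested index range never needs more than one reflection. A minimal host-side sketch of the index mapping (not part of the commit; the idx() logic is copied from BrdReflect101 above with the __device__ qualifiers dropped):

    #include <cassert>
    #include <cstdlib>

    // Host mirror of BrdReflect101 for a row of length len.
    struct Reflect101Host
    {
        explicit Reflect101Host(int len) : last(len - 1) {}
        int idx(int i) const
        {
            // In-range indices pass through; out-of-range ones are reflected.
            return i <= last ? std::abs(i) : last - std::abs(i - last);
        }
        int last;
    };

    int main()
    {
        Reflect101Host b(5);    // valid indices 0..4
        assert(b.idx(2)  == 2); // interior index is untouched
        assert(b.idx(-1) == 1); // one step left of 0 maps to 1
        assert(b.idx(-2) == 2);
        assert(b.idx(5)  == 3); // one step right of 4 maps to 3
        assert(b.idx(6)  == 2);
        return 0;
    }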
@@ -94,6 +94,14 @@ namespace cv
        cudaSafeCall( cudaGetTextureReference(&tex, name) );
        cudaSafeCall( cudaUnbindTexture(tex) );
    }
+
+    // Available GPU border interpolation modes (named after the
+    // corresponding CPU border interpolation modes)
+    enum
+    {
+        BORDER_REFLECT101 = 0,
+    };
+
 }
}

@@ -322,7 +322,8 @@ void normalize_hists(int nbins, int block_stride_x, int block_stride_y,
         normalize_hists_kernel_many_blocks<256, nblocks><<<grid, threads>>>(block_hist_size, img_block_width, block_hists, threshold);
     else if (nthreads == 512)
         normalize_hists_kernel_many_blocks<512, nblocks><<<grid, threads>>>(block_hist_size, img_block_width, block_hists, threshold);
+    // We don't support bigger sizes of the block histograms
     else
         cv::gpu::error("normalize_hists: histogram's size is too big, try to decrease number of bins", __FILE__, __LINE__);

     cudaSafeCall(cudaThreadSynchronize());
 }
@@ -41,6 +41,7 @@
 //M*/

 #include "cuda_shared.hpp"
+#include "border_interpolate.hpp"

 using namespace cv::gpu;

@@ -464,10 +465,40 @@ namespace cv { namespace gpu { namespace imgproc
         reprojectImageTo3D_caller(disp, xyzw, q, stream);
     }

+//////////////////////////////////////// Extract Cov Data ////////////////////////////////////////////////
+
+    __global__ void extractCovData_kernel(const int cols, const int rows, const PtrStepf Dx,
+                                          const PtrStepf Dy, PtrStepf dst)
+    {
+        const int x = blockIdx.x * blockDim.x + threadIdx.x;
+        const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+        if (x < cols && y < rows)
+        {
+            float dx = Dx.ptr(y)[x];
+            float dy = Dy.ptr(y)[x];
+
+            dst.ptr(y)[x] = dx * dx;
+            dst.ptr(y + rows)[x] = dx * dy;
+            dst.ptr(y + (rows << 1))[x] = dy * dy;
+        }
+    }
+
+    void extractCovData_caller(const DevMem2Df Dx, const DevMem2Df Dy, PtrStepf dst)
+    {
+        dim3 threads(32, 8);
+        dim3 grid(divUp(Dx.cols, threads.x), divUp(Dx.rows, threads.y));
+
+        extractCovData_kernel<<<grid, threads>>>(Dx.cols, Dx.rows, Dx, Dy, dst);
+        cudaSafeCall(cudaThreadSynchronize());
+    }
+
 /////////////////////////////////////////// Corner Harris /////////////////////////////////////////////////

+    template <typename B>
     __global__ void cornerHarris_kernel(const int cols, const int rows, const int block_size, const float k,
-                                        const PtrStep Dx, const PtrStep Dy, PtrStep dst)
+                                        const PtrStep Dx, const PtrStep Dy, PtrStep dst, B border_row,
+                                        B border_col)
     {
         const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
         const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
@@ -478,22 +509,21 @@ namespace cv { namespace gpu { namespace imgproc
             float b = 0.f;
             float c = 0.f;

-            int offset1 = -(block_size / 2);
-            int offset2 = offset1 + block_size;
+            const int ibegin = y - (block_size / 2);
+            const int jbegin = x - (block_size / 2);
+            const int iend = ibegin + block_size;
+            const int jend = jbegin + block_size;

-            unsigned int j_begin = max(x + offset1, 0);
-            unsigned int i_begin = max(y + offset1, 0);
-            unsigned int j_end = min(x + offset2, cols);
-            unsigned int i_end = min(y + offset2, rows);
-
-            for (unsigned int i = i_begin; i < i_end; ++i)
+            for (int i = ibegin; i < iend; ++i)
             {
-                const float* dx_row = (const float*)Dx.ptr(i);
-                const float* dy_row = (const float*)Dy.ptr(i);
-                for (unsigned int j = j_begin; j < j_end; ++j)
+                int y = border_col.idx(i);
+                const float* dx_row = (const float*)Dx.ptr(y);
+                const float* dy_row = (const float*)Dy.ptr(y);
+                for (int j = jbegin; j < jend; ++j)
                 {
-                    float dx = dx_row[j];
-                    float dy = dy_row[j];
+                    int x = border_row.idx(j);
+                    float dx = dx_row[x];
+                    float dy = dy_row[x];
                     a += dx * dx;
                     b += dx * dy;
                     c += dy * dy;
@@ -504,7 +534,8 @@ namespace cv { namespace gpu { namespace imgproc
         }
     }

-    void cornerHarris_caller(const int block_size, const float k, const DevMem2D Dx, const DevMem2D Dy, DevMem2D dst)
+    void cornerHarris_caller(const int block_size, const float k, const DevMem2D Dx, const DevMem2D Dy, DevMem2D dst,
+                             int border_type)
     {
         const int rows = Dx.rows;
         const int cols = Dx.cols;
@@ -512,14 +543,22 @@ namespace cv { namespace gpu { namespace imgproc
         dim3 threads(32, 8);
         dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));

-        cornerHarris_kernel<<<grid, threads>>>(cols, rows, block_size, k, Dx, Dy, dst);
+        switch (border_type)
+        {
+        case BORDER_REFLECT101:
+            cornerHarris_kernel<<<grid, threads>>>(
+                    cols, rows, block_size, k, Dx, Dy, dst,
+                    BrdReflect101(cols), BrdReflect101(rows));
+            break;
+        }
         cudaSafeCall(cudaThreadSynchronize());
     }

 /////////////////////////////////////////// Corner Min Eigen Val /////////////////////////////////////////////////

-    __global__ void cornerMinEigenVal_kernel(const int cols, const int rows, const int block_size,
-                                             const PtrStep Dx, const PtrStep Dy, PtrStep dst)
+    template <typename B>
+    __global__ void cornerMinEigenVal_kernel(const int cols, const int rows, const int block_size, const PtrStep Dx,
+                                             const PtrStep Dy, PtrStep dst, B border_row, B border_col)
     {
         const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
         const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
@@ -530,22 +569,21 @@ namespace cv { namespace gpu { namespace imgproc
             float b = 0.f;
             float c = 0.f;

-            int offset1 = -(block_size / 2);
-            int offset2 = offset1 + block_size;
+            const int ibegin = y - (block_size / 2);
+            const int jbegin = x - (block_size / 2);
+            const int iend = ibegin + block_size;
+            const int jend = jbegin + block_size;

-            unsigned int j_begin = max(x + offset1, 0);
-            unsigned int i_begin = max(y + offset1, 0);
-            unsigned int j_end = min(x + offset2, cols);
-            unsigned int i_end = min(y + offset2, rows);
-
-            for (unsigned int i = i_begin; i < i_end; ++i)
+            for (int i = ibegin; i < iend; ++i)
             {
-                const float* dx_row = (const float*)Dx.ptr(i);
-                const float* dy_row = (const float*)Dy.ptr(i);
-                for (unsigned int j = j_begin; j < j_end; ++j)
+                int y = border_col.idx(i);
+                const float* dx_row = (const float*)Dx.ptr(y);
+                const float* dy_row = (const float*)Dy.ptr(y);
+                for (int j = jbegin; j < jend; ++j)
                 {
-                    float dx = dx_row[j];
-                    float dy = dy_row[j];
+                    int x = border_row.idx(j);
+                    float dx = dx_row[x];
+                    float dy = dy_row[x];
                     a += dx * dx;
                     b += dx * dy;
                     c += dy * dy;
@@ -558,7 +596,8 @@ namespace cv { namespace gpu { namespace imgproc
         }
     }

-    void cornerMinEigenVal_caller(const int block_size, const DevMem2D Dx, const DevMem2D Dy, DevMem2D dst)
+    void cornerMinEigenVal_caller(const int block_size, const DevMem2D Dx, const DevMem2D Dy, DevMem2D dst,
+                                  int border_type)
     {
         const int rows = Dx.rows;
         const int cols = Dx.cols;
@@ -566,7 +605,14 @@ namespace cv { namespace gpu { namespace imgproc
         dim3 threads(32, 8);
         dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));

-        cornerMinEigenVal_kernel<<<grid, threads>>>(cols, rows, block_size, Dx, Dy, dst);
+        switch (border_type)
+        {
+        case BORDER_REFLECT101:
+            cornerMinEigenVal_kernel<<<grid, threads>>>(
+                    cols, rows, block_size, Dx, Dy, dst,
+                    BrdReflect101(cols), BrdReflect101(rows));
+            break;
+        }
         cudaSafeCall(cudaThreadSynchronize());
     }
 }}}
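
Both kernels accumulate the entries a = Σdx², b = Σdx·dy, c = Σdy² of the 2x2 derivative covariation matrix over the block window. The final per-pixel writes fall outside the displayed hunks; for reference, the standard responses they compute look like this (a sketch with hypothetical helper names, not code from the commit):

    // Harris cornerness: det(M) - k * trace(M)^2 for M = [[a, b], [b, c]].
    __device__ float harrisResponse(float a, float b, float c, float k)
    {
        return a * c - b * b - k * (a + c) * (a + c);
    }

    // Smaller eigenvalue of M: (a + c)/2 - sqrt(((a - c)/2)^2 + b^2).
    __device__ float minEigenValResponse(float a, float b, float c)
    {
        a *= 0.5f;
        c *= 0.5f;
        return (a + c) - sqrtf((a - c) * (a - c) + b * b);
    }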
modules/gpu/src/cuda/linear_filters_beta.cu (new file, 264 lines)
@@ -0,0 +1,264 @@
+/*M///////////////////////////////////////////////////////////////////////////////////////
+//
+//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
+//
+//  By downloading, copying, installing or using the software you agree to this license.
+//  If you do not agree to this license, do not download, install,
+//  copy or use the software.
+//
+//
+//                           License Agreement
+//                For Open Source Computer Vision Library
+//
+// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
+// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
+// Third party copyrights are property of their respective owners.
+//
+// Redistribution and use in source and binary forms, with or without modification,
+// are permitted provided that the following conditions are met:
+//
+//   * Redistribution's of source code must retain the above copyright notice,
+//     this list of conditions and the following disclaimer.
+//
+//   * Redistribution's in binary form must reproduce the above copyright notice,
+//     this list of conditions and the following disclaimer in the documentation
+//     and/or other materials provided with the distribution.
+//
+//   * The name of the copyright holders may not be used to endorse or promote products
+//     derived from this software without specific prior written permission.
+//
+// This software is provided by the copyright holders and contributors "as is" and
+// any express or implied warranties, including, but not limited to, the implied
+// warranties of merchantability and fitness for a particular purpose are disclaimed.
+// In no event shall the Intel Corporation or contributors be liable for any direct,
+// indirect, incidental, special, exemplary, or consequential damages
+// (including, but not limited to, procurement of substitute goods or services;
+// loss of use, data, or profits; or business interruption) however caused
+// and on any theory of liability, whether in contract, strict liability,
+// or tort (including negligence or otherwise) arising in any way out of
+// the use of this software, even if advised of the possibility of such damage.
+//
+//M*/
+
+#include "opencv2/gpu/devmem2d.hpp"
+#include "safe_call.hpp"
+#include "cuda_shared.hpp"
+#include "border_interpolate.hpp"
+
+#define BLOCK_DIM_X 16
+#define BLOCK_DIM_Y 16
+#define MAX_KERNEL_SIZE 16
+
+using namespace cv::gpu;
+
+
+namespace cv { namespace gpu { namespace linear_filters {
+
+
+// Global linear kernel data storage
+__constant__ float ckernel[MAX_KERNEL_SIZE];
+
+
+void loadKernel(const float* kernel, int ksize)
+{
+    cudaSafeCall(cudaMemcpyToSymbol(ckernel, kernel, ksize * sizeof(float)));
+}
+
+
+template <typename T, typename B, int ksize>
+__global__ void rowFilterKernel(const DevMem2D_<T> src, PtrStepf dst,
+                                int anchor, B border)
+{
+    __shared__ float smem[BLOCK_DIM_X * BLOCK_DIM_Y * 3];
+
+    const int x = blockIdx.x * blockDim.x + threadIdx.x;
+    const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+    float* srow = smem + threadIdx.y * blockDim.x * 3;
+
+    if (y < src.rows)
+    {
+        const T* src_row = src.ptr(y);
+
+        srow[threadIdx.x + blockDim.x] = border.at_high(x, src_row);
+
+        srow[threadIdx.x] = border.at_low(x - blockDim.x, src_row);
+
+        srow[threadIdx.x + (blockDim.x << 1)] = border.at_high(x + blockDim.x, src_row);
+
+        __syncthreads();
+
+        if (x < src.cols)
+        {
+            srow += threadIdx.x + blockDim.x - anchor;
+
+            float sum = 0.f;
+            for (int i = 0; i < ksize; ++i)
+                sum += srow[i] * ckernel[i];
+
+            dst.ptr(y)[x] = sum;
+        }
+    }
+}
+
+
+template <typename T, typename B, int ksize>
+void rowFilterCaller(const DevMem2D_<T> src, PtrStepf dst, int anchor)
+{
+    dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y);
+    dim3 grid(divUp(src.cols, threads.x), divUp(src.rows, threads.y));
+
+    B border(src.cols);
+
+    if (!border.is_range_safe(-BLOCK_DIM_X, (grid.x + 1) * BLOCK_DIM_X - 1))
+        cv::gpu::error("rowFilterCaller: can't use specified border extrapolation, image is too small, "
+                       "try bigger image or another border extrapolation mode", __FILE__, __LINE__);
+
+    rowFilterKernel<T, B, ksize><<<grid, threads>>>(src, dst, anchor, border);
+    cudaSafeCall(cudaThreadSynchronize());
+}
+
+
+template <typename T, typename B>
+void rowFilterCaller(const DevMem2D_<T> src, PtrStepf dst, int anchor,
+                     const float* kernel, int ksize)
+{
+    typedef void (*Caller)(const DevMem2D_<T>, PtrStepf, int);
+
+    static const Caller callers[] =
+    {
+        0, rowFilterCaller<T, B, 1>,
+        rowFilterCaller<T, B, 2>, rowFilterCaller<T, B, 3>,
+        rowFilterCaller<T, B, 4>, rowFilterCaller<T, B, 5>,
+        rowFilterCaller<T, B, 6>, rowFilterCaller<T, B, 7>,
+        rowFilterCaller<T, B, 8>, rowFilterCaller<T, B, 9>,
+        rowFilterCaller<T, B, 10>, rowFilterCaller<T, B, 11>,
+        rowFilterCaller<T, B, 12>, rowFilterCaller<T, B, 13>,
+        rowFilterCaller<T, B, 14>, rowFilterCaller<T, B, 15>
+    };
+
+    loadKernel(kernel, ksize);
+    callers[ksize](src, dst, anchor);
+}
+
+
+template <typename T>
+void rowFilterCaller(const DevMem2D_<T> src, PtrStepf dst, int anchor,
+                     const float* kernel, int ksize, int brd_interp)
+{
+    typedef void (*Caller)(const DevMem2D_<T>, PtrStepf, int, const float*, int);
+
+    static const Caller callers[] =
+    {
+        rowFilterCaller<T, BrdRowReflect101<T> >
+    };
+
+    callers[brd_interp](src, dst, anchor, kernel, ksize);
+}
+
+
+template void rowFilterCaller<unsigned char>(const DevMem2D_<unsigned char>, PtrStepf, int, const float*, int, int);
+template void rowFilterCaller<float>(const DevMem2D_<float>, PtrStepf, int, const float*, int, int);
+
+
+template <typename T, typename B, int ksize>
+__global__ void colFilterKernel(const DevMem2D_<T> src, PtrStepf dst, int anchor, B border)
+{
+    __shared__ float smem[BLOCK_DIM_X * BLOCK_DIM_Y * 3];
+
+    const int x = blockIdx.x * blockDim.x + threadIdx.x;
+    const int y = blockIdx.y * blockDim.y + threadIdx.y;
+
+    const int smem_step = blockDim.x;
+
+    float* scol = smem + threadIdx.x;
+
+    if (x < src.cols)
+    {
+        const T* src_col = src.data + x;
+
+        scol[(threadIdx.y + blockDim.y) * smem_step] = border.at_high(y, src_col);
+
+        scol[threadIdx.y * smem_step] = border.at_low(y - blockDim.y, src_col);
+
+        scol[(threadIdx.y + (blockDim.y << 1)) * smem_step] = border.at_high(y + blockDim.y, src_col);
+
+        __syncthreads();
+
+        if (y < src.rows)
+        {
+            scol += (threadIdx.y + blockDim.y - anchor) * smem_step;
+
+            float sum = 0.f;
+            for (int i = 0; i < ksize; ++i)
+                sum += scol[i * smem_step] * ckernel[i];
+
+            dst.ptr(y)[x] = sum;
+        }
+    }
+}
+
+
+template <typename T, typename B, int ksize>
+void colFilterCaller(const DevMem2D_<T> src, PtrStepf dst, int anchor)
+{
+    dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y);
+    dim3 grid(divUp(src.cols, threads.x), divUp(src.rows, threads.y));
+
+    B border(src.rows, src.step / src.elem_size);
+
+    if (src.step - border.step * src.elem_size != 0)
+        cv::gpu::error("colFilterCaller: src step must be multiple of its element size",
+                       __FILE__, __LINE__);
+
+    if (!border.is_range_safe(-BLOCK_DIM_Y, (grid.y + 1) * BLOCK_DIM_Y - 1))
+        cv::gpu::error("colFilterCaller: can't use specified border extrapolation, image is too small, "
+                       "try bigger image or another border extrapolation mode", __FILE__, __LINE__);
+
+    colFilterKernel<T, B, ksize><<<grid, threads>>>(src, dst, anchor, border);
+    cudaSafeCall(cudaThreadSynchronize());
+}
+
+
+template <typename T, typename B>
+void colFilterCaller(const DevMem2D_<T> src, PtrStepf dst, int anchor,
+                     const float* kernel, int ksize)
+{
+    typedef void (*Caller)(const DevMem2D_<T>, PtrStepf, int);
+
+    static const Caller callers[] =
+    {
+        0, colFilterCaller<T, B, 1>,
+        colFilterCaller<T, B, 2>, colFilterCaller<T, B, 3>,
+        colFilterCaller<T, B, 4>, colFilterCaller<T, B, 5>,
+        colFilterCaller<T, B, 6>, colFilterCaller<T, B, 7>,
+        colFilterCaller<T, B, 8>, colFilterCaller<T, B, 9>,
+        colFilterCaller<T, B, 10>, colFilterCaller<T, B, 11>,
+        colFilterCaller<T, B, 12>, colFilterCaller<T, B, 13>,
+        colFilterCaller<T, B, 14>, colFilterCaller<T, B, 15>
+    };
+
+    loadKernel(kernel, ksize);
+    callers[ksize](src, dst, anchor);
+}
+
+
+template <typename T>
+void colFilterCaller(const DevMem2D_<T> src, PtrStepf dst, int anchor,
+                     const float* kernel, int ksize, int brd_interp)
+{
+    typedef void (*Caller)(const DevMem2D_<T>, PtrStepf, int, const float*, int);
+
+    static const Caller callers[] =
+    {
+        colFilterCaller<T, BrdColReflect101<T> >
+    };
+
+    callers[brd_interp](src, dst, anchor, kernel, ksize);
+}
+
+
+template void colFilterCaller<unsigned char>(const DevMem2D_<unsigned char>, PtrStepf, int, const float*, int, int);
+template void colFilterCaller<float>(const DevMem2D_<float>, PtrStepf, int, const float*, int, int);
+
+}}}
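
The row and column callers compose into a full separable convolution: the row pass writes float intermediates, the column pass consumes them, each with at most one reflection of border handling. A sketch of how module code could drive them (a hypothetical wrapper that mirrors the rowFilterCaller/colFilterCaller calls made from imgproc.cpp below; tmp and dst are assumed to be preallocated CV_32F buffers of src's size, and DevMem2D_ arguments are assumed to convert to PtrStepf as elsewhere in the module):

    #include "opencv2/gpu/devmem2d.hpp"

    namespace cv { namespace gpu { namespace linear_filters {
        // Declarations matching the explicit instantiations above.
        template <typename T>
        void rowFilterCaller(const DevMem2D_<T> src, PtrStepf dst, int anchor,
                             const float* kernel, int ksize, int brd_interp);
        template <typename T>
        void colFilterCaller(const DevMem2D_<T> src, PtrStepf dst, int anchor,
                             const float* kernel, int ksize, int brd_interp);
    }}}

    // Hypothetical helper: 3x3 separable box blur over an 8-bit image.
    void boxBlur3x3(const cv::gpu::DevMem2D_<unsigned char>& src,
                    cv::gpu::DevMem2Df tmp, cv::gpu::DevMem2Df dst)
    {
        const float k[3] = { 1.f / 3.f, 1.f / 3.f, 1.f / 3.f };
        const int anchor = 3 >> 1;  // center the 3-tap kernel, as in imgproc.cpp
        const int brd = 0;          // cv::gpu::BORDER_REFLECT101
        cv::gpu::linear_filters::rowFilterCaller<unsigned char>(src, tmp, anchor, k, 3, brd);
        cv::gpu::linear_filters::colFilterCaller<float>(tmp, dst, anchor, k, 3, brd);
    }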
@@ -68,8 +68,8 @@ void cv::gpu::histEven(const GpuMat&, GpuMat&, int, int, int) { throw_nogpu(); }
 void cv::gpu::histEven(const GpuMat&, GpuMat*, int*, int*, int*) { throw_nogpu(); }
 void cv::gpu::histRange(const GpuMat&, GpuMat&, const GpuMat&) { throw_nogpu(); }
 void cv::gpu::histRange(const GpuMat&, GpuMat*, const GpuMat*) { throw_nogpu(); }
-void cv::gpu::cornerHarris(const GpuMat&, GpuMat&, int, int, double) { throw_nogpu(); }
-void cv::gpu::cornerMinEigenVal(const GpuMat&, GpuMat&, int, int) { throw_nogpu(); }
+void cv::gpu::cornerHarris(const GpuMat&, GpuMat&, int, int, double, int) { throw_nogpu(); }
+void cv::gpu::cornerMinEigenVal(const GpuMat&, GpuMat&, int, int, int) { throw_nogpu(); }


 #else /* !defined (HAVE_CUDA) */
@@ -861,49 +861,119 @@ void cv::gpu::histRange(const GpuMat& src, GpuMat hist[4], const GpuMat levels[4

 namespace cv { namespace gpu { namespace imgproc {

-    void cornerHarris_caller(const int block_size, const float k, const DevMem2D Dx, const DevMem2D Dy, DevMem2D dst);
-    void cornerMinEigenVal_caller(const int block_size, const DevMem2D Dx, const DevMem2D Dy, DevMem2D dst);
+    void extractCovData_caller(const DevMem2Df Dx, const DevMem2Df Dy, PtrStepf dst);
+    void cornerHarris_caller(const int block_size, const float k, const DevMem2D Dx, const DevMem2D Dy, DevMem2D dst, int border_type);
+    void cornerMinEigenVal_caller(const int block_size, const DevMem2D Dx, const DevMem2D Dy, DevMem2D dst, int border_type);

 }}}

+namespace cv { namespace gpu { namespace linear_filters {
+
+    template <typename T>
+    void rowFilterCaller(const DevMem2D_<T> src, PtrStepf dst, int anchor, const float* kernel,
+                         int ksize, int brd_interp);
+
+    template <typename T>
+    void colFilterCaller(const DevMem2D_<T> src, PtrStepf dst, int anchor, const float* kernel,
+                         int ksize, int brd_interp);
+
+}}}
+
 namespace
 {
-    void computeGradients(const GpuMat& src, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize)
-    {
-        CV_Assert(src.type() == CV_32F);
-
+    template <typename T>
+    void extractCovData(const GpuMat& src, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int gpuBorderType)
+    {
         double scale = (double)(1 << ((ksize > 0 ? ksize : 3) - 1)) * blockSize;
-        if (ksize < 0) scale *= 2.;
+        if (ksize < 0)
+            scale *= 2.;
+        if (src.depth() == CV_8U)
+            scale *= 255.;
         scale = 1./scale;

-        if (ksize > 0)
+        GpuMat tmp_buf(src.size(), CV_32F);
+        Dx.create(src.size(), CV_32F);
+        Dy.create(src.size(), CV_32F);
+        Mat kx, ky;
+
+        getDerivKernels(kx, ky, 1, 0, ksize, false, CV_32F);
+        kx = kx.reshape(1, 1) * scale;
+        ky = ky.reshape(1, 1);
+
+        linear_filters::rowFilterCaller<T>(
+                src, tmp_buf, kx.cols >> 1, kx.ptr<float>(0), kx.cols,
+                gpuBorderType);
+
+        linear_filters::colFilterCaller<float>(
+                tmp_buf, Dx, ky.cols >> 1, ky.ptr<float>(0), ky.cols,
+                gpuBorderType);
+
+        getDerivKernels(kx, ky, 0, 1, ksize, false, CV_32F);
+        kx = kx.reshape(1, 1);
+        ky = ky.reshape(1, 1) * scale;
+
+        linear_filters::rowFilterCaller<T>(
+                src, tmp_buf, kx.cols >> 1, kx.ptr<float>(0), kx.cols,
+                gpuBorderType);
+
+        linear_filters::colFilterCaller<float>(
+                tmp_buf, Dy, ky.cols >> 1, ky.ptr<float>(0), ky.cols,
+                gpuBorderType);
+    }
+
+    void extractCovData(const GpuMat& src, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int gpuBorderType)
+    {
+        switch (src.type())
         {
-            Sobel(src, Dx, CV_32F, 1, 0, ksize, scale);
-            Sobel(src, Dy, CV_32F, 0, 1, ksize, scale);
-        }
-        else
-        {
-            Scharr(src, Dx, CV_32F, 1, 0, scale);
-            Scharr(src, Dy, CV_32F, 0, 1, scale);
+        case CV_8U:
+            extractCovData<unsigned char>(src, Dx, Dy, blockSize, ksize, gpuBorderType);
+            break;
+        case CV_32F:
+            extractCovData<float>(src, Dx, Dy, blockSize, ksize, gpuBorderType);
+            break;
+        default:
+            CV_Error(CV_StsBadArg, "extractCovData: unsupported type of the source matrix");
         }
     }

 } // Anonymous namespace

-void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, double k)
+void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, double k, int borderType)
 {
+    int gpuBorderType;
+    switch (borderType)
+    {
+    case cv::BORDER_REFLECT101:
+        gpuBorderType = cv::gpu::BORDER_REFLECT101;
+        break;
+    default:
+        CV_Error(CV_StsBadArg, "cornerHarris: unsupported border type");
+    }
+
     GpuMat Dx, Dy;
-    computeGradients(src, Dx, Dy, blockSize, ksize);
+    extractCovData(src, Dx, Dy, blockSize, ksize, gpuBorderType);
     dst.create(src.size(), CV_32F);
-    imgproc::cornerHarris_caller(blockSize, (float)k, Dx, Dy, dst);
+    imgproc::cornerHarris_caller(blockSize, (float)k, Dx, Dy, dst, gpuBorderType);
 }

-void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, int ksize)
+void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, int borderType)
 {
+    int gpuBorderType;
+    switch (borderType)
+    {
+    case cv::BORDER_REFLECT101:
+        gpuBorderType = cv::gpu::BORDER_REFLECT101;
+        break;
+    default:
+        CV_Error(CV_StsBadArg, "cornerMinEigenVal: unsupported border type");
+    }
+
     GpuMat Dx, Dy;
-    computeGradients(src, Dx, Dy, blockSize, ksize);
+    extractCovData(src, Dx, Dy, blockSize, ksize, gpuBorderType);
     dst.create(src.size(), CV_32F);
-    imgproc::cornerMinEigenVal_caller(blockSize, Dx, Dy, dst);
+    imgproc::cornerMinEigenVal_caller(blockSize, Dx, Dy, dst, gpuBorderType);
 }


 #endif /* !defined (HAVE_CUDA) */

@@ -616,9 +616,11 @@ struct CV_GpuCornerHarrisTest: CvTest
     {
         for (int i = 0; i < 5; ++i)
         {
-            int rows = 10 + rand() % 300, cols = 10 + rand() % 300;
+            int rows = 25 + rand() % 300, cols = 25 + rand() % 300;
             if (!compareToCpuTest(rows, cols, CV_32F, 1 + rand() % 5, 1 + 2 * (rand() % 4))) return;
             if (!compareToCpuTest(rows, cols, CV_32F, 1 + rand() % 5, -1)) return;
+            if (!compareToCpuTest(rows, cols, CV_8U, 1 + rand() % 5, 1 + 2 * (rand() % 4))) return;
+            if (!compareToCpuTest(rows, cols, CV_8U, 1 + rand() % 5, -1)) return;
         }
     }
     catch (const Exception& e)
@@ -634,22 +636,22 @@ struct CV_GpuCornerHarrisTest: CvTest
         cv::Mat src(rows, cols, depth);
         if (depth == CV_32F)
             rng.fill(src, RNG::UNIFORM, cv::Scalar(0), cv::Scalar(1));
+        else if (depth == CV_8U)
+            rng.fill(src, RNG::UNIFORM, cv::Scalar(0), cv::Scalar(256));

         double k = 0.1;
-        int borderType = BORDER_DEFAULT;
+        int borderType = BORDER_REFLECT101;

         cv::Mat dst_gold;
         cv::cornerHarris(src, dst_gold, blockSize, apertureSize, k, borderType);

         cv::gpu::GpuMat dst;
-        cv::gpu::cornerHarris(cv::gpu::GpuMat(src), dst, blockSize, apertureSize, k);
-
-        int asize = apertureSize > 0 ? apertureSize : 3;
+        cv::gpu::cornerHarris(cv::gpu::GpuMat(src), dst, blockSize, apertureSize, k, borderType);

         cv::Mat dsth = dst;
-        for (int i = max(blockSize, asize) + 2; i < dst.rows - max(blockSize, asize) - 2; ++i)
+        for (int i = 0; i < dst.rows; ++i)
         {
-            for (int j = max(blockSize, asize) + 2; j < dst.cols - max(blockSize, asize) - 2; ++j)
+            for (int j = 0; j < dst.cols; ++j)
             {
                 float a = dst_gold.at<float>(i, j);
                 float b = dsth.at<float>(i, j);
@@ -678,9 +680,11 @@ struct CV_GpuCornerMinEigenValTest: CvTest
     {
         for (int i = 0; i < 3; ++i)
         {
-            int rows = 10 + rand() % 300, cols = 10 + rand() % 300;
+            int rows = 25 + rand() % 300, cols = 25 + rand() % 300;
             if (!compareToCpuTest(rows, cols, CV_32F, 1 + rand() % 5, -1)) return;
             if (!compareToCpuTest(rows, cols, CV_32F, 1 + rand() % 5, 1 + 2 * (rand() % 4))) return;
+            if (!compareToCpuTest(rows, cols, CV_8U, 1 + rand() % 5, -1)) return;
+            if (!compareToCpuTest(rows, cols, CV_8U, 1 + rand() % 5, 1 + 2 * (rand() % 4))) return;
         }
     }
     catch (const Exception& e)
@@ -696,25 +700,25 @@ struct CV_GpuCornerMinEigenValTest: CvTest
         cv::Mat src(rows, cols, depth);
         if (depth == CV_32F)
             rng.fill(src, RNG::UNIFORM, cv::Scalar(0), cv::Scalar(1));
+        else if (depth == CV_8U)
+            rng.fill(src, RNG::UNIFORM, cv::Scalar(0), cv::Scalar(256));

-        int borderType = BORDER_DEFAULT;
+        int borderType = BORDER_REFLECT101;

         cv::Mat dst_gold;
         cv::cornerMinEigenVal(src, dst_gold, blockSize, apertureSize, borderType);

         cv::gpu::GpuMat dst;
-        cv::gpu::cornerMinEigenVal(cv::gpu::GpuMat(src), dst, blockSize, apertureSize);
-
-        int asize = apertureSize > 0 ? apertureSize : 3;
+        cv::gpu::cornerMinEigenVal(cv::gpu::GpuMat(src), dst, blockSize, apertureSize, borderType);

         cv::Mat dsth = dst;
-        for (int i = max(blockSize, asize) + 2; i < dst.rows - max(blockSize, asize) - 2; ++i)
+        for (int i = 0; i < dst.rows; ++i)
         {
-            for (int j = max(blockSize, asize) + 2; j < dst.cols - max(blockSize, asize) - 2; ++j)
+            for (int j = 0; j < dst.cols; ++j)
             {
                 float a = dst_gold.at<float>(i, j);
                 float b = dsth.at<float>(i, j);
-                if (fabs(a - b) > 1e-3f)
+                if (fabs(a - b) > 1e-2f)
                 {
                     ts->printf(CvTS::CONSOLE, "%d %d %f %f %d %d\n", i, j, a, b, apertureSize, blockSize);
                     ts->set_failed_test_info(CvTS::FAIL_INVALID_OUTPUT);