removed linear_filters_beta.cu as its functionality was moved into filters.cu
This commit is contained in:
parent
8274ed22e4
commit
fa446e7e35
@ -1,266 +0,0 @@
|
|||||||
/*M///////////////////////////////////////////////////////////////////////////////////////
|
|
||||||
//
|
|
||||||
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
|
|
||||||
//
|
|
||||||
// By downloading, copying, installing or using the software you agree to this license.
|
|
||||||
// If you do not agree to this license, do not download, install,
|
|
||||||
// copy or use the software.
|
|
||||||
//
|
|
||||||
//
|
|
||||||
// License Agreement
|
|
||||||
// For Open Source Computer Vision Library
|
|
||||||
//
|
|
||||||
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
|
|
||||||
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
|
|
||||||
// Third party copyrights are property of their respective owners.
|
|
||||||
//
|
|
||||||
// Redistribution and use in source and binary forms, with or without modification,
|
|
||||||
// are permitted provided that the following conditions are met:
|
|
||||||
//
|
|
||||||
// * Redistribution's of source code must retain the above copyright notice,
|
|
||||||
// this list of conditions and the following disclaimer.
|
|
||||||
//
|
|
||||||
// * Redistribution's in binary form must reproduce the above copyright notice,
|
|
||||||
// this list of conditions and the following disclaimer in the documentation
|
|
||||||
// and/or other materials provided with the distribution.
|
|
||||||
//
|
|
||||||
// * The name of the copyright holders may not be used to endorse or promote products
|
|
||||||
// derived from this software without specific prior written permission.
|
|
||||||
//
|
|
||||||
// This software is provided by the copyright holders and contributors "as is" and
|
|
||||||
// any express or implied warranties, including, but not limited to, the implied
|
|
||||||
// warranties of merchantability and fitness for a particular purpose are disclaimed.
|
|
||||||
// In no event shall the Intel Corporation or contributors be liable for any direct,
|
|
||||||
// indirect, incidental, special, exemplary, or consequential damages
|
|
||||||
// (including, but not limited to, procurement of substitute goods or services;
|
|
||||||
// loss of use, data, or profits; or business interruption) however caused
|
|
||||||
// and on any theory of liability, whether in contract, strict liability,
|
|
||||||
// or tort (including negligence or otherwise) arising in any way out of
|
|
||||||
// the use of this software, even if advised of the possibility of such damage.
|
|
||||||
//
|
|
||||||
//M*/
|
|
||||||
|
|
||||||
#include "opencv2/gpu/devmem2d.hpp"
|
|
||||||
#include "opencv2/gpu/device/border_interpolate.hpp"
|
|
||||||
#include "safe_call.hpp"
|
|
||||||
#include "internal_shared.hpp"
|
|
||||||
|
|
||||||
#define BLOCK_DIM_X 16
|
|
||||||
#define BLOCK_DIM_Y 16
|
|
||||||
#define MAX_KERNEL_SIZE 16
|
|
||||||
|
|
||||||
using namespace cv::gpu;
|
|
||||||
using namespace cv::gpu::device;
|
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace linear_filters {
|
|
||||||
|
|
||||||
|
|
||||||
// Global linear kernel data storage
|
|
||||||
__constant__ float ckernel[MAX_KERNEL_SIZE];
|
|
||||||
|
|
||||||
|
|
||||||
void loadKernel(const float* kernel, int ksize)
|
|
||||||
{
|
|
||||||
cudaSafeCall(cudaMemcpyToSymbol(ckernel, kernel, ksize * sizeof(float)));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
template <typename T, typename B, int ksize>
|
|
||||||
__global__ void rowFilterKernel(const DevMem2D_<T> src, PtrStepf dst,
|
|
||||||
int anchor, B border)
|
|
||||||
{
|
|
||||||
__shared__ float smem[BLOCK_DIM_X * BLOCK_DIM_Y * 3];
|
|
||||||
|
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
|
||||||
|
|
||||||
float* srow = smem + threadIdx.y * blockDim.x * 3;
|
|
||||||
|
|
||||||
if (y < src.rows)
|
|
||||||
{
|
|
||||||
const T* src_row = src.ptr(y);
|
|
||||||
|
|
||||||
srow[threadIdx.x + blockDim.x] = border.at_high(x, src_row);
|
|
||||||
|
|
||||||
srow[threadIdx.x] = border.at_low(x - blockDim.x, src_row);
|
|
||||||
|
|
||||||
srow[threadIdx.x + (blockDim.x << 1)] = border.at_high(x + blockDim.x, src_row);
|
|
||||||
|
|
||||||
__syncthreads();
|
|
||||||
|
|
||||||
if (x < src.cols)
|
|
||||||
{
|
|
||||||
srow += threadIdx.x + blockDim.x - anchor;
|
|
||||||
|
|
||||||
float sum = 0.f;
|
|
||||||
for (int i = 0; i < ksize; ++i)
|
|
||||||
sum += srow[i] * ckernel[i];
|
|
||||||
|
|
||||||
dst.ptr(y)[x] = sum;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
template <typename T, typename B, int ksize>
|
|
||||||
void rowFilterCaller(const DevMem2D_<T> src, PtrStepf dst, int anchor)
|
|
||||||
{
|
|
||||||
dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y);
|
|
||||||
dim3 grid(divUp(src.cols, threads.x), divUp(src.rows, threads.y));
|
|
||||||
|
|
||||||
B border(src.cols);
|
|
||||||
|
|
||||||
if (!border.is_range_safe(-BLOCK_DIM_X, (grid.x + 1) * BLOCK_DIM_X - 1))
|
|
||||||
cv::gpu::error("rowFilterCaller: can't use specified border extrapolation, image is too small, "
|
|
||||||
"try bigger image or another border extrapolation mode", __FILE__, __LINE__);
|
|
||||||
|
|
||||||
rowFilterKernel<T, B, ksize><<<grid, threads>>>(src, dst, anchor, border);
|
|
||||||
cudaSafeCall(cudaThreadSynchronize());
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
template <typename T, typename B>
|
|
||||||
void rowFilterCaller(const DevMem2D_<T> src, PtrStepf dst, int anchor,
|
|
||||||
const float* kernel, int ksize)
|
|
||||||
{
|
|
||||||
typedef void (*Caller)(const DevMem2D_<T>, PtrStepf, int);
|
|
||||||
|
|
||||||
static const Caller callers[] =
|
|
||||||
{
|
|
||||||
0, rowFilterCaller<T, B, 1>,
|
|
||||||
rowFilterCaller<T, B, 2>, rowFilterCaller<T, B, 3>,
|
|
||||||
rowFilterCaller<T, B, 4>, rowFilterCaller<T, B, 5>,
|
|
||||||
rowFilterCaller<T, B, 6>, rowFilterCaller<T, B, 7>,
|
|
||||||
rowFilterCaller<T, B, 8>, rowFilterCaller<T, B, 9>,
|
|
||||||
rowFilterCaller<T, B, 10>, rowFilterCaller<T, B, 11>,
|
|
||||||
rowFilterCaller<T, B, 12>, rowFilterCaller<T, B, 13>,
|
|
||||||
rowFilterCaller<T, B, 14>, rowFilterCaller<T, B, 15>
|
|
||||||
};
|
|
||||||
|
|
||||||
loadKernel(kernel, ksize);
|
|
||||||
callers[ksize](src, dst, anchor);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
void rowFilterCaller(const DevMem2D_<T> src, PtrStepf dst, int anchor,
|
|
||||||
const float* kernel, int ksize, int brd_interp)
|
|
||||||
{
|
|
||||||
typedef void (*Caller)(const DevMem2D_<T>, PtrStepf, int, const float*, int);
|
|
||||||
|
|
||||||
static const Caller callers[] =
|
|
||||||
{
|
|
||||||
rowFilterCaller<T, BrdRowReflect101<T> >,
|
|
||||||
rowFilterCaller<T, BrdRowReplicate<T> >
|
|
||||||
};
|
|
||||||
|
|
||||||
callers[brd_interp](src, dst, anchor, kernel, ksize);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
template void rowFilterCaller<unsigned char>(const DevMem2D_<unsigned char>, PtrStepf, int, const float*, int, int);
|
|
||||||
template void rowFilterCaller<float>(const DevMem2D_<float>, PtrStepf, int, const float*, int, int);
|
|
||||||
|
|
||||||
|
|
||||||
template <typename T, typename B, int ksize>
|
|
||||||
__global__ void colFilterKernel(const DevMem2D_<T> src, PtrStepf dst, int anchor, B border)
|
|
||||||
{
|
|
||||||
__shared__ float smem[BLOCK_DIM_X * BLOCK_DIM_Y * 3];
|
|
||||||
|
|
||||||
const int x = blockIdx.x * blockDim.x + threadIdx.x;
|
|
||||||
const int y = blockIdx.y * blockDim.y + threadIdx.y;
|
|
||||||
|
|
||||||
const int smem_step = blockDim.x;
|
|
||||||
|
|
||||||
float* scol = smem + threadIdx.x;
|
|
||||||
|
|
||||||
if (x < src.cols)
|
|
||||||
{
|
|
||||||
const T* src_col = src.data + x;
|
|
||||||
|
|
||||||
scol[(threadIdx.y + blockDim.y) * smem_step] = border.at_high(y, src_col);
|
|
||||||
|
|
||||||
scol[threadIdx.y * smem_step] = border.at_low(y - blockDim.y, src_col);
|
|
||||||
|
|
||||||
scol[(threadIdx.y + (blockDim.y << 1)) * smem_step] = border.at_high(y + blockDim.y, src_col);
|
|
||||||
|
|
||||||
__syncthreads();
|
|
||||||
|
|
||||||
if (y < src.rows)
|
|
||||||
{
|
|
||||||
scol += (threadIdx.y + blockDim.y - anchor)* smem_step;
|
|
||||||
|
|
||||||
float sum = 0.f;
|
|
||||||
for(int i = 0; i < ksize; ++i)
|
|
||||||
sum += scol[i * smem_step] * ckernel[i];
|
|
||||||
|
|
||||||
dst.ptr(y)[x] = sum;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
template <typename T, typename B, int ksize>
|
|
||||||
void colFilterCaller(const DevMem2D_<T> src, PtrStepf dst, int anchor)
|
|
||||||
{
|
|
||||||
dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y);
|
|
||||||
dim3 grid(divUp(src.cols, threads.x), divUp(src.rows, threads.y));
|
|
||||||
|
|
||||||
B border(src.rows, src.step / src.elem_size);
|
|
||||||
|
|
||||||
if (src.step - border.step * src.elem_size != 0)
|
|
||||||
cv::gpu::error("colFilterCaller: src step must be multiple of its element size",
|
|
||||||
__FILE__, __LINE__);
|
|
||||||
|
|
||||||
if (!border.is_range_safe(-BLOCK_DIM_Y, (grid.y + 1) * BLOCK_DIM_Y - 1))
|
|
||||||
cv::gpu::error("colFilterCaller: can't use specified border extrapolation, image is too small, "
|
|
||||||
"try bigger image or another border extrapolation mode", __FILE__, __LINE__);
|
|
||||||
|
|
||||||
colFilterKernel<T, B, ksize><<<grid, threads>>>(src, dst, anchor, border);
|
|
||||||
cudaSafeCall(cudaThreadSynchronize());
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
template <typename T, typename B>
|
|
||||||
void colFilterCaller(const DevMem2D_<T> src, PtrStepf dst, int anchor,
|
|
||||||
const float* kernel, int ksize)
|
|
||||||
{
|
|
||||||
typedef void (*Caller)(const DevMem2D_<T>, PtrStepf, int);
|
|
||||||
|
|
||||||
static const Caller callers[] =
|
|
||||||
{
|
|
||||||
0, colFilterCaller<T, B, 1>,
|
|
||||||
colFilterCaller<T, B, 2>, colFilterCaller<T, B, 3>,
|
|
||||||
colFilterCaller<T, B, 4>, colFilterCaller<T, B, 5>,
|
|
||||||
colFilterCaller<T, B, 6>, colFilterCaller<T, B, 7>,
|
|
||||||
colFilterCaller<T, B, 8>, colFilterCaller<T, B, 9>,
|
|
||||||
colFilterCaller<T, B, 10>, colFilterCaller<T, B, 11>,
|
|
||||||
colFilterCaller<T, B, 12>, colFilterCaller<T, B, 13>,
|
|
||||||
colFilterCaller<T, B, 14>, colFilterCaller<T, B, 15>
|
|
||||||
};
|
|
||||||
|
|
||||||
loadKernel(kernel, ksize);
|
|
||||||
callers[ksize](src, dst, anchor);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
void colFilterCaller(const DevMem2D_<T> src, PtrStepf dst, int anchor,
|
|
||||||
const float* kernel, int ksize, int brd_interp)
|
|
||||||
{
|
|
||||||
typedef void (*Caller)(const DevMem2D_<T>, PtrStepf, int, const float*, int);
|
|
||||||
|
|
||||||
static const Caller callers[] =
|
|
||||||
{
|
|
||||||
colFilterCaller<T, BrdColReflect101<T> >,
|
|
||||||
colFilterCaller<T, BrdColReplicate<T> >
|
|
||||||
};
|
|
||||||
|
|
||||||
callers[brd_interp](src, dst, anchor, kernel, ksize);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
template void colFilterCaller<unsigned char>(const DevMem2D_<unsigned char>, PtrStepf, int, const float*, int, int);
|
|
||||||
template void colFilterCaller<float>(const DevMem2D_<float>, PtrStepf, int, const float*, int, int);
|
|
||||||
|
|
||||||
}}}
|
|
@ -986,22 +986,10 @@ namespace cv { namespace gpu { namespace imgproc {
|
|||||||
|
|
||||||
}}}
|
}}}
|
||||||
|
|
||||||
namespace cv { namespace gpu { namespace linear_filters {
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
void rowFilterCaller(const DevMem2D_<T> src, PtrStepf dst, int anchor, const float* kernel,
|
|
||||||
int ksize, int brd_interp);
|
|
||||||
|
|
||||||
template <typename T>
|
|
||||||
void colFilterCaller(const DevMem2D_<T> src, PtrStepf dst, int anchor, const float* kernel,
|
|
||||||
int ksize, int brd_interp);
|
|
||||||
|
|
||||||
}}}
|
|
||||||
|
|
||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void extractCovData(const GpuMat& src, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int gpuBorderType)
|
void extractCovData(const GpuMat& src, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int borderType)
|
||||||
{
|
{
|
||||||
double scale = (double)(1 << ((ksize > 0 ? ksize : 3) - 1)) * blockSize;
|
double scale = (double)(1 << ((ksize > 0 ? ksize : 3) - 1)) * blockSize;
|
||||||
if (ksize < 0)
|
if (ksize < 0)
|
||||||
@ -1013,42 +1001,28 @@ namespace
|
|||||||
GpuMat tmp_buf(src.size(), CV_32F);
|
GpuMat tmp_buf(src.size(), CV_32F);
|
||||||
Dx.create(src.size(), CV_32F);
|
Dx.create(src.size(), CV_32F);
|
||||||
Dy.create(src.size(), CV_32F);
|
Dy.create(src.size(), CV_32F);
|
||||||
Mat kx, ky;
|
|
||||||
|
|
||||||
getDerivKernels(kx, ky, 1, 0, ksize, false, CV_32F);
|
if (ksize > 0)
|
||||||
kx = kx.reshape(1, 1) * scale;
|
{
|
||||||
ky = ky.reshape(1, 1);
|
Sobel(src, Dx, CV_32F, 1, 0, ksize, scale, borderType);
|
||||||
|
Sobel(src, Dy, CV_32F, 0, 1, ksize, scale, borderType);
|
||||||
linear_filters::rowFilterCaller<T>(
|
}
|
||||||
src, tmp_buf, kx.cols >> 1, kx.ptr<float>(0), kx.cols,
|
else
|
||||||
gpuBorderType);
|
{
|
||||||
|
Scharr(src, Dx, CV_32F, 1, 0, scale, borderType);
|
||||||
linear_filters::colFilterCaller<float>(
|
Scharr(src, Dy, CV_32F, 0, 1, scale, borderType);
|
||||||
tmp_buf, Dx, ky.cols >> 1, ky.ptr<float>(0), ky.cols,
|
}
|
||||||
gpuBorderType);
|
|
||||||
|
|
||||||
getDerivKernels(kx, ky, 0, 1, ksize, false, CV_32F);
|
|
||||||
kx = kx.reshape(1, 1);
|
|
||||||
ky = ky.reshape(1, 1) * scale;
|
|
||||||
|
|
||||||
linear_filters::rowFilterCaller<T>(
|
|
||||||
src, tmp_buf, kx.cols >> 1, kx.ptr<float>(0), kx.cols,
|
|
||||||
gpuBorderType);
|
|
||||||
|
|
||||||
linear_filters::colFilterCaller<float>(
|
|
||||||
tmp_buf, Dy, ky.cols >> 1, ky.ptr<float>(0), ky.cols,
|
|
||||||
gpuBorderType);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void extractCovData(const GpuMat& src, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int gpuBorderType)
|
void extractCovData(const GpuMat& src, GpuMat& Dx, GpuMat& Dy, int blockSize, int ksize, int borderType)
|
||||||
{
|
{
|
||||||
switch (src.type())
|
switch (src.type())
|
||||||
{
|
{
|
||||||
case CV_8U:
|
case CV_8U:
|
||||||
extractCovData<unsigned char>(src, Dx, Dy, blockSize, ksize, gpuBorderType);
|
extractCovData<unsigned char>(src, Dx, Dy, blockSize, ksize, borderType);
|
||||||
break;
|
break;
|
||||||
case CV_32F:
|
case CV_32F:
|
||||||
extractCovData<float>(src, Dx, Dy, blockSize, ksize, gpuBorderType);
|
extractCovData<float>(src, Dx, Dy, blockSize, ksize, borderType);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
CV_Error(CV_StsBadArg, "extractCovData: unsupported type of the source matrix");
|
CV_Error(CV_StsBadArg, "extractCovData: unsupported type of the source matrix");
|
||||||
@ -1090,7 +1064,7 @@ void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, int blockSize, int ks
|
|||||||
CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));
|
CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));
|
||||||
|
|
||||||
GpuMat Dx, Dy;
|
GpuMat Dx, Dy;
|
||||||
extractCovData(src, Dx, Dy, blockSize, ksize, gpuBorderType);
|
extractCovData(src, Dx, Dy, blockSize, ksize, borderType);
|
||||||
dst.create(src.size(), CV_32F);
|
dst.create(src.size(), CV_32F);
|
||||||
imgproc::cornerHarris_caller(blockSize, (float)k, Dx, Dy, dst, gpuBorderType);
|
imgproc::cornerHarris_caller(blockSize, (float)k, Dx, Dy, dst, gpuBorderType);
|
||||||
}
|
}
|
||||||
@ -1104,7 +1078,7 @@ void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, i
|
|||||||
CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));
|
CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));
|
||||||
|
|
||||||
GpuMat Dx, Dy;
|
GpuMat Dx, Dy;
|
||||||
extractCovData(src, Dx, Dy, blockSize, ksize, gpuBorderType);
|
extractCovData(src, Dx, Dy, blockSize, ksize, borderType);
|
||||||
dst.create(src.size(), CV_32F);
|
dst.create(src.size(), CV_32F);
|
||||||
imgproc::cornerMinEigenVal_caller(blockSize, Dx, Dy, dst, gpuBorderType);
|
imgproc::cornerMinEigenVal_caller(blockSize, Dx, Dy, dst, gpuBorderType);
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user