added support of BORDER_REFLECT to gpu::cornerHarris and gpu::cornerMinEigenVal

This commit is contained in:
Vladislav Vinogradov 2012-01-23 07:14:45 +00:00
parent 347a7106ab
commit 1d1da9c5d6
3 changed files with 83 additions and 143 deletions

View File

@ -373,49 +373,17 @@ namespace cv { namespace gpu { namespace device
reprojectImageTo3D_caller(disp, xyzw, q, stream); reprojectImageTo3D_caller(disp, xyzw, q, stream);
} }
//////////////////////////////////////// Extract Cov Data //////////////////////////////////////////////// /////////////////////////////////////////// Corner Harris /////////////////////////////////////////////////
__global__ void extractCovData_kernel(const int cols, const int rows, const PtrStepf Dx, texture<float, cudaTextureType2D, cudaReadModeElementType> harrisDxTex(0, cudaFilterModePoint, cudaAddressModeClamp);
const PtrStepf Dy, PtrStepf dst) texture<float, cudaTextureType2D, cudaReadModeElementType> harrisDyTex(0, cudaFilterModePoint, cudaAddressModeClamp);
__global__ void cornerHarris_kernel(const int block_size, const float k, DevMem2Df dst)
{ {
const int x = blockIdx.x * blockDim.x + threadIdx.x; const int x = blockIdx.x * blockDim.x + threadIdx.x;
const int y = blockIdx.y * blockDim.y + threadIdx.y; const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x < cols && y < rows) if (x < dst.cols && y < dst.rows)
{
float dx = Dx.ptr(y)[x];
float dy = Dy.ptr(y)[x];
dst.ptr(y)[x] = dx * dx;
dst.ptr(y + rows)[x] = dx * dy;
dst.ptr(y + (rows << 1))[x] = dy * dy;
}
}
// Host-side launcher for extractCovData_kernel.
//
// Dx, Dy  - derivative images handed to the kernel; the launch geometry is
//           derived from Dx.cols / Dx.rows only.
// dst     - output buffer passed through to the kernel unchanged.
// stream  - CUDA stream the kernel is enqueued on.
//
// The launch status is checked immediately after enqueueing. A full device
// synchronisation is performed only when the caller passed stream 0.
void extractCovData_caller(const DevMem2Df Dx, const DevMem2Df Dy, PtrStepf dst, cudaStream_t stream)
{
    // Fixed 32x8 thread tile per block.
    const dim3 blockDims(32, 8);

    // Enough blocks to span the whole Dx extent (divUp presumably rounds the
    // quotient up -- helper is defined outside this view).
    const dim3 gridDims(divUp(Dx.cols, blockDims.x), divUp(Dx.rows, blockDims.y));

    extractCovData_kernel<<<gridDims, blockDims, 0, stream>>>(Dx.cols, Dx.rows, Dx, Dy, dst);
    cudaSafeCall( cudaGetLastError() );

    // Work queued on a caller-supplied stream is left asynchronous.
    if (stream != 0)
        return;

    cudaSafeCall( cudaDeviceSynchronize() );
}
/////////////////////////////////////////// Corner Harris /////////////////////////////////////////////////
texture<float, 2> harrisDxTex;
texture<float, 2> harrisDyTex;
__global__ void cornerHarris_kernel(const int cols, const int rows, const int block_size, const float k,
PtrStepb dst)
{
const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x < cols && y < rows)
{ {
float a = 0.f; float a = 0.f;
float b = 0.f; float b = 0.f;
@ -432,24 +400,24 @@ namespace cv { namespace gpu { namespace device
{ {
float dx = tex2D(harrisDxTex, j, i); float dx = tex2D(harrisDxTex, j, i);
float dy = tex2D(harrisDyTex, j, i); float dy = tex2D(harrisDyTex, j, i);
a += dx * dx; a += dx * dx;
b += dx * dy; b += dx * dy;
c += dy * dy; c += dy * dy;
} }
} }
((float*)dst.ptr(y))[x] = a * c - b * b - k * (a + c) * (a + c); dst(y, x) = a * c - b * b - k * (a + c) * (a + c);
} }
} }
template <typename BR, typename BC> template <typename BR, typename BC>
__global__ void cornerHarris_kernel(const int cols, const int rows, const int block_size, const float k, __global__ void cornerHarris_kernel(const int block_size, const float k, DevMem2Df dst, const BR border_row, const BC border_col)
PtrStepb dst, BR border_row, BC border_col)
{ {
const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x; const int x = blockIdx.x * blockDim.x + threadIdx.x;
const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y; const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x < cols && y < rows) if (x < dst.cols && y < dst.rows)
{ {
float a = 0.f; float a = 0.f;
float b = 0.f; float b = 0.f;
@ -462,50 +430,45 @@ namespace cv { namespace gpu { namespace device
for (int i = ibegin; i < iend; ++i) for (int i = ibegin; i < iend; ++i)
{ {
int y = border_col.idx_row(i); const int y = border_col.idx_row(i);
for (int j = jbegin; j < jend; ++j) for (int j = jbegin; j < jend; ++j)
{ {
int x = border_row.idx_col(j); const int x = border_row.idx_col(j);
float dx = tex2D(harrisDxTex, x, y); float dx = tex2D(harrisDxTex, x, y);
float dy = tex2D(harrisDyTex, x, y); float dy = tex2D(harrisDyTex, x, y);
a += dx * dx; a += dx * dx;
b += dx * dy; b += dx * dy;
c += dy * dy; c += dy * dy;
} }
} }
((float*)dst.ptr(y))[x] = a * c - b * b - k * (a + c) * (a + c); dst(y, x) = a * c - b * b - k * (a + c) * (a + c);
} }
} }
void cornerHarris_caller(const int block_size, const float k, const DevMem2Db Dx, const DevMem2Db Dy, DevMem2Db dst, void cornerHarris_gpu(int block_size, float k, DevMem2Df Dx, DevMem2Df Dy, DevMem2Df dst, int border_type, cudaStream_t stream)
int border_type, cudaStream_t stream)
{ {
const int rows = Dx.rows; dim3 block(32, 8);
const int cols = Dx.cols; dim3 grid(divUp(Dx.cols, block.x), divUp(Dx.rows, block.y));
dim3 threads(32, 8); bindTexture(&harrisDxTex, Dx);
dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y)); bindTexture(&harrisDyTex, Dy);
cudaChannelFormatDesc desc = cudaCreateChannelDesc<float>();
cudaBindTexture2D(0, harrisDxTex, Dx.data, desc, Dx.cols, Dx.rows, Dx.step);
cudaBindTexture2D(0, harrisDyTex, Dy.data, desc, Dy.cols, Dy.rows, Dy.step);
harrisDxTex.filterMode = cudaFilterModePoint;
harrisDyTex.filterMode = cudaFilterModePoint;
switch (border_type) switch (border_type)
{ {
case BORDER_REFLECT101_GPU: case BORDER_REFLECT101_GPU:
cornerHarris_kernel<<<grid, threads, 0, stream>>>( cornerHarris_kernel<<<grid, block, 0, stream>>>(block_size, k, dst, BrdRowReflect101<void>(Dx.cols), BrdColReflect101<void>(Dx.rows));
cols, rows, block_size, k, dst, BrdRowReflect101<void>(cols), BrdColReflect101<void>(rows));
break; break;
case BORDER_REPLICATE_GPU:
harrisDxTex.addressMode[0] = cudaAddressModeClamp;
harrisDxTex.addressMode[1] = cudaAddressModeClamp;
harrisDyTex.addressMode[0] = cudaAddressModeClamp;
harrisDyTex.addressMode[1] = cudaAddressModeClamp;
cornerHarris_kernel<<<grid, threads, 0, stream>>>(cols, rows, block_size, k, dst); case BORDER_REFLECT_GPU:
cornerHarris_kernel<<<grid, block, 0, stream>>>(block_size, k, dst, BrdRowReflect<void>(Dx.cols), BrdColReflect<void>(Dx.rows));
break;
case BORDER_REPLICATE_GPU:
cornerHarris_kernel<<<grid, block, 0, stream>>>(block_size, k, dst);
break; break;
} }
@ -513,23 +476,19 @@ namespace cv { namespace gpu { namespace device
if (stream == 0) if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
//cudaSafeCall(cudaUnbindTexture(harrisDxTex));
//cudaSafeCall(cudaUnbindTexture(harrisDyTex));
} }
/////////////////////////////////////////// Corner Min Eigen Val ///////////////////////////////////////////////// /////////////////////////////////////////// Corner Min Eigen Val /////////////////////////////////////////////////
texture<float, 2> minEigenValDxTex; texture<float, cudaTextureType2D, cudaReadModeElementType> minEigenValDxTex(0, cudaFilterModePoint, cudaAddressModeClamp);
texture<float, 2> minEigenValDyTex; texture<float, cudaTextureType2D, cudaReadModeElementType> minEigenValDyTex(0, cudaFilterModePoint, cudaAddressModeClamp);
__global__ void cornerMinEigenVal_kernel(const int cols, const int rows, const int block_size, __global__ void cornerMinEigenVal_kernel(const int block_size, DevMem2Df dst)
PtrStepb dst)
{ {
const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x; const int x = blockIdx.x * blockDim.x + threadIdx.x;
const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y; const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x < cols && y < rows) if (x < dst.cols && y < dst.rows)
{ {
float a = 0.f; float a = 0.f;
float b = 0.f; float b = 0.f;
@ -546,6 +505,7 @@ namespace cv { namespace gpu { namespace device
{ {
float dx = tex2D(minEigenValDxTex, j, i); float dx = tex2D(minEigenValDxTex, j, i);
float dy = tex2D(minEigenValDyTex, j, i); float dy = tex2D(minEigenValDyTex, j, i);
a += dx * dx; a += dx * dx;
b += dx * dy; b += dx * dy;
c += dy * dy; c += dy * dy;
@ -554,19 +514,19 @@ namespace cv { namespace gpu { namespace device
a *= 0.5f; a *= 0.5f;
c *= 0.5f; c *= 0.5f;
((float*)dst.ptr(y))[x] = (a + c) - sqrtf((a - c) * (a - c) + b * b);
dst(y, x) = (a + c) - sqrtf((a - c) * (a - c) + b * b);
} }
} }
template <typename BR, typename BC> template <typename BR, typename BC>
__global__ void cornerMinEigenVal_kernel(const int cols, const int rows, const int block_size, __global__ void cornerMinEigenVal_kernel(const int block_size, DevMem2Df dst, const BR border_row, const BC border_col)
PtrStepb dst, BR border_row, BC border_col)
{ {
const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x; const int x = blockIdx.x * blockDim.x + threadIdx.x;
const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y; const int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x < cols && y < rows) if (x < dst.cols && y < dst.rows)
{ {
float a = 0.f; float a = 0.f;
float b = 0.f; float b = 0.f;
@ -580,11 +540,14 @@ namespace cv { namespace gpu { namespace device
for (int i = ibegin; i < iend; ++i) for (int i = ibegin; i < iend; ++i)
{ {
int y = border_col.idx_row(i); int y = border_col.idx_row(i);
for (int j = jbegin; j < jend; ++j) for (int j = jbegin; j < jend; ++j)
{ {
int x = border_row.idx_col(j); int x = border_row.idx_col(j);
float dx = tex2D(minEigenValDxTex, x, y); float dx = tex2D(minEigenValDxTex, x, y);
float dy = tex2D(minEigenValDyTex, x, y); float dy = tex2D(minEigenValDyTex, x, y);
a += dx * dx; a += dx * dx;
b += dx * dy; b += dx * dy;
c += dy * dy; c += dy * dy;
@ -593,38 +556,31 @@ namespace cv { namespace gpu { namespace device
a *= 0.5f; a *= 0.5f;
c *= 0.5f; c *= 0.5f;
((float*)dst.ptr(y))[x] = (a + c) - sqrtf((a - c) * (a - c) + b * b);
dst(y, x) = (a + c) - sqrtf((a - c) * (a - c) + b * b);
} }
} }
void cornerMinEigenVal_caller(const int block_size, const DevMem2Db Dx, const DevMem2Db Dy, DevMem2Db dst, void cornerMinEigenVal_gpu(int block_size, DevMem2Df Dx, DevMem2Df Dy, DevMem2Df dst, int border_type, cudaStream_t stream)
int border_type, cudaStream_t stream)
{ {
const int rows = Dx.rows; dim3 block(32, 8);
const int cols = Dx.cols; dim3 grid(divUp(Dx.cols, block.x), divUp(Dx.rows, block.y));
dim3 threads(32, 8); bindTexture(&minEigenValDxTex, Dx);
dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y)); bindTexture(&minEigenValDyTex, Dy);
cudaChannelFormatDesc desc = cudaCreateChannelDesc<float>();
cudaBindTexture2D(0, minEigenValDxTex, Dx.data, desc, Dx.cols, Dx.rows, Dx.step);
cudaBindTexture2D(0, minEigenValDyTex, Dy.data, desc, Dy.cols, Dy.rows, Dy.step);
minEigenValDxTex.filterMode = cudaFilterModePoint;
minEigenValDyTex.filterMode = cudaFilterModePoint;
switch (border_type) switch (border_type)
{ {
case BORDER_REFLECT101_GPU: case BORDER_REFLECT101_GPU:
cornerMinEigenVal_kernel<<<grid, threads, 0, stream>>>( cornerMinEigenVal_kernel<<<grid, block, 0, stream>>>(block_size, dst, BrdRowReflect101<void>(Dx.cols), BrdColReflect101<void>(Dx.rows));
cols, rows, block_size, dst, BrdRowReflect101<void>(cols), BrdColReflect101<void>(rows));
break; break;
case BORDER_REPLICATE_GPU:
minEigenValDxTex.addressMode[0] = cudaAddressModeClamp;
minEigenValDxTex.addressMode[1] = cudaAddressModeClamp;
minEigenValDyTex.addressMode[0] = cudaAddressModeClamp;
minEigenValDyTex.addressMode[1] = cudaAddressModeClamp;
cornerMinEigenVal_kernel<<<grid, threads, 0, stream>>>(cols, rows, block_size, dst); case BORDER_REFLECT_GPU:
cornerMinEigenVal_kernel<<<grid, block, 0, stream>>>(block_size, dst, BrdRowReflect<void>(Dx.cols), BrdColReflect<void>(Dx.rows));
break;
case BORDER_REPLICATE_GPU:
cornerMinEigenVal_kernel<<<grid, block, 0, stream>>>(block_size, dst);
break; break;
} }
@ -632,9 +588,6 @@ namespace cv { namespace gpu { namespace device
if (stream == 0) if (stream == 0)
cudaSafeCall(cudaDeviceSynchronize()); cudaSafeCall(cudaDeviceSynchronize());
//cudaSafeCall(cudaUnbindTexture(minEigenValDxTex));
//cudaSafeCall(cudaUnbindTexture(minEigenValDyTex));
} }
////////////////////////////// Column Sum ////////////////////////////////////// ////////////////////////////// Column Sum //////////////////////////////////////

View File

@ -1344,22 +1344,23 @@ namespace cv { namespace gpu { namespace device
{ {
namespace imgproc namespace imgproc
{ {
void extractCovData_caller(const DevMem2Df Dx, const DevMem2Df Dy, PtrStepf dst, cudaStream_t stream); void cornerHarris_gpu(int block_size, float k, DevMem2Df Dx, DevMem2Df Dy, DevMem2Df dst, int border_type, cudaStream_t stream);
void cornerHarris_caller(const int block_size, const float k, const DevMem2Db Dx, const DevMem2Db Dy, DevMem2Db dst, int border_type, cudaStream_t stream); void cornerMinEigenVal_gpu(int block_size, DevMem2Df Dx, DevMem2Df Dy, DevMem2Df dst, int border_type, cudaStream_t stream);
void cornerMinEigenVal_caller(const int block_size, const DevMem2Db Dx, const DevMem2Db Dy, DevMem2Db dst, int border_type, cudaStream_t stream);
} }
}}} }}}
namespace namespace
{ {
template <typename T>
void extractCovData(const GpuMat& src, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize, int borderType, Stream& stream) void extractCovData(const GpuMat& src, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize, int borderType, Stream& stream)
{ {
double scale = (double)(1 << ((ksize > 0 ? ksize : 3) - 1)) * blockSize; double scale = static_cast<double>(1 << ((ksize > 0 ? ksize : 3) - 1)) * blockSize;
if (ksize < 0) if (ksize < 0)
scale *= 2.; scale *= 2.;
if (src.depth() == CV_8U) if (src.depth() == CV_8U)
scale *= 255.; scale *= 255.;
scale = 1./scale; scale = 1./scale;
Dx.create(src.size(), CV_32F); Dx.create(src.size(), CV_32F);
@ -1376,23 +1377,7 @@ namespace
Scharr(src, Dy, CV_32F, 0, 1, buf, scale, borderType, -1, stream); Scharr(src, Dy, CV_32F, 0, 1, buf, scale, borderType, -1, stream);
} }
} }
}
// Run-time dispatcher: selects the extractCovData<T> instantiation that
// corresponds to src.type() and forwards every argument to it untouched.
//
//   CV_8U  -> extractCovData<unsigned char>
//   CV_32F -> extractCovData<float>
//
// Any other source type is reported through CV_Error with CV_StsBadArg.
void extractCovData(const GpuMat& src, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize, int borderType, Stream& stream)
{
    const int srcType = src.type();

    if (srcType == CV_8U)
    {
        extractCovData<unsigned char>(src, Dx, Dy, buf, blockSize, ksize, borderType, stream);
        return;
    }

    if (srcType == CV_32F)
    {
        extractCovData<float>(src, Dx, Dy, buf, blockSize, ksize, borderType, stream);
        return;
    }

    CV_Error(CV_StsBadArg, "extractCovData: unsupported type of the source matrix");
}
} // Anonymous namespace
bool cv::gpu::tryConvertToGpuBorderType(int cpuBorderType, int& gpuBorderType) bool cv::gpu::tryConvertToGpuBorderType(int cpuBorderType, int& gpuBorderType)
{ {
@ -1433,17 +1418,18 @@ void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& D
void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize, double k, int borderType, Stream& stream) void cv::gpu::cornerHarris(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuMat& Dy, GpuMat& buf, int blockSize, int ksize, double k, int borderType, Stream& stream)
{ {
using namespace ::cv::gpu::device::imgproc; using namespace cv::gpu::device::imgproc;
CV_Assert(borderType == cv::BORDER_REFLECT101 || CV_Assert(borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT);
borderType == cv::BORDER_REPLICATE);
int gpuBorderType; int gpuBorderType;
CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType)); CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));
extractCovData(src, Dx, Dy, buf, blockSize, ksize, borderType, stream); extractCovData(src, Dx, Dy, buf, blockSize, ksize, borderType, stream);
dst.create(src.size(), CV_32F); dst.create(src.size(), CV_32F);
cornerHarris_caller(blockSize, (float)k, Dx, Dy, dst, gpuBorderType, StreamAccessor::getStream(stream));
cornerHarris_gpu(blockSize, static_cast<float>(k), Dx, Dy, dst, gpuBorderType, StreamAccessor::getStream(stream));
} }
void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, int borderType) void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, int blockSize, int ksize, int borderType)
@ -1462,15 +1448,16 @@ void cv::gpu::cornerMinEigenVal(const GpuMat& src, GpuMat& dst, GpuMat& Dx, GpuM
{ {
using namespace ::cv::gpu::device::imgproc; using namespace ::cv::gpu::device::imgproc;
CV_Assert(borderType == cv::BORDER_REFLECT101 || CV_Assert(borderType == cv::BORDER_REFLECT101 || borderType == cv::BORDER_REPLICATE || borderType == cv::BORDER_REFLECT);
borderType == cv::BORDER_REPLICATE);
int gpuBorderType; int gpuBorderType;
CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType)); CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));
extractCovData(src, Dx, Dy, buf, blockSize, ksize, borderType, stream); extractCovData(src, Dx, Dy, buf, blockSize, ksize, borderType, stream);
dst.create(src.size(), CV_32F); dst.create(src.size(), CV_32F);
cornerMinEigenVal_caller(blockSize, Dx, Dy, dst, gpuBorderType, StreamAccessor::getStream(stream));
cornerMinEigenVal_gpu(blockSize, Dx, Dy, dst, gpuBorderType, StreamAccessor::getStream(stream));
} }
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////

View File

@ -2774,13 +2774,13 @@ TEST_P(CornerHarris, Accuracy)
dev_dst.download(dst); dev_dst.download(dst);
); );
EXPECT_MAT_NEAR(dst_gold, dst, 1e-3); EXPECT_MAT_NEAR(dst_gold, dst, 0.02);
} }
INSTANTIATE_TEST_CASE_P(ImgProc, CornerHarris, Combine( INSTANTIATE_TEST_CASE_P(ImgProc, CornerHarris, Combine(
ALL_DEVICES, ALL_DEVICES,
Values(CV_8UC1, CV_32FC1), Values(CV_8UC1, CV_32FC1),
Values((int) cv::BORDER_REFLECT101, (int) cv::BORDER_REPLICATE))); Values((int) cv::BORDER_REFLECT101, (int) cv::BORDER_REPLICATE, (int) cv::BORDER_REFLECT)));
/////////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////////
// cornerMinEigen // cornerMinEigen
@ -2829,13 +2829,13 @@ TEST_P(CornerMinEigen, Accuracy)
dev_dst.download(dst); dev_dst.download(dst);
); );
EXPECT_MAT_NEAR(dst_gold, dst, 1e-2); EXPECT_MAT_NEAR(dst_gold, dst, 0.02);
} }
INSTANTIATE_TEST_CASE_P(ImgProc, CornerMinEigen, Combine( INSTANTIATE_TEST_CASE_P(ImgProc, CornerMinEigen, Combine(
ALL_DEVICES, ALL_DEVICES,
Values(CV_8UC1, CV_32FC1), Values(CV_8UC1, CV_32FC1),
Values((int) cv::BORDER_REFLECT101, (int) cv::BORDER_REPLICATE))); Values((int) cv::BORDER_REFLECT101, (int) cv::BORDER_REPLICATE, (int) cv::BORDER_REFLECT)));
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
// ColumnSum // ColumnSum